yt_dlp/utils.py

   1 import asyncio
   2 import atexit
   3 import base64
   4 import binascii
   5 import calendar
   6 import codecs
   7 import collections
   8 import collections.abc
   9 import contextlib
  10 import datetime
  11 import email.header
  12 import email.utils
  13 import errno
  14 import gzip
  15 import hashlib
  16 import hmac
  17 import html.entities
  18 import html.parser
  19 import http.client
  20 import http.cookiejar
  21 import importlib.util
  22 import inspect
  23 import io
  24 import itertools
  25 import json
  26 import locale
  27 import math
  28 import mimetypes
  29 import operator
  30 import os
  31 import platform
  32 import random
  33 import re
  34 import shlex
  35 import socket
  36 import ssl
  37 import struct
  38 import subprocess
  39 import sys
  40 import tempfile
  41 import time
  42 import traceback
  43 import types
  44 import unicodedata
  45 import urllib.error
  46 import urllib.parse
  47 import urllib.request
  48 import xml.etree.ElementTree
  49 import zlib
  50
  51 from .compat import functools  # isort: split
  52 from .compat import (
  53     compat_etree_fromstring,
  54     compat_expanduser,
  55     compat_HTMLParseError,
  56     compat_os_name,
  57     compat_shlex_quote,
  58 )
  59 from .dependencies import brotli, certifi, websockets, xattr
  60 from .socks import ProxyType, sockssocket
  61
  62
  63 def register_socks_protocols():
  64     # "Register" SOCKS protocols
  65     # In Python < 2.6.5, urlsplit() suffers from bug https://bugs.python.org/issue7904
  66     # URLs with protocols not in urlparse.uses_netloc are not handled correctly
  67     for scheme in ('socks', 'socks4', 'socks4a', 'socks5'):
  68         if scheme not in urllib.parse.uses_netloc:
  69             urllib.parse.uses_netloc.append(scheme)
  70
  71
  72 # This is not clearly defined otherwise
  73 compiled_regex_type = type(re.compile(''))
  74
  75
  76 def random_user_agent():
  77     _USER_AGENT_TPL = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/%s Safari/537.36'
  78     _CHROME_VERSIONS = (
  79         '90.0.4430.212',
  80         '90.0.4430.24',
  81         '90.0.4430.70',
  82         '90.0.4430.72',
  83         '90.0.4430.85',
  84         '90.0.4430.93',
  85         '91.0.4472.101',
  86         '91.0.4472.106',
  87         '91.0.4472.114',
  88         '91.0.4472.124',
  89         '91.0.4472.164',
  90         '91.0.4472.19',
  91         '91.0.4472.77',
  92         '92.0.4515.107',
  93         '92.0.4515.115',
  94         '92.0.4515.131',
  95         '92.0.4515.159',
  96         '92.0.4515.43',
  97         '93.0.4556.0',
  98         '93.0.4577.15',
  99         '93.0.4577.63',
 100         '93.0.4577.82',
 101         '94.0.4606.41',
 102         '94.0.4606.54',
 103         '94.0.4606.61',
 104         '94.0.4606.71',
 105         '94.0.4606.81',
 106         '94.0.4606.85',
 107         '95.0.4638.17',
 108         '95.0.4638.50',
 109         '95.0.4638.54',
 110         '95.0.4638.69',
 111         '95.0.4638.74',
 112         '96.0.4664.18',
 113         '96.0.4664.45',
 114         '96.0.4664.55',
 115         '96.0.4664.93',
 116         '97.0.4692.20',
 117     )
 118     return _USER_AGENT_TPL % random.choice(_CHROME_VERSIONS)
 119
 120
 121 SUPPORTED_ENCODINGS = [
 122     'gzip', 'deflate'
 123 ]
 124 if brotli:
 125     SUPPORTED_ENCODINGS.append('br')
 126
 127 std_headers = {
 128     'User-Agent': random_user_agent(),
 129     'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
 130     'Accept-Language': 'en-us,en;q=0.5',
 131     'Sec-Fetch-Mode': 'navigate',
 132 }
 133
 134
 135 USER_AGENTS = {
 136     'Safari': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27',
 137 }
 138
 139
 140 NO_DEFAULT = object()
 141 IDENTITY = lambda x: x
 142
 143 ENGLISH_MONTH_NAMES = [
 144     'January', 'February', 'March', 'April', 'May', 'June',
 145     'July', 'August', 'September', 'October', 'November', 'December']
 146
 147 MONTH_NAMES = {
 148     'en': ENGLISH_MONTH_NAMES,
 149     'fr': [
 150         'janvier', 'février', 'mars', 'avril', 'mai', 'juin',
 151         'juillet', 'août', 'septembre', 'octobre', 'novembre', 'décembre'],
 152 }
 153
 154 # From https://github.com/python/cpython/blob/3.11/Lib/email/_parseaddr.py#L36-L42
 155 TIMEZONE_NAMES = {
 156     'UT': 0, 'UTC': 0, 'GMT': 0, 'Z': 0,
 157     'AST': -4, 'ADT': -3,  # Atlantic (used in Canada)
 158     'EST': -5, 'EDT': -4,  # Eastern
 159     'CST': -6, 'CDT': -5,  # Central
 160     'MST': -7, 'MDT': -6,  # Mountain
 161     'PST': -8, 'PDT': -7   # Pacific
 162 }
 163
 164 # needed for sanitizing filenames in restricted mode
 165 ACCENT_CHARS = dict(zip('ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖŐØŒÙÚÛÜŰÝÞßàáâãäåæçèéêëìíîïðñòóôõöőøœùúûüűýþÿ',
 166                         itertools.chain('AAAAAA', ['AE'], 'CEEEEIIIIDNOOOOOOO', ['OE'], 'UUUUUY', ['TH', 'ss'],
 167                                         'aaaaaa', ['ae'], 'ceeeeiiiionooooooo', ['oe'], 'uuuuuy', ['th'], 'y')))
 168
 169 DATE_FORMATS = (
 170     '%d %B %Y',
 171     '%d %b %Y',
 172     '%B %d %Y',
 173     '%B %dst %Y',
 174     '%B %dnd %Y',
 175     '%B %drd %Y',
 176     '%B %dth %Y',
 177     '%b %d %Y',
 178     '%b %dst %Y',
 179     '%b %dnd %Y',
 180     '%b %drd %Y',
 181     '%b %dth %Y',
 182     '%b %dst %Y %I:%M',
 183     '%b %dnd %Y %I:%M',
 184     '%b %drd %Y %I:%M',
 185     '%b %dth %Y %I:%M',
 186     '%Y %m %d',
 187     '%Y-%m-%d',
 188     '%Y.%m.%d.',
 189     '%Y/%m/%d',
 190     '%Y/%m/%d %H:%M',
 191     '%Y/%m/%d %H:%M:%S',
 192     '%Y%m%d%H%M',
 193     '%Y%m%d%H%M%S',
 194     '%Y%m%d',
 195     '%Y-%m-%d %H:%M',
 196     '%Y-%m-%d %H:%M:%S',
 197     '%Y-%m-%d %H:%M:%S.%f',
 198     '%Y-%m-%d %H:%M:%S:%f',
 199     '%d.%m.%Y %H:%M',
 200     '%d.%m.%Y %H.%M',
 201     '%Y-%m-%dT%H:%M:%SZ',
 202     '%Y-%m-%dT%H:%M:%S.%fZ',
 203     '%Y-%m-%dT%H:%M:%S.%f0Z',
 204     '%Y-%m-%dT%H:%M:%S',
 205     '%Y-%m-%dT%H:%M:%S.%f',
 206     '%Y-%m-%dT%H:%M',
 207     '%b %d %Y at %H:%M',
 208     '%b %d %Y at %H:%M:%S',
 209     '%B %d %Y at %H:%M',
 210     '%B %d %Y at %H:%M:%S',
 211     '%H:%M %d-%b-%Y',
 212 )
 213
 214 DATE_FORMATS_DAY_FIRST = list(DATE_FORMATS)
 215 DATE_FORMATS_DAY_FIRST.extend([
 216     '%d-%m-%Y',
 217     '%d.%m.%Y',
 218     '%d.%m.%y',
 219     '%d/%m/%Y',
 220     '%d/%m/%y',
 221     '%d/%m/%Y %H:%M:%S',
 222     '%d-%m-%Y %H:%M',
 223 ])
 224
 225 DATE_FORMATS_MONTH_FIRST = list(DATE_FORMATS)
 226 DATE_FORMATS_MONTH_FIRST.extend([
 227     '%m-%d-%Y',
 228     '%m.%d.%Y',
 229     '%m/%d/%Y',
 230     '%m/%d/%y',
 231     '%m/%d/%Y %H:%M:%S',
 232 ])
 233
 234 PACKED_CODES_RE = r"}\('(.+)',(\d+),(\d+),'([^']+)'\.split\('\|'\)"
 235 JSON_LD_RE = r'(?is)<script[^>]+type=(["\']?)application/ld\+json\1[^>]*>\s*(?P<json_ld>{.+?}|\[.+?\])\s*</script>'
 236
 237 NUMBER_RE = r'\d+(?:\.\d+)?'
 238
 239
 240 @functools.cache
 241 def preferredencoding():
 242     """Get preferred encoding.
 243
 244     Returns the best encoding scheme for the system, based on
 245     locale.getpreferredencoding() and some further tweaks.
 246     """
 247     try:
 248         pref = locale.getpreferredencoding()
 249         'TEST'.encode(pref)
 250     except Exception:
 251         pref = 'UTF-8'
 252
 253     return pref
 254
 255
 256 def write_json_file(obj, fn):
 257     """ Encode obj as JSON and write it to fn, atomically if possible """
 258
 259     tf = tempfile.NamedTemporaryFile(
 260         prefix=f'{os.path.basename(fn)}.', dir=os.path.dirname(fn),
 261         suffix='.tmp', delete=False, mode='w', encoding='utf-8')
 262
 263     try:
 264         with tf:
 265             json.dump(obj, tf, ensure_ascii=False)
 266         if sys.platform == 'win32':
 267             # Need to remove existing file on Windows, else os.rename raises
 268             # WindowsError or FileExistsError.
 269             with contextlib.suppress(OSError):
 270                 os.unlink(fn)
 271         with contextlib.suppress(OSError):
 272             mask = os.umask(0)
 273             os.umask(mask)
 274             os.chmod(tf.name, 0o666 & ~mask)
 275         os.rename(tf.name, fn)
 276     except Exception:
 277         with contextlib.suppress(OSError):
 278             os.remove(tf.name)
 279         raise
 280
 281
 282 def find_xpath_attr(node, xpath, key, val=None):
 283     """ Find the xpath xpath[@key=val] """
 284     assert re.match(r'^[a-zA-Z_-]+$', key)
 285     expr = xpath + ('[@%s]' % key if val is None else f"[@{key}='{val}']")
 286     return node.find(expr)
 287
 288 # On python2.6 the xml.etree.ElementTree.Element methods don't support
 289 # the namespace parameter
 290
 291
 292 def xpath_with_ns(path, ns_map):
 293     components = [c.split(':') for c in path.split('/')]
 294     replaced = []
 295     for c in components:
 296         if len(c) == 1:
 297             replaced.append(c[0])
 298         else:
 299             ns, tag = c
 300             replaced.append('{%s}%s' % (ns_map[ns], tag))
 301     return '/'.join(replaced)
 302
 303
 304 def xpath_element(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
 305     def _find_xpath(xpath):
 306         return node.find(xpath)
 307
 308     if isinstance(xpath, str):
 309         n = _find_xpath(xpath)
 310     else:
 311         for xp in xpath:
 312             n = _find_xpath(xp)
 313             if n is not None:
 314                 break
 315
 316     if n is None:
 317         if default is not NO_DEFAULT:
 318             return default
 319         elif fatal:
 320             name = xpath if name is None else name
 321             raise ExtractorError('Could not find XML element %s' % name)
 322         else:
 323             return None
 324     return n
 325
 326
 327 def xpath_text(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
 328     n = xpath_element(node, xpath, name, fatal=fatal, default=default)
 329     if n is None or n == default:
 330         return n
 331     if n.text is None:
 332         if default is not NO_DEFAULT:
 333             return default
 334         elif fatal:
 335             name = xpath if name is None else name
 336             raise ExtractorError('Could not find XML element\'s text %s' % name)
 337         else:
 338             return None
 339     return n.text
 340
 341
 342 def xpath_attr(node, xpath, key, name=None, fatal=False, default=NO_DEFAULT):
 343     n = find_xpath_attr(node, xpath, key)
 344     if n is None:
 345         if default is not NO_DEFAULT:
 346             return default
 347         elif fatal:
 348             name = f'{xpath}[@{key}]' if name is None else name
 349             raise ExtractorError('Could not find XML attribute %s' % name)
 350         else:
 351             return None
 352     return n.attrib[key]
 353
 354
 355 def get_element_by_id(id, html, **kwargs):
 356     """Return the content of the tag with the specified ID in the passed HTML document"""
 357     return get_element_by_attribute('id', id, html, **kwargs)
 358
 359
 360 def get_element_html_by_id(id, html, **kwargs):
 361     """Return the html of the tag with the specified ID in the passed HTML document"""
 362     return get_element_html_by_attribute('id', id, html, **kwargs)
 363
 364
 365 def get_element_by_class(class_name, html):
 366     """Return the content of the first tag with the specified class in the passed HTML document"""
 367     retval = get_elements_by_class(class_name, html)
 368     return retval[0] if retval else None
 369
 370
 371 def get_element_html_by_class(class_name, html):
 372     """Return the html of the first tag with the specified class in the passed HTML document"""
 373     retval = get_elements_html_by_class(class_name, html)
 374     return retval[0] if retval else None
 375
 376
 377 def get_element_by_attribute(attribute, value, html, **kwargs):
 378     retval = get_elements_by_attribute(attribute, value, html, **kwargs)
 379     return retval[0] if retval else None
 380
 381
 382 def get_element_html_by_attribute(attribute, value, html, **kargs):
 383     retval = get_elements_html_by_attribute(attribute, value, html, **kargs)
 384     return retval[0] if retval else None
 385
 386
 387 def get_elements_by_class(class_name, html, **kargs):
 388     """Return the content of all tags with the specified class in the passed HTML document as a list"""
 389     return get_elements_by_attribute(
 390         'class', r'[^\'"]*(?<=[\'"\s])%s(?=[\'"\s])[^\'"]*' % re.escape(class_name),
 391         html, escape_value=False)
 392
 393
 394 def get_elements_html_by_class(class_name, html):
 395     """Return the html of all tags with the specified class in the passed HTML document as a list"""
 396     return get_elements_html_by_attribute(
 397         'class', r'[^\'"]*(?<=[\'"\s])%s(?=[\'"\s])[^\'"]*' % re.escape(class_name),
 398         html, escape_value=False)
 399
 400
 401 def get_elements_by_attribute(*args, **kwargs):
 402     """Return the content of the tag with the specified attribute in the passed HTML document"""
 403     return [content for content, _ in get_elements_text_and_html_by_attribute(*args, **kwargs)]
 404
 405
 406 def get_elements_html_by_attribute(*args, **kwargs):
 407     """Return the html of the tag with the specified attribute in the passed HTML document"""
 408     return [whole for _, whole in get_elements_text_and_html_by_attribute(*args, **kwargs)]
 409
 410
 411 def get_elements_text_and_html_by_attribute(attribute, value, html, escape_value=True):
 412     """
 413     Return the text (content) and the html (whole) of the tag with the specified
 414     attribute in the passed HTML document
 415     """
 416
 417     quote = '' if re.match(r'''[\s"'`=<>]''', value) else '?'
 418
 419     value = re.escape(value) if escape_value else value
 420
 421     partial_element_re = rf'''(?x)
 422         <(?P<tag>[a-zA-Z0-9:._-]+)
 423          (?:\s(?:[^>"']|"[^"]*"|'[^']*')*)?
 424          \s{re.escape(attribute)}\s*=\s*(?P<_q>['"]{quote})(?-x:{value})(?P=_q)
 425         '''
 426
 427     for m in re.finditer(partial_element_re, html):
 428         content, whole = get_element_text_and_html_by_tag(m.group('tag'), html[m.start():])
 429
 430         yield (
 431             unescapeHTML(re.sub(r'^(?P<q>["\'])(?P<content>.*)(?P=q)$', r'\g<content>', content, flags=re.DOTALL)),
 432             whole
 433         )
 434
 435
 436 class HTMLBreakOnClosingTagParser(html.parser.HTMLParser):
 437     """
 438     HTML parser which raises HTMLBreakOnClosingTagException upon reaching the
 439     closing tag for the first opening tag it has encountered, and can be used
 440     as a context manager
 441     """
 442
 443     class HTMLBreakOnClosingTagException(Exception):
 444         pass
 445
 446     def __init__(self):
 447         self.tagstack = collections.deque()
 448         html.parser.HTMLParser.__init__(self)
 449
 450     def __enter__(self):
 451         return self
 452
 453     def __exit__(self, *_):
 454         self.close()
 455
 456     def close(self):
 457         # handle_endtag does not return upon raising HTMLBreakOnClosingTagException,
 458         # so data remains buffered; we no longer have any interest in it, thus
 459         # override this method to discard it
 460         pass
 461
 462     def handle_starttag(self, tag, _):
 463         self.tagstack.append(tag)
 464
 465     def handle_endtag(self, tag):
 466         if not self.tagstack:
 467             raise compat_HTMLParseError('no tags in the stack')
 468         while self.tagstack:
 469             inner_tag = self.tagstack.pop()
 470             if inner_tag == tag:
 471                 break
 472         else:
 473             raise compat_HTMLParseError(f'matching opening tag for closing {tag} tag not found')
 474         if not self.tagstack:
 475             raise self.HTMLBreakOnClosingTagException()
 476
 477
 478 def get_element_text_and_html_by_tag(tag, html):
 479     """
 480     For the first element with the specified tag in the passed HTML document
 481     return its' content (text) and the whole element (html)
 482     """
 483     def find_or_raise(haystack, needle, exc):
 484         try:
 485             return haystack.index(needle)
 486         except ValueError:
 487             raise exc
 488     closing_tag = f'</{tag}>'
 489     whole_start = find_or_raise(
 490         html, f'<{tag}', compat_HTMLParseError(f'opening {tag} tag not found'))
 491     content_start = find_or_raise(
 492         html[whole_start:], '>', compat_HTMLParseError(f'malformed opening {tag} tag'))
 493     content_start += whole_start + 1
 494     with HTMLBreakOnClosingTagParser() as parser:
 495         parser.feed(html[whole_start:content_start])
 496         if not parser.tagstack or parser.tagstack[0] != tag:
 497             raise compat_HTMLParseError(f'parser did not match opening {tag} tag')
 498         offset = content_start
 499         while offset < len(html):
 500             next_closing_tag_start = find_or_raise(
 501                 html[offset:], closing_tag,
 502                 compat_HTMLParseError(f'closing {tag} tag not found'))
 503             next_closing_tag_end = next_closing_tag_start + len(closing_tag)
 504             try:
 505                 parser.feed(html[offset:offset + next_closing_tag_end])
 506                 offset += next_closing_tag_end
 507             except HTMLBreakOnClosingTagParser.HTMLBreakOnClosingTagException:
 508                 return html[content_start:offset + next_closing_tag_start], \
 509                     html[whole_start:offset + next_closing_tag_end]
 510         raise compat_HTMLParseError('unexpected end of html')
 511
 512
 513 class HTMLAttributeParser(html.parser.HTMLParser):
 514     """Trivial HTML parser to gather the attributes for a single element"""
 515
 516     def __init__(self):
 517         self.attrs = {}
 518         html.parser.HTMLParser.__init__(self)
 519
 520     def handle_starttag(self, tag, attrs):
 521         self.attrs = dict(attrs)
 522
 523
 524 class HTMLListAttrsParser(html.parser.HTMLParser):
 525     """HTML parser to gather the attributes for the elements of a list"""
 526
 527     def __init__(self):
 528         html.parser.HTMLParser.__init__(self)
 529         self.items = []
 530         self._level = 0
 531
 532     def handle_starttag(self, tag, attrs):
 533         if tag == 'li' and self._level == 0:
 534             self.items.append(dict(attrs))
 535         self._level += 1
 536
 537     def handle_endtag(self, tag):
 538         self._level -= 1
 539
 540
 541 def extract_attributes(html_element):
 542     """Given a string for an HTML element such as
 543     <el
 544          a="foo" B="bar" c="&98;az" d=boz
 545          empty= noval entity="&amp;"
 546          sq='"' dq="'"
 547     >
 548     Decode and return a dictionary of attributes.
 549     {
 550         'a': 'foo', 'b': 'bar', c: 'baz', d: 'boz',
 551         'empty': '', 'noval': None, 'entity': '&',
 552         'sq': '"', 'dq': '\''
 553     }.
 554     """
 555     parser = HTMLAttributeParser()
 556     with contextlib.suppress(compat_HTMLParseError):
 557         parser.feed(html_element)
 558         parser.close()
 559     return parser.attrs
 560
 561
 562 def parse_list(webpage):
 563     """Given a string for an series of HTML <li> elements,
 564     return a dictionary of their attributes"""
 565     parser = HTMLListAttrsParser()
 566     parser.feed(webpage)
 567     parser.close()
 568     return parser.items
 569
 570
 571 def clean_html(html):
 572     """Clean an HTML snippet into a readable string"""
 573
 574     if html is None:  # Convenience for sanitizing descriptions etc.
 575         return html
 576
 577     html = re.sub(r'\s+', ' ', html)
 578     html = re.sub(r'(?u)\s?<\s?br\s?/?\s?>\s?', '\n', html)
 579     html = re.sub(r'(?u)<\s?/\s?p\s?>\s?<\s?p[^>]*>', '\n', html)
 580     # Strip html tags
 581     html = re.sub('<.*?>', '', html)
 582     # Replace html entities
 583     html = unescapeHTML(html)
 584     return html.strip()
 585
 586
 587 class LenientJSONDecoder(json.JSONDecoder):
 588     def __init__(self, *args, transform_source=None, ignore_extra=False, **kwargs):
 589         self.transform_source, self.ignore_extra = transform_source, ignore_extra
 590         super().__init__(*args, **kwargs)
 591
 592     def decode(self, s):
 593         if self.transform_source:
 594             s = self.transform_source(s)
 595         try:
 596             if self.ignore_extra:
 597                 return self.raw_decode(s.lstrip())[0]
 598             return super().decode(s)
 599         except json.JSONDecodeError as e:
 600             if e.pos is not None:
 601                 raise type(e)(f'{e.msg} in {s[e.pos-10:e.pos+10]!r}', s, e.pos)
 602             raise
 603
 604
 605 def sanitize_open(filename, open_mode):
 606     """Try to open the given filename, and slightly tweak it if this fails.
 607
 608     Attempts to open the given filename. If this fails, it tries to change
 609     the filename slightly, step by step, until it's either able to open it
 610     or it fails and raises a final exception, like the standard open()
 611     function.
 612
 613     It returns the tuple (stream, definitive_file_name).
 614     """
 615     if filename == '-':
 616         if sys.platform == 'win32':
 617             import msvcrt
 618
 619             # stdout may be any IO stream, e.g. when using contextlib.redirect_stdout
 620             with contextlib.suppress(io.UnsupportedOperation):
 621                 msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
 622         return (sys.stdout.buffer if hasattr(sys.stdout, 'buffer') else sys.stdout, filename)
 623
 624     for attempt in range(2):
 625         try:
 626             try:
 627                 if sys.platform == 'win32':
 628                     # FIXME: An exclusive lock also locks the file from being read.
 629                     # Since windows locks are mandatory, don't lock the file on windows (for now).
 630                     # Ref: https://github.com/yt-dlp/yt-dlp/issues/3124
 631                     raise LockingUnsupportedError()
 632                 stream = locked_file(filename, open_mode, block=False).__enter__()
 633             except OSError:
 634                 stream = open(filename, open_mode)
 635             return stream, filename
 636         except OSError as err:
 637             if attempt or err.errno in (errno.EACCES,):
 638                 raise
 639             old_filename, filename = filename, sanitize_path(filename)
 640             if old_filename == filename:
 641                 raise
 642
 643
 644 def timeconvert(timestr):
 645     """Convert RFC 2822 defined time string into system timestamp"""
 646     timestamp = None
 647     timetuple = email.utils.parsedate_tz(timestr)
 648     if timetuple is not None:
 649         timestamp = email.utils.mktime_tz(timetuple)
 650     return timestamp
 651
 652
 653 def sanitize_filename(s, restricted=False, is_id=NO_DEFAULT):
 654     """Sanitizes a string so it could be used as part of a filename.
 655     @param restricted   Use a stricter subset of allowed characters
 656     @param is_id        Whether this is an ID that should be kept unchanged if possible.
 657                         If unset, yt-dlp's new sanitization rules are in effect
 658     """
 659     if s == '':
 660         return ''
 661
 662     def replace_insane(char):
 663         if restricted and char in ACCENT_CHARS:
 664             return ACCENT_CHARS[char]
 665         elif not restricted and char == '\n':
 666             return '\0 '
 667         elif is_id is NO_DEFAULT and not restricted and char in '"*:<>?|/\\':
 668             # Replace with their full-width unicode counterparts
 669             return {'/': '\u29F8', '\\': '\u29f9'}.get(char, chr(ord(char) + 0xfee0))
 670         elif char == '?' or ord(char) < 32 or ord(char) == 127:
 671             return ''
 672         elif char == '"':
 673             return '' if restricted else '\''
 674         elif char == ':':
 675             return '\0_\0-' if restricted else '\0 \0-'
 676         elif char in '\\/|*<>':
 677             return '\0_'
 678         if restricted and (char in '!&\'()[]{}$;`^,#' or char.isspace() or ord(char) > 127):
 679             return '\0_'
 680         return char
 681
 682     if restricted and is_id is NO_DEFAULT:
 683         s = unicodedata.normalize('NFKC', s)
 684     s = re.sub(r'[0-9]+(?::[0-9]+)+', lambda m: m.group(0).replace(':', '_'), s)  # Handle timestamps
 685     result = ''.join(map(replace_insane, s))
 686     if is_id is NO_DEFAULT:
 687         result = re.sub(r'(\0.)(?:(?=\1)..)+', r'\1', result)  # Remove repeated substitute chars
 688         STRIP_RE = r'(?:\0.|[ _-])*'
 689         result = re.sub(f'^\0.{STRIP_RE}|{STRIP_RE}\0.$', '', result)  # Remove substitute chars from start/end
 690     result = result.replace('\0', '') or '_'
 691
 692     if not is_id:
 693         while '__' in result:
 694             result = result.replace('__', '_')
 695         result = result.strip('_')
 696         # Common case of "Foreign band name - English song title"
 697         if restricted and result.startswith('-_'):
 698             result = result[2:]
 699         if result.startswith('-'):
 700             result = '_' + result[len('-'):]
 701         result = result.lstrip('.')
 702         if not result:
 703             result = '_'
 704     return result
 705
 706
 707 def sanitize_path(s, force=False):
 708     """Sanitizes and normalizes path on Windows"""
 709     if sys.platform == 'win32':
 710         force = False
 711         drive_or_unc, _ = os.path.splitdrive(s)
 712     elif force:
 713         drive_or_unc = ''
 714     else:
 715         return s
 716
 717     norm_path = os.path.normpath(remove_start(s, drive_or_unc)).split(os.path.sep)
 718     if drive_or_unc:
 719         norm_path.pop(0)
 720     sanitized_path = [
 721         path_part if path_part in ['.', '..'] else re.sub(r'(?:[/<>:"\|\\?\*]|[\s.]$)', '#', path_part)
 722         for path_part in norm_path]
 723     if drive_or_unc:
 724         sanitized_path.insert(0, drive_or_unc + os.path.sep)
 725     elif force and s and s[0] == os.path.sep:
 726         sanitized_path.insert(0, os.path.sep)
 727     return os.path.join(*sanitized_path)
 728
 729
 730 def sanitize_url(url, *, scheme='http'):
 731     # Prepend protocol-less URLs with `http:` scheme in order to mitigate
 732     # the number of unwanted failures due to missing protocol
 733     if url is None:
 734         return
 735     elif url.startswith('//'):
 736         return f'{scheme}:{url}'
 737     # Fix some common typos seen so far
 738     COMMON_TYPOS = (
 739         # https://github.com/ytdl-org/youtube-dl/issues/15649
 740         (r'^httpss://', r'https://'),
 741         # https://bx1.be/lives/direct-tv/
 742         (r'^rmtp([es]?)://', r'rtmp\1://'),
 743     )
 744     for mistake, fixup in COMMON_TYPOS:
 745         if re.match(mistake, url):
 746             return re.sub(mistake, fixup, url)
 747     return url
 748
 749
 750 def extract_basic_auth(url):
 751     parts = urllib.parse.urlsplit(url)
 752     if parts.username is None:
 753         return url, None
 754     url = urllib.parse.urlunsplit(parts._replace(netloc=(
 755         parts.hostname if parts.port is None
 756         else '%s:%d' % (parts.hostname, parts.port))))
 757     auth_payload = base64.b64encode(
 758         ('%s:%s' % (parts.username, parts.password or '')).encode())
 759     return url, f'Basic {auth_payload.decode()}'
 760
 761
 762 def sanitized_Request(url, *args, **kwargs):
 763     url, auth_header = extract_basic_auth(escape_url(sanitize_url(url)))
 764     if auth_header is not None:
 765         headers = args[1] if len(args) >= 2 else kwargs.setdefault('headers', {})
 766         headers['Authorization'] = auth_header
 767     return urllib.request.Request(url, *args, **kwargs)
 768
 769
 770 def expand_path(s):
 771     """Expand shell variables and ~"""
 772     return os.path.expandvars(compat_expanduser(s))
 773
 774
 775 def orderedSet(iterable, *, lazy=False):
 776     """Remove all duplicates from the input iterable"""
 777     def _iter():
 778         seen = []  # Do not use set since the items can be unhashable
 779         for x in iterable:
 780             if x not in seen:
 781                 seen.append(x)
 782                 yield x
 783
 784     return _iter() if lazy else list(_iter())
 785
 786
 787 def _htmlentity_transform(entity_with_semicolon):
 788     """Transforms an HTML entity to a character."""
 789     entity = entity_with_semicolon[:-1]
 790
 791     # Known non-numeric HTML entity
 792     if entity in html.entities.name2codepoint:
 793         return chr(html.entities.name2codepoint[entity])
 794
 795     # TODO: HTML5 allows entities without a semicolon.
 796     # E.g. '&Eacuteric' should be decoded as 'Éric'.
 797     if entity_with_semicolon in html.entities.html5:
 798         return html.entities.html5[entity_with_semicolon]
 799
 800     mobj = re.match(r'#(x[0-9a-fA-F]+|[0-9]+)', entity)
 801     if mobj is not None:
 802         numstr = mobj.group(1)
 803         if numstr.startswith('x'):
 804             base = 16
 805             numstr = '0%s' % numstr
 806         else:
 807             base = 10
 808         # See https://github.com/ytdl-org/youtube-dl/issues/7518
 809         with contextlib.suppress(ValueError):
 810             return chr(int(numstr, base))
 811
 812     # Unknown entity in name, return its literal representation
 813     return '&%s;' % entity
 814
 815
 816 def unescapeHTML(s):
 817     if s is None:
 818         return None
 819     assert isinstance(s, str)
 820
 821     return re.sub(
 822         r'&([^&;]+;)', lambda m: _htmlentity_transform(m.group(1)), s)
 823
 824
 825 def escapeHTML(text):
 826     return (
 827         text
 828         .replace('&', '&amp;')
 829         .replace('<', '&lt;')
 830         .replace('>', '&gt;')
 831         .replace('"', '&quot;')
 832         .replace("'", '&#39;')
 833     )
 834
 835
 836 def process_communicate_or_kill(p, *args, **kwargs):
 837     deprecation_warning(f'"{__name__}.process_communicate_or_kill" is deprecated and may be removed '
 838                         f'in a future version. Use "{__name__}.Popen.communicate_or_kill" instead')
 839     return Popen.communicate_or_kill(p, *args, **kwargs)
 840
 841
 842 class Popen(subprocess.Popen):
 843     if sys.platform == 'win32':
 844         _startupinfo = subprocess.STARTUPINFO()
 845         _startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW
 846     else:
 847         _startupinfo = None
 848
 849     @staticmethod
 850     def _fix_pyinstaller_ld_path(env):
 851         """Restore LD_LIBRARY_PATH when using PyInstaller
 852             Ref: https://github.com/pyinstaller/pyinstaller/blob/develop/doc/runtime-information.rst#ld_library_path--libpath-considerations
 853                  https://github.com/yt-dlp/yt-dlp/issues/4573
 854         """
 855         if not hasattr(sys, '_MEIPASS'):
 856             return
 857
 858         def _fix(key):
 859             orig = env.get(f'{key}_ORIG')
 860             if orig is None:
 861                 env.pop(key, None)
 862             else:
 863                 env[key] = orig
 864
 865         _fix('LD_LIBRARY_PATH')  # Linux
 866         _fix('DYLD_LIBRARY_PATH')  # macOS
 867
 868     def __init__(self, *args, env=None, text=False, **kwargs):
 869         if env is None:
 870             env = os.environ.copy()
 871         self._fix_pyinstaller_ld_path(env)
 872
 873         if text is True:
 874             kwargs['universal_newlines'] = True  # For 3.6 compatibility
 875             kwargs.setdefault('encoding', 'utf-8')
 876             kwargs.setdefault('errors', 'replace')
 877         super().__init__(*args, env=env, **kwargs, startupinfo=self._startupinfo)
 878
 879     def communicate_or_kill(self, *args, **kwargs):
 880         try:
 881             return self.communicate(*args, **kwargs)
 882         except BaseException:  # Including KeyboardInterrupt
 883             self.kill(timeout=None)
 884             raise
 885
 886     def kill(self, *, timeout=0):
 887         super().kill()
 888         if timeout != 0:
 889             self.wait(timeout=timeout)
 890
 891     @classmethod
 892     def run(cls, *args, timeout=None, **kwargs):
 893         with cls(*args, **kwargs) as proc:
 894             default = '' if proc.text_mode else b''
 895             stdout, stderr = proc.communicate_or_kill(timeout=timeout)
 896             return stdout or default, stderr or default, proc.returncode
 897
 898
 899 def get_subprocess_encoding():
 900     if sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
 901         # For subprocess calls, encode with locale encoding
 902         # Refer to http://stackoverflow.com/a/9951851/35070
 903         encoding = preferredencoding()
 904     else:
 905         encoding = sys.getfilesystemencoding()
 906     if encoding is None:
 907         encoding = 'utf-8'
 908     return encoding
 909
 910
 911 def encodeFilename(s, for_subprocess=False):
 912     assert isinstance(s, str)
 913     return s
 914
 915
 916 def decodeFilename(b, for_subprocess=False):
 917     return b
 918
 919
 920 def encodeArgument(s):
 921     # Legacy code that uses byte strings
 922     # Uncomment the following line after fixing all post processors
 923     # assert isinstance(s, str), 'Internal error: %r should be of type %r, is %r' % (s, str, type(s))
 924     return s if isinstance(s, str) else s.decode('ascii')
 925
 926
 927 def decodeArgument(b):
 928     return b
 929
 930
 931 def decodeOption(optval):
 932     if optval is None:
 933         return optval
 934     if isinstance(optval, bytes):
 935         optval = optval.decode(preferredencoding())
 936
 937     assert isinstance(optval, str)
 938     return optval
 939
 940
 941 _timetuple = collections.namedtuple('Time', ('hours', 'minutes', 'seconds', 'milliseconds'))
 942
 943
 944 def timetuple_from_msec(msec):
 945     secs, msec = divmod(msec, 1000)
 946     mins, secs = divmod(secs, 60)
 947     hrs, mins = divmod(mins, 60)
 948     return _timetuple(hrs, mins, secs, msec)
 949
 950
 951 def formatSeconds(secs, delim=':', msec=False):
 952     time = timetuple_from_msec(secs * 1000)
 953     if time.hours:
 954         ret = '%d%s%02d%s%02d' % (time.hours, delim, time.minutes, delim, time.seconds)
 955     elif time.minutes:
 956         ret = '%d%s%02d' % (time.minutes, delim, time.seconds)
 957     else:
 958         ret = '%d' % time.seconds
 959     return '%s.%03d' % (ret, time.milliseconds) if msec else ret
 960
 961
 962 def _ssl_load_windows_store_certs(ssl_context, storename):
 963     # Code adapted from _load_windows_store_certs in https://github.com/python/cpython/blob/main/Lib/ssl.py
 964     try:
 965         certs = [cert for cert, encoding, trust in ssl.enum_certificates(storename)
 966                  if encoding == 'x509_asn' and (
 967                      trust is True or ssl.Purpose.SERVER_AUTH.oid in trust)]
 968     except PermissionError:
 969         return
 970     for cert in certs:
 971         with contextlib.suppress(ssl.SSLError):
 972             ssl_context.load_verify_locations(cadata=cert)
 973
 974
 975 def make_HTTPS_handler(params, **kwargs):
 976     opts_check_certificate = not params.get('nocheckcertificate')
 977     context = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT)
 978     context.check_hostname = opts_check_certificate
 979     if params.get('legacyserverconnect'):
 980         context.options |= 4  # SSL_OP_LEGACY_SERVER_CONNECT
 981         # Allow use of weaker ciphers in Python 3.10+. See https://bugs.python.org/issue43998
 982         context.set_ciphers('DEFAULT')
 983
 984     context.verify_mode = ssl.CERT_REQUIRED if opts_check_certificate else ssl.CERT_NONE
 985     if opts_check_certificate:
 986         if has_certifi and 'no-certifi' not in params.get('compat_opts', []):
 987             context.load_verify_locations(cafile=certifi.where())
 988         else:
 989             try:
 990                 context.load_default_certs()
 991                 # Work around the issue in load_default_certs when there are bad certificates. See:
 992                 # https://github.com/yt-dlp/yt-dlp/issues/1060,
 993                 # https://bugs.python.org/issue35665, https://bugs.python.org/issue45312
 994             except ssl.SSLError:
 995                 # enum_certificates is not present in mingw python. See https://github.com/yt-dlp/yt-dlp/issues/1151
 996                 if sys.platform == 'win32' and hasattr(ssl, 'enum_certificates'):
 997                     for storename in ('CA', 'ROOT'):
 998                         _ssl_load_windows_store_certs(context, storename)
 999                 context.set_default_verify_paths()
1000
1001     client_certfile = params.get('client_certificate')
1002     if client_certfile:
1003         try:
1004             context.load_cert_chain(
1005                 client_certfile, keyfile=params.get('client_certificate_key'),
1006                 password=params.get('client_certificate_password'))
1007         except ssl.SSLError:
1008             raise YoutubeDLError('Unable to load client certificate')
1009
1010     # Some servers may reject requests if ALPN extension is not sent. See:
1011     # https://github.com/python/cpython/issues/85140
1012     # https://github.com/yt-dlp/yt-dlp/issues/3878
1013     with contextlib.suppress(NotImplementedError):
1014         context.set_alpn_protocols(['http/1.1'])
1015
1016     return YoutubeDLHTTPSHandler(params, context=context, **kwargs)
1017
1018
1019 def bug_reports_message(before=';'):
1020     from .update import REPOSITORY
1021
1022     msg = (f'please report this issue on  https://github.com/{REPOSITORY}/issues?q= , '
1023            'filling out the appropriate issue template. Confirm you are on the latest version using  yt-dlp -U')
1024
1025     before = before.rstrip()
1026     if not before or before.endswith(('.', '!', '?')):
1027         msg = msg[0].title() + msg[1:]
1028
1029     return (before + ' ' if before else '') + msg
1030
1031
1032 class YoutubeDLError(Exception):
1033     """Base exception for YoutubeDL errors."""
1034     msg = None
1035
1036     def __init__(self, msg=None):
1037         if msg is not None:
1038             self.msg = msg
1039         elif self.msg is None:
1040             self.msg = type(self).__name__
1041         super().__init__(self.msg)
1042
1043
1044 network_exceptions = [urllib.error.URLError, http.client.HTTPException, socket.error]
1045 if hasattr(ssl, 'CertificateError'):
1046     network_exceptions.append(ssl.CertificateError)
1047 network_exceptions = tuple(network_exceptions)
1048
1049
1050 class ExtractorError(YoutubeDLError):
1051     """Error during info extraction."""
1052
1053     def __init__(self, msg, tb=None, expected=False, cause=None, video_id=None, ie=None):
1054         """ tb, if given, is the original traceback (so that it can be printed out).
1055         If expected is set, this is a normal error message and most likely not a bug in yt-dlp.
1056         """
1057         if sys.exc_info()[0] in network_exceptions:
1058             expected = True
1059
1060         self.orig_msg = str(msg)
1061         self.traceback = tb
1062         self.expected = expected
1063         self.cause = cause
1064         self.video_id = video_id
1065         self.ie = ie
1066         self.exc_info = sys.exc_info()  # preserve original exception
1067         if isinstance(self.exc_info[1], ExtractorError):
1068             self.exc_info = self.exc_info[1].exc_info
1069
1070         super().__init__(''.join((
1071             format_field(ie, None, '[%s] '),
1072             format_field(video_id, None, '%s: '),
1073             msg,
1074             format_field(cause, None, ' (caused by %r)'),
1075             '' if expected else bug_reports_message())))
1076
1077     def format_traceback(self):
1078         return join_nonempty(
1079             self.traceback and ''.join(traceback.format_tb(self.traceback)),
1080             self.cause and ''.join(traceback.format_exception(None, self.cause, self.cause.__traceback__)[1:]),
1081             delim='\n') or None
1082
1083
1084 class UnsupportedError(ExtractorError):
1085     def __init__(self, url):
1086         super().__init__(
1087             'Unsupported URL: %s' % url, expected=True)
1088         self.url = url
1089
1090
1091 class RegexNotFoundError(ExtractorError):
1092     """Error when a regex didn't match"""
1093     pass
1094
1095
1096 class GeoRestrictedError(ExtractorError):
1097     """Geographic restriction Error exception.
1098
1099     This exception may be thrown when a video is not available from your
1100     geographic location due to geographic restrictions imposed by a website.
1101     """
1102
1103     def __init__(self, msg, countries=None, **kwargs):
1104         kwargs['expected'] = True
1105         super().__init__(msg, **kwargs)
1106         self.countries = countries
1107
1108
1109 class UserNotLive(ExtractorError):
1110     """Error when a channel/user is not live"""
1111
1112     def __init__(self, msg=None, **kwargs):
1113         kwargs['expected'] = True
1114         super().__init__(msg or 'The channel is not currently live', **kwargs)
1115
1116
1117 class DownloadError(YoutubeDLError):
1118     """Download Error exception.
1119
1120     This exception may be thrown by FileDownloader objects if they are not
1121     configured to continue on errors. They will contain the appropriate
1122     error message.
1123     """
1124
1125     def __init__(self, msg, exc_info=None):
1126         """ exc_info, if given, is the original exception that caused the trouble (as returned by sys.exc_info()). """
1127         super().__init__(msg)
1128         self.exc_info = exc_info
1129
1130
1131 class EntryNotInPlaylist(YoutubeDLError):
1132     """Entry not in playlist exception.
1133
1134     This exception will be thrown by YoutubeDL when a requested entry
1135     is not found in the playlist info_dict
1136     """
1137     msg = 'Entry not found in info'
1138
1139
1140 class SameFileError(YoutubeDLError):
1141     """Same File exception.
1142
1143     This exception will be thrown by FileDownloader objects if they detect
1144     multiple files would have to be downloaded to the same file on disk.
1145     """
1146     msg = 'Fixed output name but more than one file to download'
1147
1148     def __init__(self, filename=None):
1149         if filename is not None:
1150             self.msg += f': {filename}'
1151         super().__init__(self.msg)
1152
1153
1154 class PostProcessingError(YoutubeDLError):
1155     """Post Processing exception.
1156
1157     This exception may be raised by PostProcessor's .run() method to
1158     indicate an error in the postprocessing task.
1159     """
1160
1161
1162 class DownloadCancelled(YoutubeDLError):
1163     """ Exception raised when the download queue should be interrupted """
1164     msg = 'The download was cancelled'
1165
1166
1167 class ExistingVideoReached(DownloadCancelled):
1168     """ --break-on-existing triggered """
1169     msg = 'Encountered a video that is already in the archive, stopping due to --break-on-existing'
1170
1171
1172 class RejectedVideoReached(DownloadCancelled):
1173     """ --break-on-reject triggered """
1174     msg = 'Encountered a video that did not match filter, stopping due to --break-on-reject'
1175
1176
1177 class MaxDownloadsReached(DownloadCancelled):
1178     """ --max-downloads limit has been reached. """
1179     msg = 'Maximum number of downloads reached, stopping due to --max-downloads'
1180
1181
1182 class ReExtractInfo(YoutubeDLError):
1183     """ Video info needs to be re-extracted. """
1184
1185     def __init__(self, msg, expected=False):
1186         super().__init__(msg)
1187         self.expected = expected
1188
1189
1190 class ThrottledDownload(ReExtractInfo):
1191     """ Download speed below --throttled-rate. """
1192     msg = 'The download speed is below throttle limit'
1193
1194     def __init__(self):
1195         super().__init__(self.msg, expected=False)
1196
1197
1198 class UnavailableVideoError(YoutubeDLError):
1199     """Unavailable Format exception.
1200
1201     This exception will be thrown when a video is requested
1202     in a format that is not available for that video.
1203     """
1204     msg = 'Unable to download video'
1205
1206     def __init__(self, err=None):
1207         if err is not None:
1208             self.msg += f': {err}'
1209         super().__init__(self.msg)
1210
1211
1212 class ContentTooShortError(YoutubeDLError):
1213     """Content Too Short exception.
1214
1215     This exception may be raised by FileDownloader objects when a file they
1216     download is too small for what the server announced first, indicating
1217     the connection was probably interrupted.
1218     """
1219
1220     def __init__(self, downloaded, expected):
1221         super().__init__(f'Downloaded {downloaded} bytes, expected {expected} bytes')
1222         # Both in bytes
1223         self.downloaded = downloaded
1224         self.expected = expected
1225
1226
1227 class XAttrMetadataError(YoutubeDLError):
1228     def __init__(self, code=None, msg='Unknown error'):
1229         super().__init__(msg)
1230         self.code = code
1231         self.msg = msg
1232
1233         # Parsing code and msg
1234         if (self.code in (errno.ENOSPC, errno.EDQUOT)
1235                 or 'No space left' in self.msg or 'Disk quota exceeded' in self.msg):
1236             self.reason = 'NO_SPACE'
1237         elif self.code == errno.E2BIG or 'Argument list too long' in self.msg:
1238             self.reason = 'VALUE_TOO_LONG'
1239         else:
1240             self.reason = 'NOT_SUPPORTED'
1241
1242
1243 class XAttrUnavailableError(YoutubeDLError):
1244     pass
1245
1246
1247 def _create_http_connection(ydl_handler, http_class, is_https, *args, **kwargs):
1248     hc = http_class(*args, **kwargs)
1249     source_address = ydl_handler._params.get('source_address')
1250
1251     if source_address is not None:
1252         # This is to workaround _create_connection() from socket where it will try all
1253         # address data from getaddrinfo() including IPv6. This filters the result from
1254         # getaddrinfo() based on the source_address value.
1255         # This is based on the cpython socket.create_connection() function.
1256         # https://github.com/python/cpython/blob/master/Lib/socket.py#L691
1257         def _create_connection(address, timeout=socket._GLOBAL_DEFAULT_TIMEOUT, source_address=None):
1258             host, port = address
1259             err = None
1260             addrs = socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM)
1261             af = socket.AF_INET if '.' in source_address[0] else socket.AF_INET6
1262             ip_addrs = [addr for addr in addrs if addr[0] == af]
1263             if addrs and not ip_addrs:
1264                 ip_version = 'v4' if af == socket.AF_INET else 'v6'
1265                 raise OSError(
1266                     "No remote IP%s addresses available for connect, can't use '%s' as source address"
1267                     % (ip_version, source_address[0]))
1268             for res in ip_addrs:
1269                 af, socktype, proto, canonname, sa = res
1270                 sock = None
1271                 try:
1272                     sock = socket.socket(af, socktype, proto)
1273                     if timeout is not socket._GLOBAL_DEFAULT_TIMEOUT:
1274                         sock.settimeout(timeout)
1275                     sock.bind(source_address)
1276                     sock.connect(sa)
1277                     err = None  # Explicitly break reference cycle
1278                     return sock
1279                 except OSError as _:
1280                     err = _
1281                     if sock is not None:
1282                         sock.close()
1283             if err is not None:
1284                 raise err
1285             else:
1286                 raise OSError('getaddrinfo returns an empty list')
1287         if hasattr(hc, '_create_connection'):
1288             hc._create_connection = _create_connection
1289         hc.source_address = (source_address, 0)
1290
1291     return hc
1292
1293
1294 def handle_youtubedl_headers(headers):
1295     filtered_headers = headers
1296
1297     if 'Youtubedl-no-compression' in filtered_headers:
1298         filtered_headers = {k: v for k, v in filtered_headers.items() if k.lower() != 'accept-encoding'}
1299         del filtered_headers['Youtubedl-no-compression']
1300
1301     return filtered_headers
1302
1303
1304 class YoutubeDLHandler(urllib.request.HTTPHandler):
1305     """Handler for HTTP requests and responses.
1306
1307     This class, when installed with an OpenerDirector, automatically adds
1308     the standard headers to every HTTP request and handles gzipped and
1309     deflated responses from web servers. If compression is to be avoided in
1310     a particular request, the original request in the program code only has
1311     to include the HTTP header "Youtubedl-no-compression", which will be
1312     removed before making the real request.
1313
1314     Part of this code was copied from:
1315
1316     http://techknack.net/python-urllib2-handlers/
1317
1318     Andrew Rowls, the author of that code, agreed to release it to the
1319     public domain.
1320     """
1321
1322     def __init__(self, params, *args, **kwargs):
1323         urllib.request.HTTPHandler.__init__(self, *args, **kwargs)
1324         self._params = params
1325
1326     def http_open(self, req):
1327         conn_class = http.client.HTTPConnection
1328
1329         socks_proxy = req.headers.get('Ytdl-socks-proxy')
1330         if socks_proxy:
1331             conn_class = make_socks_conn_class(conn_class, socks_proxy)
1332             del req.headers['Ytdl-socks-proxy']
1333
1334         return self.do_open(functools.partial(
1335             _create_http_connection, self, conn_class, False),
1336             req)
1337
1338     @staticmethod
1339     def deflate(data):
1340         if not data:
1341             return data
1342         try:
1343             return zlib.decompress(data, -zlib.MAX_WBITS)
1344         except zlib.error:
1345             return zlib.decompress(data)
1346
1347     @staticmethod
1348     def brotli(data):
1349         if not data:
1350             return data
1351         return brotli.decompress(data)
1352
1353     def http_request(self, req):
1354         # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
1355         # always respected by websites, some tend to give out URLs with non percent-encoded
1356         # non-ASCII characters (see telemb.py, ard.py [#3412])
1357         # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
1358         # To work around aforementioned issue we will replace request's original URL with
1359         # percent-encoded one
1360         # Since redirects are also affected (e.g. http://www.southpark.de/alle-episoden/s18e09)
1361         # the code of this workaround has been moved here from YoutubeDL.urlopen()
1362         url = req.get_full_url()
1363         url_escaped = escape_url(url)
1364
1365         # Substitute URL if any change after escaping
1366         if url != url_escaped:
1367             req = update_Request(req, url=url_escaped)
1368
1369         for h, v in self._params.get('http_headers', std_headers).items():
1370             # Capitalize is needed because of Python bug 2275: http://bugs.python.org/issue2275
1371             # The dict keys are capitalized because of this bug by urllib
1372             if h.capitalize() not in req.headers:
1373                 req.add_header(h, v)
1374
1375         if 'Accept-encoding' not in req.headers:
1376             req.add_header('Accept-encoding', ', '.join(SUPPORTED_ENCODINGS))
1377
1378         req.headers = handle_youtubedl_headers(req.headers)
1379
1380         return super().do_request_(req)
1381
1382     def http_response(self, req, resp):
1383         old_resp = resp
1384         # gzip
1385         if resp.headers.get('Content-encoding', '') == 'gzip':
1386             content = resp.read()
1387             gz = gzip.GzipFile(fileobj=io.BytesIO(content), mode='rb')
1388             try:
1389                 uncompressed = io.BytesIO(gz.read())
1390             except OSError as original_ioerror:
1391                 # There may be junk add the end of the file
1392                 # See http://stackoverflow.com/q/4928560/35070 for details
1393                 for i in range(1, 1024):
1394                     try:
1395                         gz = gzip.GzipFile(fileobj=io.BytesIO(content[:-i]), mode='rb')
1396                         uncompressed = io.BytesIO(gz.read())
1397                     except OSError:
1398                         continue
1399                     break
1400                 else:
1401                     raise original_ioerror
1402             resp = urllib.request.addinfourl(uncompressed, old_resp.headers, old_resp.url, old_resp.code)
1403             resp.msg = old_resp.msg
1404             del resp.headers['Content-encoding']
1405         # deflate
1406         if resp.headers.get('Content-encoding', '') == 'deflate':
1407             gz = io.BytesIO(self.deflate(resp.read()))
1408             resp = urllib.request.addinfourl(gz, old_resp.headers, old_resp.url, old_resp.code)
1409             resp.msg = old_resp.msg
1410             del resp.headers['Content-encoding']
1411         # brotli
1412         if resp.headers.get('Content-encoding', '') == 'br':
1413             resp = urllib.request.addinfourl(
1414                 io.BytesIO(self.brotli(resp.read())), old_resp.headers, old_resp.url, old_resp.code)
1415             resp.msg = old_resp.msg
1416             del resp.headers['Content-encoding']
1417         # Percent-encode redirect URL of Location HTTP header to satisfy RFC 3986 (see
1418         # https://github.com/ytdl-org/youtube-dl/issues/6457).
1419         if 300 <= resp.code < 400:
1420             location = resp.headers.get('Location')
1421             if location:
1422                 # As of RFC 2616 default charset is iso-8859-1 that is respected by python 3
1423                 location = location.encode('iso-8859-1').decode()
1424                 location_escaped = escape_url(location)
1425                 if location != location_escaped:
1426                     del resp.headers['Location']
1427                     resp.headers['Location'] = location_escaped
1428         return resp
1429
1430     https_request = http_request
1431     https_response = http_response
1432
1433
1434 def make_socks_conn_class(base_class, socks_proxy):
1435     assert issubclass(base_class, (
1436         http.client.HTTPConnection, http.client.HTTPSConnection))
1437
1438     url_components = urllib.parse.urlparse(socks_proxy)
1439     if url_components.scheme.lower() == 'socks5':
1440         socks_type = ProxyType.SOCKS5
1441     elif url_components.scheme.lower() in ('socks', 'socks4'):
1442         socks_type = ProxyType.SOCKS4
1443     elif url_components.scheme.lower() == 'socks4a':
1444         socks_type = ProxyType.SOCKS4A
1445
1446     def unquote_if_non_empty(s):
1447         if not s:
1448             return s
1449         return urllib.parse.unquote_plus(s)
1450
1451     proxy_args = (
1452         socks_type,
1453         url_components.hostname, url_components.port or 1080,
1454         True,  # Remote DNS
1455         unquote_if_non_empty(url_components.username),
1456         unquote_if_non_empty(url_components.password),
1457     )
1458
1459     class SocksConnection(base_class):
1460         def connect(self):
1461             self.sock = sockssocket()
1462             self.sock.setproxy(*proxy_args)
1463             if isinstance(self.timeout, (int, float)):
1464                 self.sock.settimeout(self.timeout)
1465             self.sock.connect((self.host, self.port))
1466
1467             if isinstance(self, http.client.HTTPSConnection):
1468                 if hasattr(self, '_context'):  # Python > 2.6
1469                     self.sock = self._context.wrap_socket(
1470                         self.sock, server_hostname=self.host)
1471                 else:
1472                     self.sock = ssl.wrap_socket(self.sock)
1473
1474     return SocksConnection
1475
1476
1477 class YoutubeDLHTTPSHandler(urllib.request.HTTPSHandler):
1478     def __init__(self, params, https_conn_class=None, *args, **kwargs):
1479         urllib.request.HTTPSHandler.__init__(self, *args, **kwargs)
1480         self._https_conn_class = https_conn_class or http.client.HTTPSConnection
1481         self._params = params
1482
1483     def https_open(self, req):
1484         kwargs = {}
1485         conn_class = self._https_conn_class
1486
1487         if hasattr(self, '_context'):  # python > 2.6
1488             kwargs['context'] = self._context
1489         if hasattr(self, '_check_hostname'):  # python 3.x
1490             kwargs['check_hostname'] = self._check_hostname
1491
1492         socks_proxy = req.headers.get('Ytdl-socks-proxy')
1493         if socks_proxy:
1494             conn_class = make_socks_conn_class(conn_class, socks_proxy)
1495             del req.headers['Ytdl-socks-proxy']
1496
1497         try:
1498             return self.do_open(
1499                 functools.partial(_create_http_connection, self, conn_class, True), req, **kwargs)
1500         except urllib.error.URLError as e:
1501             if (isinstance(e.reason, ssl.SSLError)
1502                     and getattr(e.reason, 'reason', None) == 'SSLV3_ALERT_HANDSHAKE_FAILURE'):
1503                 raise YoutubeDLError('SSLV3_ALERT_HANDSHAKE_FAILURE: Try using --legacy-server-connect')
1504             raise
1505
1506
1507 def is_path_like(f):
1508     return isinstance(f, (str, bytes, os.PathLike))
1509
1510
1511 class YoutubeDLCookieJar(http.cookiejar.MozillaCookieJar):
1512     """
1513     See [1] for cookie file format.
1514
1515     1. https://curl.haxx.se/docs/http-cookies.html
1516     """
1517     _HTTPONLY_PREFIX = '#HttpOnly_'
1518     _ENTRY_LEN = 7
1519     _HEADER = '''# Netscape HTTP Cookie File
1520 # This file is generated by yt-dlp.  Do not edit.
1521
1522 '''
1523     _CookieFileEntry = collections.namedtuple(
1524         'CookieFileEntry',
1525         ('domain_name', 'include_subdomains', 'path', 'https_only', 'expires_at', 'name', 'value'))
1526
1527     def __init__(self, filename=None, *args, **kwargs):
1528         super().__init__(None, *args, **kwargs)
1529         if is_path_like(filename):
1530             filename = os.fspath(filename)
1531         self.filename = filename
1532
1533     @staticmethod
1534     def _true_or_false(cndn):
1535         return 'TRUE' if cndn else 'FALSE'
1536
1537     @contextlib.contextmanager
1538     def open(self, file, *, write=False):
1539         if is_path_like(file):
1540             with open(file, 'w' if write else 'r', encoding='utf-8') as f:
1541                 yield f
1542         else:
1543             if write:
1544                 file.truncate(0)
1545             yield file
1546
1547     def _really_save(self, f, ignore_discard=False, ignore_expires=False):
1548         now = time.time()
1549         for cookie in self:
1550             if (not ignore_discard and cookie.discard
1551                     or not ignore_expires and cookie.is_expired(now)):
1552                 continue
1553             name, value = cookie.name, cookie.value
1554             if value is None:
1555                 # cookies.txt regards 'Set-Cookie: foo' as a cookie
1556                 # with no name, whereas http.cookiejar regards it as a
1557                 # cookie with no value.
1558                 name, value = '', name
1559             f.write('%s\n' % '\t'.join((
1560                 cookie.domain,
1561                 self._true_or_false(cookie.domain.startswith('.')),
1562                 cookie.path,
1563                 self._true_or_false(cookie.secure),
1564                 str_or_none(cookie.expires, default=''),
1565                 name, value
1566             )))
1567
1568     def save(self, filename=None, *args, **kwargs):
1569         """
1570         Save cookies to a file.
1571         Code is taken from CPython 3.6
1572         https://github.com/python/cpython/blob/8d999cbf4adea053be6dbb612b9844635c4dfb8e/Lib/http/cookiejar.py#L2091-L2117 """
1573
1574         if filename is None:
1575             if self.filename is not None:
1576                 filename = self.filename
1577             else:
1578                 raise ValueError(http.cookiejar.MISSING_FILENAME_TEXT)
1579
1580         # Store session cookies with `expires` set to 0 instead of an empty string
1581         for cookie in self:
1582             if cookie.expires is None:
1583                 cookie.expires = 0
1584
1585         with self.open(filename, write=True) as f:
1586             f.write(self._HEADER)
1587             self._really_save(f, *args, **kwargs)
1588
1589     def load(self, filename=None, ignore_discard=False, ignore_expires=False):
1590         """Load cookies from a file."""
1591         if filename is None:
1592             if self.filename is not None:
1593                 filename = self.filename
1594             else:
1595                 raise ValueError(http.cookiejar.MISSING_FILENAME_TEXT)
1596
1597         def prepare_line(line):
1598             if line.startswith(self._HTTPONLY_PREFIX):
1599                 line = line[len(self._HTTPONLY_PREFIX):]
1600             # comments and empty lines are fine
1601             if line.startswith('#') or not line.strip():
1602                 return line
1603             cookie_list = line.split('\t')
1604             if len(cookie_list) != self._ENTRY_LEN:
1605                 raise http.cookiejar.LoadError('invalid length %d' % len(cookie_list))
1606             cookie = self._CookieFileEntry(*cookie_list)
1607             if cookie.expires_at and not cookie.expires_at.isdigit():
1608                 raise http.cookiejar.LoadError('invalid expires at %s' % cookie.expires_at)
1609             return line
1610
1611         cf = io.StringIO()
1612         with self.open(filename) as f:
1613             for line in f:
1614                 try:
1615                     cf.write(prepare_line(line))
1616                 except http.cookiejar.LoadError as e:
1617                     if f'{line.strip()} '[0] in '[{"':
1618                         raise http.cookiejar.LoadError(
1619                             'Cookies file must be Netscape formatted, not JSON. See  '
1620                             'https://github.com/yt-dlp/yt-dlp/wiki/FAQ#how-do-i-pass-cookies-to-yt-dlp')
1621                     write_string(f'WARNING: skipping cookie file entry due to {e}: {line!r}\n')
1622                     continue
1623         cf.seek(0)
1624         self._really_load(cf, filename, ignore_discard, ignore_expires)
1625         # Session cookies are denoted by either `expires` field set to
1626         # an empty string or 0. MozillaCookieJar only recognizes the former
1627         # (see [1]). So we need force the latter to be recognized as session
1628         # cookies on our own.
1629         # Session cookies may be important for cookies-based authentication,
1630         # e.g. usually, when user does not check 'Remember me' check box while
1631         # logging in on a site, some important cookies are stored as session
1632         # cookies so that not recognizing them will result in failed login.
1633         # 1. https://bugs.python.org/issue17164
1634         for cookie in self:
1635             # Treat `expires=0` cookies as session cookies
1636             if cookie.expires == 0:
1637                 cookie.expires = None
1638                 cookie.discard = True
1639
1640
1641 class YoutubeDLCookieProcessor(urllib.request.HTTPCookieProcessor):
1642     def __init__(self, cookiejar=None):
1643         urllib.request.HTTPCookieProcessor.__init__(self, cookiejar)
1644
1645     def http_response(self, request, response):
1646         return urllib.request.HTTPCookieProcessor.http_response(self, request, response)
1647
1648     https_request = urllib.request.HTTPCookieProcessor.http_request
1649     https_response = http_response
1650
1651
1652 class YoutubeDLRedirectHandler(urllib.request.HTTPRedirectHandler):
1653     """YoutubeDL redirect handler
1654
1655     The code is based on HTTPRedirectHandler implementation from CPython [1].
1656
1657     This redirect handler solves two issues:
1658      - ensures redirect URL is always unicode under python 2
1659      - introduces support for experimental HTTP response status code
1660        308 Permanent Redirect [2] used by some sites [3]
1661
1662     1. https://github.com/python/cpython/blob/master/Lib/urllib/request.py
1663     2. https://developer.mozilla.org/en-US/docs/Web/HTTP/Status/308
1664     3. https://github.com/ytdl-org/youtube-dl/issues/28768
1665     """
1666
1667     http_error_301 = http_error_303 = http_error_307 = http_error_308 = urllib.request.HTTPRedirectHandler.http_error_302
1668
1669     def redirect_request(self, req, fp, code, msg, headers, newurl):
1670         """Return a Request or None in response to a redirect.
1671
1672         This is called by the http_error_30x methods when a
1673         redirection response is received.  If a redirection should
1674         take place, return a new Request to allow http_error_30x to
1675         perform the redirect.  Otherwise, raise HTTPError if no-one
1676         else should try to handle this url.  Return None if you can't
1677         but another Handler might.
1678         """
1679         m = req.get_method()
1680         if (not (code in (301, 302, 303, 307, 308) and m in ("GET", "HEAD")
1681                  or code in (301, 302, 303) and m == "POST")):
1682             raise urllib.error.HTTPError(req.full_url, code, msg, headers, fp)
1683         # Strictly (according to RFC 2616), 301 or 302 in response to
1684         # a POST MUST NOT cause a redirection without confirmation
1685         # from the user (of urllib.request, in this case).  In practice,
1686         # essentially all clients do redirect in this case, so we do
1687         # the same.
1688
1689         # Be conciliant with URIs containing a space.  This is mainly
1690         # redundant with the more complete encoding done in http_error_302(),
1691         # but it is kept for compatibility with other callers.
1692         newurl = newurl.replace(' ', '%20')
1693
1694         CONTENT_HEADERS = ("content-length", "content-type")
1695         # NB: don't use dict comprehension for python 2.6 compatibility
1696         newheaders = {k: v for k, v in req.headers.items() if k.lower() not in CONTENT_HEADERS}
1697
1698         # A 303 must either use GET or HEAD for subsequent request
1699         # https://datatracker.ietf.org/doc/html/rfc7231#section-6.4.4
1700         if code == 303 and m != 'HEAD':
1701             m = 'GET'
1702         # 301 and 302 redirects are commonly turned into a GET from a POST
1703         # for subsequent requests by browsers, so we'll do the same.
1704         # https://datatracker.ietf.org/doc/html/rfc7231#section-6.4.2
1705         # https://datatracker.ietf.org/doc/html/rfc7231#section-6.4.3
1706         if code in (301, 302) and m == 'POST':
1707             m = 'GET'
1708
1709         return urllib.request.Request(
1710             newurl, headers=newheaders, origin_req_host=req.origin_req_host,
1711             unverifiable=True, method=m)
1712
1713
1714 def extract_timezone(date_str):
1715     m = re.search(
1716         r'''(?x)
1717             ^.{8,}?                                              # >=8 char non-TZ prefix, if present
1718             (?P<tz>Z|                                            # just the UTC Z, or
1719                 (?:(?<=.\b\d{4}|\b\d{2}:\d\d)|                   # preceded by 4 digits or hh:mm or
1720                    (?<!.\b[a-zA-Z]{3}|[a-zA-Z]{4}|..\b\d\d))     # not preceded by 3 alpha word or >= 4 alpha or 2 digits
1721                    [ ]?                                          # optional space
1722                 (?P<sign>\+|-)                                   # +/-
1723                 (?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2})       # hh[:]mm
1724             $)
1725         ''', date_str)
1726     if not m:
1727         m = re.search(r'\d{1,2}:\d{1,2}(?:\.\d+)?(?P<tz>\s*[A-Z]+)$', date_str)
1728         timezone = TIMEZONE_NAMES.get(m and m.group('tz').strip())
1729         if timezone is not None:
1730             date_str = date_str[:-len(m.group('tz'))]
1731         timezone = datetime.timedelta(hours=timezone or 0)
1732     else:
1733         date_str = date_str[:-len(m.group('tz'))]
1734         if not m.group('sign'):
1735             timezone = datetime.timedelta()
1736         else:
1737             sign = 1 if m.group('sign') == '+' else -1
1738             timezone = datetime.timedelta(
1739                 hours=sign * int(m.group('hours')),
1740                 minutes=sign * int(m.group('minutes')))
1741     return timezone, date_str
1742
1743
1744 def parse_iso8601(date_str, delimiter='T', timezone=None):
1745     """ Return a UNIX timestamp from the given date """
1746
1747     if date_str is None:
1748         return None
1749
1750     date_str = re.sub(r'\.[0-9]+', '', date_str)
1751
1752     if timezone is None:
1753         timezone, date_str = extract_timezone(date_str)
1754
1755     with contextlib.suppress(ValueError):
1756         date_format = f'%Y-%m-%d{delimiter}%H:%M:%S'
1757         dt = datetime.datetime.strptime(date_str, date_format) - timezone
1758         return calendar.timegm(dt.timetuple())
1759
1760
1761 def date_formats(day_first=True):
1762     return DATE_FORMATS_DAY_FIRST if day_first else DATE_FORMATS_MONTH_FIRST
1763
1764
1765 def unified_strdate(date_str, day_first=True):
1766     """Return a string with the date in the format YYYYMMDD"""
1767
1768     if date_str is None:
1769         return None
1770     upload_date = None
1771     # Replace commas
1772     date_str = date_str.replace(',', ' ')
1773     # Remove AM/PM + timezone
1774     date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)
1775     _, date_str = extract_timezone(date_str)
1776
1777     for expression in date_formats(day_first):
1778         with contextlib.suppress(ValueError):
1779             upload_date = datetime.datetime.strptime(date_str, expression).strftime('%Y%m%d')
1780     if upload_date is None:
1781         timetuple = email.utils.parsedate_tz(date_str)
1782         if timetuple:
1783             with contextlib.suppress(ValueError):
1784                 upload_date = datetime.datetime(*timetuple[:6]).strftime('%Y%m%d')
1785     if upload_date is not None:
1786         return str(upload_date)
1787
1788
1789 def unified_timestamp(date_str, day_first=True):
1790     if date_str is None:
1791         return None
1792
1793     date_str = re.sub(r'\s+', ' ', re.sub(
1794         r'(?i)[,|]|(mon|tues?|wed(nes)?|thu(rs)?|fri|sat(ur)?)(day)?', '', date_str))
1795
1796     pm_delta = 12 if re.search(r'(?i)PM', date_str) else 0
1797     timezone, date_str = extract_timezone(date_str)
1798
1799     # Remove AM/PM + timezone
1800     date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)
1801
1802     # Remove unrecognized timezones from ISO 8601 alike timestamps
1803     m = re.search(r'\d{1,2}:\d{1,2}(?:\.\d+)?(?P<tz>\s*[A-Z]+)$', date_str)
1804     if m:
1805         date_str = date_str[:-len(m.group('tz'))]
1806
1807     # Python only supports microseconds, so remove nanoseconds
1808     m = re.search(r'^([0-9]{4,}-[0-9]{1,2}-[0-9]{1,2}T[0-9]{1,2}:[0-9]{1,2}:[0-9]{1,2}\.[0-9]{6})[0-9]+$', date_str)
1809     if m:
1810         date_str = m.group(1)
1811
1812     for expression in date_formats(day_first):
1813         with contextlib.suppress(ValueError):
1814             dt = datetime.datetime.strptime(date_str, expression) - timezone + datetime.timedelta(hours=pm_delta)
1815             return calendar.timegm(dt.timetuple())
1816
1817     timetuple = email.utils.parsedate_tz(date_str)
1818     if timetuple:
1819         return calendar.timegm(timetuple) + pm_delta * 3600 - timezone.total_seconds()
1820
1821
1822 def determine_ext(url, default_ext='unknown_video'):
1823     if url is None or '.' not in url:
1824         return default_ext
1825     guess = url.partition('?')[0].rpartition('.')[2]
1826     if re.match(r'^[A-Za-z0-9]+$', guess):
1827         return guess
1828     # Try extract ext from URLs like http://example.com/foo/bar.mp4/?download
1829     elif guess.rstrip('/') in KNOWN_EXTENSIONS:
1830         return guess.rstrip('/')
1831     else:
1832         return default_ext
1833
1834
1835 def subtitles_filename(filename, sub_lang, sub_format, expected_real_ext=None):
1836     return replace_extension(filename, sub_lang + '.' + sub_format, expected_real_ext)
1837
1838
1839 def datetime_from_str(date_str, precision='auto', format='%Y%m%d'):
1840     R"""
1841     Return a datetime object from a string.
1842     Supported format:
1843         (now|today|yesterday|DATE)([+-]\d+(microsecond|second|minute|hour|day|week|month|year)s?)?
1844
1845     @param format       strftime format of DATE
1846     @param precision    Round the datetime object: auto|microsecond|second|minute|hour|day
1847                         auto: round to the unit provided in date_str (if applicable).
1848     """
1849     auto_precision = False
1850     if precision == 'auto':
1851         auto_precision = True
1852         precision = 'microsecond'
1853     today = datetime_round(datetime.datetime.utcnow(), precision)
1854     if date_str in ('now', 'today'):
1855         return today
1856     if date_str == 'yesterday':
1857         return today - datetime.timedelta(days=1)
1858     match = re.match(
1859         r'(?P<start>.+)(?P<sign>[+-])(?P<time>\d+)(?P<unit>microsecond|second|minute|hour|day|week|month|year)s?',
1860         date_str)
1861     if match is not None:
1862         start_time = datetime_from_str(match.group('start'), precision, format)
1863         time = int(match.group('time')) * (-1 if match.group('sign') == '-' else 1)
1864         unit = match.group('unit')
1865         if unit == 'month' or unit == 'year':
1866             new_date = datetime_add_months(start_time, time * 12 if unit == 'year' else time)
1867             unit = 'day'
1868         else:
1869             if unit == 'week':
1870                 unit = 'day'
1871                 time *= 7
1872             delta = datetime.timedelta(**{unit + 's': time})
1873             new_date = start_time + delta
1874         if auto_precision:
1875             return datetime_round(new_date, unit)
1876         return new_date
1877
1878     return datetime_round(datetime.datetime.strptime(date_str, format), precision)
1879
1880
1881 def date_from_str(date_str, format='%Y%m%d', strict=False):
1882     R"""
1883     Return a date object from a string using datetime_from_str
1884
1885     @param strict  Restrict allowed patterns to "YYYYMMDD" and
1886                    (now|today|yesterday)(-\d+(day|week|month|year)s?)?
1887     """
1888     if strict and not re.fullmatch(r'\d{8}|(now|today|yesterday)(-\d+(day|week|month|year)s?)?', date_str):
1889         raise ValueError(f'Invalid date format "{date_str}"')
1890     return datetime_from_str(date_str, precision='microsecond', format=format).date()
1891
1892
1893 def datetime_add_months(dt, months):
1894     """Increment/Decrement a datetime object by months."""
1895     month = dt.month + months - 1
1896     year = dt.year + month // 12
1897     month = month % 12 + 1
1898     day = min(dt.day, calendar.monthrange(year, month)[1])
1899     return dt.replace(year, month, day)
1900
1901
1902 def datetime_round(dt, precision='day'):
1903     """
1904     Round a datetime object's time to a specific precision
1905     """
1906     if precision == 'microsecond':
1907         return dt
1908
1909     unit_seconds = {
1910         'day': 86400,
1911         'hour': 3600,
1912         'minute': 60,
1913         'second': 1,
1914     }
1915     roundto = lambda x, n: ((x + n / 2) // n) * n
1916     timestamp = calendar.timegm(dt.timetuple())
1917     return datetime.datetime.utcfromtimestamp(roundto(timestamp, unit_seconds[precision]))
1918
1919
1920 def hyphenate_date(date_str):
1921     """
1922     Convert a date in 'YYYYMMDD' format to 'YYYY-MM-DD' format"""
1923     match = re.match(r'^(\d\d\d\d)(\d\d)(\d\d)$', date_str)
1924     if match is not None:
1925         return '-'.join(match.groups())
1926     else:
1927         return date_str
1928
1929
1930 class DateRange:
1931     """Represents a time interval between two dates"""
1932
1933     def __init__(self, start=None, end=None):
1934         """start and end must be strings in the format accepted by date"""
1935         if start is not None:
1936             self.start = date_from_str(start, strict=True)
1937         else:
1938             self.start = datetime.datetime.min.date()
1939         if end is not None:
1940             self.end = date_from_str(end, strict=True)
1941         else:
1942             self.end = datetime.datetime.max.date()
1943         if self.start > self.end:
1944             raise ValueError('Date range: "%s" , the start date must be before the end date' % self)
1945
1946     @classmethod
1947     def day(cls, day):
1948         """Returns a range that only contains the given day"""
1949         return cls(day, day)
1950
1951     def __contains__(self, date):
1952         """Check if the date is in the range"""
1953         if not isinstance(date, datetime.date):
1954             date = date_from_str(date)
1955         return self.start <= date <= self.end
1956
1957     def __str__(self):
1958         return f'{self.start.isoformat()} - {self.end.isoformat()}'
1959
1960     def __eq__(self, other):
1961         return (isinstance(other, DateRange)
1962                 and self.start == other.start and self.end == other.end)
1963
1964
1965 def platform_name():
1966     """ Returns the platform name as a str """
1967     deprecation_warning(f'"{__name__}.platform_name" is deprecated, use "platform.platform" instead')
1968     return platform.platform()
1969
1970
1971 @functools.cache
1972 def system_identifier():
1973     python_implementation = platform.python_implementation()
1974     if python_implementation == 'PyPy' and hasattr(sys, 'pypy_version_info'):
1975         python_implementation += ' version %d.%d.%d' % sys.pypy_version_info[:3]
1976     libc_ver = []
1977     with contextlib.suppress(OSError):  # We may not have access to the executable
1978         libc_ver = platform.libc_ver()
1979
1980     return 'Python %s (%s %s) - %s %s' % (
1981         platform.python_version(),
1982         python_implementation,
1983         platform.architecture()[0],
1984         platform.platform(),
1985         format_field(join_nonempty(*libc_ver, delim=' '), None, '(%s)'),
1986     )
1987
1988
1989 @functools.cache
1990 def get_windows_version():
1991     ''' Get Windows version. returns () if it's not running on Windows '''
1992     if compat_os_name == 'nt':
1993         return version_tuple(platform.win32_ver()[1])
1994     else:
1995         return ()
1996
1997
1998 def write_string(s, out=None, encoding=None):
1999     assert isinstance(s, str)
2000     out = out or sys.stderr
2001
2002     if compat_os_name == 'nt' and supports_terminal_sequences(out):
2003         s = re.sub(r'([\r\n]+)', r' \1', s)
2004
2005     enc, buffer = None, out
2006     if 'b' in getattr(out, 'mode', ''):
2007         enc = encoding or preferredencoding()
2008     elif hasattr(out, 'buffer'):
2009         buffer = out.buffer
2010         enc = encoding or getattr(out, 'encoding', None) or preferredencoding()
2011
2012     buffer.write(s.encode(enc, 'ignore') if enc else s)
2013     out.flush()
2014
2015
2016 def deprecation_warning(msg, *, printer=None, stacklevel=0, **kwargs):
2017     from . import _IN_CLI
2018     if _IN_CLI:
2019         if msg in deprecation_warning._cache:
2020             return
2021         deprecation_warning._cache.add(msg)
2022         if printer:
2023             return printer(f'{msg}{bug_reports_message()}', **kwargs)
2024         return write_string(f'ERROR: {msg}{bug_reports_message()}\n', **kwargs)
2025     else:
2026         import warnings
2027         warnings.warn(DeprecationWarning(msg), stacklevel=stacklevel + 3)
2028
2029
2030 deprecation_warning._cache = set()
2031
2032
2033 def bytes_to_intlist(bs):
2034     if not bs:
2035         return []
2036     if isinstance(bs[0], int):  # Python 3
2037         return list(bs)
2038     else:
2039         return [ord(c) for c in bs]
2040
2041
2042 def intlist_to_bytes(xs):
2043     if not xs:
2044         return b''
2045     return struct.pack('%dB' % len(xs), *xs)
2046
2047
2048 class LockingUnsupportedError(OSError):
2049     msg = 'File locking is not supported'
2050
2051     def __init__(self):
2052         super().__init__(self.msg)
2053
2054
2055 # Cross-platform file locking
2056 if sys.platform == 'win32':
2057     import ctypes
2058     import ctypes.wintypes
2059     import msvcrt
2060
2061     class OVERLAPPED(ctypes.Structure):
2062         _fields_ = [
2063             ('Internal', ctypes.wintypes.LPVOID),
2064             ('InternalHigh', ctypes.wintypes.LPVOID),
2065             ('Offset', ctypes.wintypes.DWORD),
2066             ('OffsetHigh', ctypes.wintypes.DWORD),
2067             ('hEvent', ctypes.wintypes.HANDLE),
2068         ]
2069
2070     kernel32 = ctypes.windll.kernel32
2071     LockFileEx = kernel32.LockFileEx
2072     LockFileEx.argtypes = [
2073         ctypes.wintypes.HANDLE,     # hFile
2074         ctypes.wintypes.DWORD,      # dwFlags
2075         ctypes.wintypes.DWORD,      # dwReserved
2076         ctypes.wintypes.DWORD,      # nNumberOfBytesToLockLow
2077         ctypes.wintypes.DWORD,      # nNumberOfBytesToLockHigh
2078         ctypes.POINTER(OVERLAPPED)  # Overlapped
2079     ]
2080     LockFileEx.restype = ctypes.wintypes.BOOL
2081     UnlockFileEx = kernel32.UnlockFileEx
2082     UnlockFileEx.argtypes = [
2083         ctypes.wintypes.HANDLE,     # hFile
2084         ctypes.wintypes.DWORD,      # dwReserved
2085         ctypes.wintypes.DWORD,      # nNumberOfBytesToLockLow
2086         ctypes.wintypes.DWORD,      # nNumberOfBytesToLockHigh
2087         ctypes.POINTER(OVERLAPPED)  # Overlapped
2088     ]
2089     UnlockFileEx.restype = ctypes.wintypes.BOOL
2090     whole_low = 0xffffffff
2091     whole_high = 0x7fffffff
2092
2093     def _lock_file(f, exclusive, block):
2094         overlapped = OVERLAPPED()
2095         overlapped.Offset = 0
2096         overlapped.OffsetHigh = 0
2097         overlapped.hEvent = 0
2098         f._lock_file_overlapped_p = ctypes.pointer(overlapped)
2099
2100         if not LockFileEx(msvcrt.get_osfhandle(f.fileno()),
2101                           (0x2 if exclusive else 0x0) | (0x0 if block else 0x1),
2102                           0, whole_low, whole_high, f._lock_file_overlapped_p):
2103             # NB: No argument form of "ctypes.FormatError" does not work on PyPy
2104             raise BlockingIOError(f'Locking file failed: {ctypes.FormatError(ctypes.GetLastError())!r}')
2105
2106     def _unlock_file(f):
2107         assert f._lock_file_overlapped_p
2108         handle = msvcrt.get_osfhandle(f.fileno())
2109         if not UnlockFileEx(handle, 0, whole_low, whole_high, f._lock_file_overlapped_p):
2110             raise OSError('Unlocking file failed: %r' % ctypes.FormatError())
2111
2112 else:
2113     try:
2114         import fcntl
2115
2116         def _lock_file(f, exclusive, block):
2117             flags = fcntl.LOCK_EX if exclusive else fcntl.LOCK_SH
2118             if not block:
2119                 flags |= fcntl.LOCK_NB
2120             try:
2121                 fcntl.flock(f, flags)
2122             except BlockingIOError:
2123                 raise
2124             except OSError:  # AOSP does not have flock()
2125                 fcntl.lockf(f, flags)
2126
2127         def _unlock_file(f):
2128             try:
2129                 fcntl.flock(f, fcntl.LOCK_UN)
2130             except OSError:
2131                 fcntl.lockf(f, fcntl.LOCK_UN)
2132
2133     except ImportError:
2134
2135         def _lock_file(f, exclusive, block):
2136             raise LockingUnsupportedError()
2137
2138         def _unlock_file(f):
2139             raise LockingUnsupportedError()
2140
2141
2142 class locked_file:
2143     locked = False
2144
2145     def __init__(self, filename, mode, block=True, encoding=None):
2146         if mode not in {'r', 'rb', 'a', 'ab', 'w', 'wb'}:
2147             raise NotImplementedError(mode)
2148         self.mode, self.block = mode, block
2149
2150         writable = any(f in mode for f in 'wax+')
2151         readable = any(f in mode for f in 'r+')
2152         flags = functools.reduce(operator.ior, (
2153             getattr(os, 'O_CLOEXEC', 0),  # UNIX only
2154             getattr(os, 'O_BINARY', 0),  # Windows only
2155             getattr(os, 'O_NOINHERIT', 0),  # Windows only
2156             os.O_CREAT if writable else 0,  # O_TRUNC only after locking
2157             os.O_APPEND if 'a' in mode else 0,
2158             os.O_EXCL if 'x' in mode else 0,
2159             os.O_RDONLY if not writable else os.O_RDWR if readable else os.O_WRONLY,
2160         ))
2161
2162         self.f = os.fdopen(os.open(filename, flags, 0o666), mode, encoding=encoding)
2163
2164     def __enter__(self):
2165         exclusive = 'r' not in self.mode
2166         try:
2167             _lock_file(self.f, exclusive, self.block)
2168             self.locked = True
2169         except OSError:
2170             self.f.close()
2171             raise
2172         if 'w' in self.mode:
2173             try:
2174                 self.f.truncate()
2175             except OSError as e:
2176                 if e.errno not in (
2177                     errno.ESPIPE,  # Illegal seek - expected for FIFO
2178                     errno.EINVAL,  # Invalid argument - expected for /dev/null
2179                 ):
2180                     raise
2181         return self
2182
2183     def unlock(self):
2184         if not self.locked:
2185             return
2186         try:
2187             _unlock_file(self.f)
2188         finally:
2189             self.locked = False
2190
2191     def __exit__(self, *_):
2192         try:
2193             self.unlock()
2194         finally:
2195             self.f.close()
2196
2197     open = __enter__
2198     close = __exit__
2199
2200     def __getattr__(self, attr):
2201         return getattr(self.f, attr)
2202
2203     def __iter__(self):
2204         return iter(self.f)
2205
2206
2207 @functools.cache
2208 def get_filesystem_encoding():
2209     encoding = sys.getfilesystemencoding()
2210     return encoding if encoding is not None else 'utf-8'
2211
2212
2213 def shell_quote(args):
2214     quoted_args = []
2215     encoding = get_filesystem_encoding()
2216     for a in args:
2217         if isinstance(a, bytes):
2218             # We may get a filename encoded with 'encodeFilename'
2219             a = a.decode(encoding)
2220         quoted_args.append(compat_shlex_quote(a))
2221     return ' '.join(quoted_args)
2222
2223
2224 def smuggle_url(url, data):
2225     """ Pass additional data in a URL for internal use. """
2226
2227     url, idata = unsmuggle_url(url, {})
2228     data.update(idata)
2229     sdata = urllib.parse.urlencode(
2230         {'__youtubedl_smuggle': json.dumps(data)})
2231     return url + '#' + sdata
2232
2233
2234 def unsmuggle_url(smug_url, default=None):
2235     if '#__youtubedl_smuggle' not in smug_url:
2236         return smug_url, default
2237     url, _, sdata = smug_url.rpartition('#')
2238     jsond = urllib.parse.parse_qs(sdata)['__youtubedl_smuggle'][0]
2239     data = json.loads(jsond)
2240     return url, data
2241
2242
2243 def format_decimal_suffix(num, fmt='%d%s', *, factor=1000):
2244     """ Formats numbers with decimal sufixes like K, M, etc """
2245     num, factor = float_or_none(num), float(factor)
2246     if num is None or num < 0:
2247         return None
2248     POSSIBLE_SUFFIXES = 'kMGTPEZY'
2249     exponent = 0 if num == 0 else min(int(math.log(num, factor)), len(POSSIBLE_SUFFIXES))
2250     suffix = ['', *POSSIBLE_SUFFIXES][exponent]
2251     if factor == 1024:
2252         suffix = {'k': 'Ki', '': ''}.get(suffix, f'{suffix}i')
2253     converted = num / (factor ** exponent)
2254     return fmt % (converted, suffix)
2255
2256
2257 def format_bytes(bytes):
2258     return format_decimal_suffix(bytes, '%.2f%sB', factor=1024) or 'N/A'
2259
2260
2261 def lookup_unit_table(unit_table, s):
2262     units_re = '|'.join(re.escape(u) for u in unit_table)
2263     m = re.match(
2264         r'(?P<num>[0-9]+(?:[,.][0-9]*)?)\s*(?P<unit>%s)\b' % units_re, s)
2265     if not m:
2266         return None
2267     num_str = m.group('num').replace(',', '.')
2268     mult = unit_table[m.group('unit')]
2269     return int(float(num_str) * mult)
2270
2271
2272 def parse_filesize(s):
2273     if s is None:
2274         return None
2275
2276     # The lower-case forms are of course incorrect and unofficial,
2277     # but we support those too
2278     _UNIT_TABLE = {
2279         'B': 1,
2280         'b': 1,
2281         'bytes': 1,
2282         'KiB': 1024,
2283         'KB': 1000,
2284         'kB': 1024,
2285         'Kb': 1000,
2286         'kb': 1000,
2287         'kilobytes': 1000,
2288         'kibibytes': 1024,
2289         'MiB': 1024 ** 2,
2290         'MB': 1000 ** 2,
2291         'mB': 1024 ** 2,
2292         'Mb': 1000 ** 2,
2293         'mb': 1000 ** 2,
2294         'megabytes': 1000 ** 2,
2295         'mebibytes': 1024 ** 2,
2296         'GiB': 1024 ** 3,
2297         'GB': 1000 ** 3,
2298         'gB': 1024 ** 3,
2299         'Gb': 1000 ** 3,
2300         'gb': 1000 ** 3,
2301         'gigabytes': 1000 ** 3,
2302         'gibibytes': 1024 ** 3,
2303         'TiB': 1024 ** 4,
2304         'TB': 1000 ** 4,
2305         'tB': 1024 ** 4,
2306         'Tb': 1000 ** 4,
2307         'tb': 1000 ** 4,
2308         'terabytes': 1000 ** 4,
2309         'tebibytes': 1024 ** 4,
2310         'PiB': 1024 ** 5,
2311         'PB': 1000 ** 5,
2312         'pB': 1024 ** 5,
2313         'Pb': 1000 ** 5,
2314         'pb': 1000 ** 5,
2315         'petabytes': 1000 ** 5,
2316         'pebibytes': 1024 ** 5,
2317         'EiB': 1024 ** 6,
2318         'EB': 1000 ** 6,
2319         'eB': 1024 ** 6,
2320         'Eb': 1000 ** 6,
2321         'eb': 1000 ** 6,
2322         'exabytes': 1000 ** 6,
2323         'exbibytes': 1024 ** 6,
2324         'ZiB': 1024 ** 7,
2325         'ZB': 1000 ** 7,
2326         'zB': 1024 ** 7,
2327         'Zb': 1000 ** 7,
2328         'zb': 1000 ** 7,
2329         'zettabytes': 1000 ** 7,
2330         'zebibytes': 1024 ** 7,
2331         'YiB': 1024 ** 8,
2332         'YB': 1000 ** 8,
2333         'yB': 1024 ** 8,
2334         'Yb': 1000 ** 8,
2335         'yb': 1000 ** 8,
2336         'yottabytes': 1000 ** 8,
2337         'yobibytes': 1024 ** 8,
2338     }
2339
2340     return lookup_unit_table(_UNIT_TABLE, s)
2341
2342
2343 def parse_count(s):
2344     if s is None:
2345         return None
2346
2347     s = re.sub(r'^[^\d]+\s', '', s).strip()
2348
2349     if re.match(r'^[\d,.]+$', s):
2350         return str_to_int(s)
2351
2352     _UNIT_TABLE = {
2353         'k': 1000,
2354         'K': 1000,
2355         'm': 1000 ** 2,
2356         'M': 1000 ** 2,
2357         'kk': 1000 ** 2,
2358         'KK': 1000 ** 2,
2359         'b': 1000 ** 3,
2360         'B': 1000 ** 3,
2361     }
2362
2363     ret = lookup_unit_table(_UNIT_TABLE, s)
2364     if ret is not None:
2365         return ret
2366
2367     mobj = re.match(r'([\d,.]+)(?:$|\s)', s)
2368     if mobj:
2369         return str_to_int(mobj.group(1))
2370
2371
2372 def parse_resolution(s, *, lenient=False):
2373     if s is None:
2374         return {}
2375
2376     if lenient:
2377         mobj = re.search(r'(?P<w>\d+)\s*[xX×,]\s*(?P<h>\d+)', s)
2378     else:
2379         mobj = re.search(r'(?<![a-zA-Z0-9])(?P<w>\d+)\s*[xX×,]\s*(?P<h>\d+)(?![a-zA-Z0-9])', s)
2380     if mobj:
2381         return {
2382             'width': int(mobj.group('w')),
2383             'height': int(mobj.group('h')),
2384         }
2385
2386     mobj = re.search(r'(?<![a-zA-Z0-9])(\d+)[pPiI](?![a-zA-Z0-9])', s)
2387     if mobj:
2388         return {'height': int(mobj.group(1))}
2389
2390     mobj = re.search(r'\b([48])[kK]\b', s)
2391     if mobj:
2392         return {'height': int(mobj.group(1)) * 540}
2393
2394     return {}
2395
2396
2397 def parse_bitrate(s):
2398     if not isinstance(s, str):
2399         return
2400     mobj = re.search(r'\b(\d+)\s*kbps', s)
2401     if mobj:
2402         return int(mobj.group(1))
2403
2404
2405 def month_by_name(name, lang='en'):
2406     """ Return the number of a month by (locale-independently) English name """
2407
2408     month_names = MONTH_NAMES.get(lang, MONTH_NAMES['en'])
2409
2410     try:
2411         return month_names.index(name) + 1
2412     except ValueError:
2413         return None
2414
2415
2416 def month_by_abbreviation(abbrev):
2417     """ Return the number of a month by (locale-independently) English
2418         abbreviations """
2419
2420     try:
2421         return [s[:3] for s in ENGLISH_MONTH_NAMES].index(abbrev) + 1
2422     except ValueError:
2423         return None
2424
2425
2426 def fix_xml_ampersands(xml_str):
2427     """Replace all the '&' by '&amp;' in XML"""
2428     return re.sub(
2429         r'&(?!amp;|lt;|gt;|apos;|quot;|#x[0-9a-fA-F]{,4};|#[0-9]{,4};)',
2430         '&amp;',
2431         xml_str)
2432
2433
2434 def setproctitle(title):
2435     assert isinstance(title, str)
2436
2437     # Workaround for https://github.com/yt-dlp/yt-dlp/issues/4541
2438     try:
2439         import ctypes
2440     except ImportError:
2441         return
2442
2443     try:
2444         libc = ctypes.cdll.LoadLibrary('libc.so.6')
2445     except OSError:
2446         return
2447     except TypeError:
2448         # LoadLibrary in Windows Python 2.7.13 only expects
2449         # a bytestring, but since unicode_literals turns
2450         # every string into a unicode string, it fails.
2451         return
2452     title_bytes = title.encode()
2453     buf = ctypes.create_string_buffer(len(title_bytes))
2454     buf.value = title_bytes
2455     try:
2456         libc.prctl(15, buf, 0, 0, 0)
2457     except AttributeError:
2458         return  # Strange libc, just skip this
2459
2460
2461 def remove_start(s, start):
2462     return s[len(start):] if s is not None and s.startswith(start) else s
2463
2464
2465 def remove_end(s, end):
2466     return s[:-len(end)] if s is not None and s.endswith(end) else s
2467
2468
2469 def remove_quotes(s):
2470     if s is None or len(s) < 2:
2471         return s
2472     for quote in ('"', "'", ):
2473         if s[0] == quote and s[-1] == quote:
2474             return s[1:-1]
2475     return s
2476
2477
2478 def get_domain(url):
2479     """
2480     This implementation is inconsistent, but is kept for compatibility.
2481     Use this only for "webpage_url_domain"
2482     """
2483     return remove_start(urllib.parse.urlparse(url).netloc, 'www.') or None
2484
2485
2486 def url_basename(url):
2487     path = urllib.parse.urlparse(url).path
2488     return path.strip('/').split('/')[-1]
2489
2490
2491 def base_url(url):
2492     return re.match(r'https?://[^?#]+/', url).group()
2493
2494
2495 def urljoin(base, path):
2496     if isinstance(path, bytes):
2497         path = path.decode()
2498     if not isinstance(path, str) or not path:
2499         return None
2500     if re.match(r'^(?:[a-zA-Z][a-zA-Z0-9+-.]*:)?//', path):
2501         return path
2502     if isinstance(base, bytes):
2503         base = base.decode()
2504     if not isinstance(base, str) or not re.match(
2505             r'^(?:https?:)?//', base):
2506         return None
2507     return urllib.parse.urljoin(base, path)
2508
2509
2510 class HEADRequest(urllib.request.Request):
2511     def get_method(self):
2512         return 'HEAD'
2513
2514
2515 class PUTRequest(urllib.request.Request):
2516     def get_method(self):
2517         return 'PUT'
2518
2519
2520 def int_or_none(v, scale=1, default=None, get_attr=None, invscale=1):
2521     if get_attr and v is not None:
2522         v = getattr(v, get_attr, None)
2523     try:
2524         return int(v) * invscale // scale
2525     except (ValueError, TypeError, OverflowError):
2526         return default
2527
2528
2529 def str_or_none(v, default=None):
2530     return default if v is None else str(v)
2531
2532
2533 def str_to_int(int_str):
2534     """ A more relaxed version of int_or_none """
2535     if isinstance(int_str, int):
2536         return int_str
2537     elif isinstance(int_str, str):
2538         int_str = re.sub(r'[,\.\+]', '', int_str)
2539         return int_or_none(int_str)
2540
2541
2542 def float_or_none(v, scale=1, invscale=1, default=None):
2543     if v is None:
2544         return default
2545     try:
2546         return float(v) * invscale / scale
2547     except (ValueError, TypeError):
2548         return default
2549
2550
2551 def bool_or_none(v, default=None):
2552     return v if isinstance(v, bool) else default
2553
2554
2555 def strip_or_none(v, default=None):
2556     return v.strip() if isinstance(v, str) else default
2557
2558
2559 def url_or_none(url):
2560     if not url or not isinstance(url, str):
2561         return None
2562     url = url.strip()
2563     return url if re.match(r'^(?:(?:https?|rt(?:m(?:pt?[es]?|fp)|sp[su]?)|mms|ftps?):)?//', url) else None
2564
2565
2566 def request_to_url(req):
2567     if isinstance(req, urllib.request.Request):
2568         return req.get_full_url()
2569     else:
2570         return req
2571
2572
2573 def strftime_or_none(timestamp, date_format, default=None):
2574     datetime_object = None
2575     try:
2576         if isinstance(timestamp, (int, float)):  # unix timestamp
2577             datetime_object = datetime.datetime.utcfromtimestamp(timestamp)
2578         elif isinstance(timestamp, str):  # assume YYYYMMDD
2579             datetime_object = datetime.datetime.strptime(timestamp, '%Y%m%d')
2580         date_format = re.sub(  # Support %s on windows
2581             r'(?<!%)(%%)*%s', rf'\g<1>{int(datetime_object.timestamp())}', date_format)
2582         return datetime_object.strftime(date_format)
2583     except (ValueError, TypeError, AttributeError):
2584         return default
2585
2586
2587 def parse_duration(s):
2588     if not isinstance(s, str):
2589         return None
2590     s = s.strip()
2591     if not s:
2592         return None
2593
2594     days, hours, mins, secs, ms = [None] * 5
2595     m = re.match(r'''(?x)
2596             (?P<before_secs>
2597                 (?:(?:(?P<days>[0-9]+):)?(?P<hours>[0-9]+):)?(?P<mins>[0-9]+):)?
2598             (?P<secs>(?(before_secs)[0-9]{1,2}|[0-9]+))
2599             (?P<ms>[.:][0-9]+)?Z?$
2600         ''', s)
2601     if m:
2602         days, hours, mins, secs, ms = m.group('days', 'hours', 'mins', 'secs', 'ms')
2603     else:
2604         m = re.match(
2605             r'''(?ix)(?:P?
2606                 (?:
2607                     [0-9]+\s*y(?:ears?)?,?\s*
2608                 )?
2609                 (?:
2610                     [0-9]+\s*m(?:onths?)?,?\s*
2611                 )?
2612                 (?:
2613                     [0-9]+\s*w(?:eeks?)?,?\s*
2614                 )?
2615                 (?:
2616                     (?P<days>[0-9]+)\s*d(?:ays?)?,?\s*
2617                 )?
2618                 T)?
2619                 (?:
2620                     (?P<hours>[0-9]+)\s*h(?:ours?)?,?\s*
2621                 )?
2622                 (?:
2623                     (?P<mins>[0-9]+)\s*m(?:in(?:ute)?s?)?,?\s*
2624                 )?
2625                 (?:
2626                     (?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\s*s(?:ec(?:ond)?s?)?\s*
2627                 )?Z?$''', s)
2628         if m:
2629             days, hours, mins, secs, ms = m.groups()
2630         else:
2631             m = re.match(r'(?i)(?:(?P<hours>[0-9.]+)\s*(?:hours?)|(?P<mins>[0-9.]+)\s*(?:mins?\.?|minutes?)\s*)Z?$', s)
2632             if m:
2633                 hours, mins = m.groups()
2634             else:
2635                 return None
2636
2637     if ms:
2638         ms = ms.replace(':', '.')
2639     return sum(float(part or 0) * mult for part, mult in (
2640         (days, 86400), (hours, 3600), (mins, 60), (secs, 1), (ms, 1)))
2641
2642
2643 def prepend_extension(filename, ext, expected_real_ext=None):
2644     name, real_ext = os.path.splitext(filename)
2645     return (
2646         f'{name}.{ext}{real_ext}'
2647         if not expected_real_ext or real_ext[1:] == expected_real_ext
2648         else f'{filename}.{ext}')
2649
2650
2651 def replace_extension(filename, ext, expected_real_ext=None):
2652     name, real_ext = os.path.splitext(filename)
2653     return '{}.{}'.format(
2654         name if not expected_real_ext or real_ext[1:] == expected_real_ext else filename,
2655         ext)
2656
2657
2658 def check_executable(exe, args=[]):
2659     """ Checks if the given binary is installed somewhere in PATH, and returns its name.
2660     args can be a list of arguments for a short output (like -version) """
2661     try:
2662         Popen.run([exe] + args, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
2663     except OSError:
2664         return False
2665     return exe
2666
2667
2668 def _get_exe_version_output(exe, args, *, to_screen=None):
2669     if to_screen:
2670         to_screen(f'Checking exe version: {shell_quote([exe] + args)}')
2671     try:
2672         # STDIN should be redirected too. On UNIX-like systems, ffmpeg triggers
2673         # SIGTTOU if yt-dlp is run in the background.
2674         # See https://github.com/ytdl-org/youtube-dl/issues/955#issuecomment-209789656
2675         stdout, _, _ = Popen.run([encodeArgument(exe)] + args, text=True,
2676                                  stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
2677     except OSError:
2678         return False
2679     return stdout
2680
2681
2682 def detect_exe_version(output, version_re=None, unrecognized='present'):
2683     assert isinstance(output, str)
2684     if version_re is None:
2685         version_re = r'version\s+([-0-9._a-zA-Z]+)'
2686     m = re.search(version_re, output)
2687     if m:
2688         return m.group(1)
2689     else:
2690         return unrecognized
2691
2692
2693 def get_exe_version(exe, args=['--version'],
2694                     version_re=None, unrecognized='present'):
2695     """ Returns the version of the specified executable,
2696     or False if the executable is not present """
2697     out = _get_exe_version_output(exe, args)
2698     return detect_exe_version(out, version_re, unrecognized) if out else False
2699
2700
2701 def frange(start=0, stop=None, step=1):
2702     """Float range"""
2703     if stop is None:
2704         start, stop = 0, start
2705     sign = [-1, 1][step > 0] if step else 0
2706     while sign * start < sign * stop:
2707         yield start
2708         start += step
2709
2710
2711 class LazyList(collections.abc.Sequence):
2712     """Lazy immutable list from an iterable
2713     Note that slices of a LazyList are lists and not LazyList"""
2714
2715     class IndexError(IndexError):
2716         pass
2717
2718     def __init__(self, iterable, *, reverse=False, _cache=None):
2719         self._iterable = iter(iterable)
2720         self._cache = [] if _cache is None else _cache
2721         self._reversed = reverse
2722
2723     def __iter__(self):
2724         if self._reversed:
2725             # We need to consume the entire iterable to iterate in reverse
2726             yield from self.exhaust()
2727             return
2728         yield from self._cache
2729         for item in self._iterable:
2730             self._cache.append(item)
2731             yield item
2732
2733     def _exhaust(self):
2734         self._cache.extend(self._iterable)
2735         self._iterable = []  # Discard the emptied iterable to make it pickle-able
2736         return self._cache
2737
2738     def exhaust(self):
2739         """Evaluate the entire iterable"""
2740         return self._exhaust()[::-1 if self._reversed else 1]
2741
2742     @staticmethod
2743     def _reverse_index(x):
2744         return None if x is None else ~x
2745
2746     def __getitem__(self, idx):
2747         if isinstance(idx, slice):
2748             if self._reversed:
2749                 idx = slice(self._reverse_index(idx.start), self._reverse_index(idx.stop), -(idx.step or 1))
2750             start, stop, step = idx.start, idx.stop, idx.step or 1
2751         elif isinstance(idx, int):
2752             if self._reversed:
2753                 idx = self._reverse_index(idx)
2754             start, stop, step = idx, idx, 0
2755         else:
2756             raise TypeError('indices must be integers or slices')
2757         if ((start or 0) < 0 or (stop or 0) < 0
2758                 or (start is None and step < 0)
2759                 or (stop is None and step > 0)):
2760             # We need to consume the entire iterable to be able to slice from the end
2761             # Obviously, never use this with infinite iterables
2762             self._exhaust()
2763             try:
2764                 return self._cache[idx]
2765             except IndexError as e:
2766                 raise self.IndexError(e) from e
2767         n = max(start or 0, stop or 0) - len(self._cache) + 1
2768         if n > 0:
2769             self._cache.extend(itertools.islice(self._iterable, n))
2770         try:
2771             return self._cache[idx]
2772         except IndexError as e:
2773             raise self.IndexError(e) from e
2774
2775     def __bool__(self):
2776         try:
2777             self[-1] if self._reversed else self[0]
2778         except self.IndexError:
2779             return False
2780         return True
2781
2782     def __len__(self):
2783         self._exhaust()
2784         return len(self._cache)
2785
2786     def __reversed__(self):
2787         return type(self)(self._iterable, reverse=not self._reversed, _cache=self._cache)
2788
2789     def __copy__(self):
2790         return type(self)(self._iterable, reverse=self._reversed, _cache=self._cache)
2791
2792     def __repr__(self):
2793         # repr and str should mimic a list. So we exhaust the iterable
2794         return repr(self.exhaust())
2795
2796     def __str__(self):
2797         return repr(self.exhaust())
2798
2799
2800 class PagedList:
2801
2802     class IndexError(IndexError):
2803         pass
2804
2805     def __len__(self):
2806         # This is only useful for tests
2807         return len(self.getslice())
2808
2809     def __init__(self, pagefunc, pagesize, use_cache=True):
2810         self._pagefunc = pagefunc
2811         self._pagesize = pagesize
2812         self._pagecount = float('inf')
2813         self._use_cache = use_cache
2814         self._cache = {}
2815
2816     def getpage(self, pagenum):
2817         page_results = self._cache.get(pagenum)
2818         if page_results is None:
2819             page_results = [] if pagenum > self._pagecount else list(self._pagefunc(pagenum))
2820         if self._use_cache:
2821             self._cache[pagenum] = page_results
2822         return page_results
2823
2824     def getslice(self, start=0, end=None):
2825         return list(self._getslice(start, end))
2826
2827     def _getslice(self, start, end):
2828         raise NotImplementedError('This method must be implemented by subclasses')
2829
2830     def __getitem__(self, idx):
2831         assert self._use_cache, 'Indexing PagedList requires cache'
2832         if not isinstance(idx, int) or idx < 0:
2833             raise TypeError('indices must be non-negative integers')
2834         entries = self.getslice(idx, idx + 1)
2835         if not entries:
2836             raise self.IndexError()
2837         return entries[0]
2838
2839
2840 class OnDemandPagedList(PagedList):
2841     """Download pages until a page with less than maximum results"""
2842
2843     def _getslice(self, start, end):
2844         for pagenum in itertools.count(start // self._pagesize):
2845             firstid = pagenum * self._pagesize
2846             nextfirstid = pagenum * self._pagesize + self._pagesize
2847             if start >= nextfirstid:
2848                 continue
2849
2850             startv = (
2851                 start % self._pagesize
2852                 if firstid <= start < nextfirstid
2853                 else 0)
2854             endv = (
2855                 ((end - 1) % self._pagesize) + 1
2856                 if (end is not None and firstid <= end <= nextfirstid)
2857                 else None)
2858
2859             try:
2860                 page_results = self.getpage(pagenum)
2861             except Exception:
2862                 self._pagecount = pagenum - 1
2863                 raise
2864             if startv != 0 or endv is not None:
2865                 page_results = page_results[startv:endv]
2866             yield from page_results
2867
2868             # A little optimization - if current page is not "full", ie. does
2869             # not contain page_size videos then we can assume that this page
2870             # is the last one - there are no more ids on further pages -
2871             # i.e. no need to query again.
2872             if len(page_results) + startv < self._pagesize:
2873                 break
2874
2875             # If we got the whole page, but the next page is not interesting,
2876             # break out early as well
2877             if end == nextfirstid:
2878                 break
2879
2880
2881 class InAdvancePagedList(PagedList):
2882     """PagedList with total number of pages known in advance"""
2883
2884     def __init__(self, pagefunc, pagecount, pagesize):
2885         PagedList.__init__(self, pagefunc, pagesize, True)
2886         self._pagecount = pagecount
2887
2888     def _getslice(self, start, end):
2889         start_page = start // self._pagesize
2890         end_page = self._pagecount if end is None else min(self._pagecount, end // self._pagesize + 1)
2891         skip_elems = start - start_page * self._pagesize
2892         only_more = None if end is None else end - start
2893         for pagenum in range(start_page, end_page):
2894             page_results = self.getpage(pagenum)
2895             if skip_elems:
2896                 page_results = page_results[skip_elems:]
2897                 skip_elems = None
2898             if only_more is not None:
2899                 if len(page_results) < only_more:
2900                     only_more -= len(page_results)
2901                 else:
2902                     yield from page_results[:only_more]
2903                     break
2904             yield from page_results
2905
2906
2907 class PlaylistEntries:
2908     MissingEntry = object()
2909     is_exhausted = False
2910
2911     def __init__(self, ydl, info_dict):
2912         self.ydl = ydl
2913
2914         # _entries must be assigned now since infodict can change during iteration
2915         entries = info_dict.get('entries')
2916         if entries is None:
2917             raise EntryNotInPlaylist('There are no entries')
2918         elif isinstance(entries, list):
2919             self.is_exhausted = True
2920
2921         requested_entries = info_dict.get('requested_entries')
2922         self.is_incomplete = bool(requested_entries)
2923         if self.is_incomplete:
2924             assert self.is_exhausted
2925             self._entries = [self.MissingEntry] * max(requested_entries)
2926             for i, entry in zip(requested_entries, entries):
2927                 self._entries[i - 1] = entry
2928         elif isinstance(entries, (list, PagedList, LazyList)):
2929             self._entries = entries
2930         else:
2931             self._entries = LazyList(entries)
2932
2933     PLAYLIST_ITEMS_RE = re.compile(r'''(?x)
2934         (?P<start>[+-]?\d+)?
2935         (?P<range>[:-]
2936             (?P<end>[+-]?\d+|inf(?:inite)?)?
2937             (?::(?P<step>[+-]?\d+))?
2938         )?''')
2939
2940     @classmethod
2941     def parse_playlist_items(cls, string):
2942         for segment in string.split(','):
2943             if not segment:
2944                 raise ValueError('There is two or more consecutive commas')
2945             mobj = cls.PLAYLIST_ITEMS_RE.fullmatch(segment)
2946             if not mobj:
2947                 raise ValueError(f'{segment!r} is not a valid specification')
2948             start, end, step, has_range = mobj.group('start', 'end', 'step', 'range')
2949             if int_or_none(step) == 0:
2950                 raise ValueError(f'Step in {segment!r} cannot be zero')
2951             yield slice(int_or_none(start), float_or_none(end), int_or_none(step)) if has_range else int(start)
2952
2953     def get_requested_items(self):
2954         playlist_items = self.ydl.params.get('playlist_items')
2955         playlist_start = self.ydl.params.get('playliststart', 1)
2956         playlist_end = self.ydl.params.get('playlistend')
2957         # For backwards compatibility, interpret -1 as whole list
2958         if playlist_end in (-1, None):
2959             playlist_end = ''
2960         if not playlist_items:
2961             playlist_items = f'{playlist_start}:{playlist_end}'
2962         elif playlist_start != 1 or playlist_end:
2963             self.ydl.report_warning('Ignoring playliststart and playlistend because playlistitems was given', only_once=True)
2964
2965         for index in self.parse_playlist_items(playlist_items):
2966             for i, entry in self[index]:
2967                 yield i, entry
2968                 if not entry:
2969                     continue
2970                 try:
2971                     # TODO: Add auto-generated fields
2972                     self.ydl._match_entry(entry, incomplete=True, silent=True)
2973                 except (ExistingVideoReached, RejectedVideoReached):
2974                     return
2975
2976     def get_full_count(self):
2977         if self.is_exhausted and not self.is_incomplete:
2978             return len(self)
2979         elif isinstance(self._entries, InAdvancePagedList):
2980             if self._entries._pagesize == 1:
2981                 return self._entries._pagecount
2982
2983     @functools.cached_property
2984     def _getter(self):
2985         if isinstance(self._entries, list):
2986             def get_entry(i):
2987                 try:
2988                     entry = self._entries[i]
2989                 except IndexError:
2990                     entry = self.MissingEntry
2991                     if not self.is_incomplete:
2992                         raise self.IndexError()
2993                 if entry is self.MissingEntry:
2994                     raise EntryNotInPlaylist(f'Entry {i} cannot be found')
2995                 return entry
2996         else:
2997             def get_entry(i):
2998                 try:
2999                     return type(self.ydl)._handle_extraction_exceptions(lambda _, i: self._entries[i])(self.ydl, i)
3000                 except (LazyList.IndexError, PagedList.IndexError):
3001                     raise self.IndexError()
3002         return get_entry
3003
3004     def __getitem__(self, idx):
3005         if isinstance(idx, int):
3006             idx = slice(idx, idx)
3007
3008         # NB: PlaylistEntries[1:10] => (0, 1, ... 9)
3009         step = 1 if idx.step is None else idx.step
3010         if idx.start is None:
3011             start = 0 if step > 0 else len(self) - 1
3012         else:
3013             start = idx.start - 1 if idx.start >= 0 else len(self) + idx.start
3014
3015         # NB: Do not call len(self) when idx == [:]
3016         if idx.stop is None:
3017             stop = 0 if step < 0 else float('inf')
3018         else:
3019             stop = idx.stop - 1 if idx.stop >= 0 else len(self) + idx.stop
3020         stop += [-1, 1][step > 0]
3021
3022         for i in frange(start, stop, step):
3023             if i < 0:
3024                 continue
3025             try:
3026                 entry = self._getter(i)
3027             except self.IndexError:
3028                 self.is_exhausted = True
3029                 if step > 0:
3030                     break
3031                 continue
3032             yield i + 1, entry
3033
3034     def __len__(self):
3035         return len(tuple(self[:]))
3036
3037     class IndexError(IndexError):
3038         pass
3039
3040
3041 def uppercase_escape(s):
3042     unicode_escape = codecs.getdecoder('unicode_escape')
3043     return re.sub(
3044         r'\\U[0-9a-fA-F]{8}',
3045         lambda m: unicode_escape(m.group(0))[0],
3046         s)
3047
3048
3049 def lowercase_escape(s):
3050     unicode_escape = codecs.getdecoder('unicode_escape')
3051     return re.sub(
3052         r'\\u[0-9a-fA-F]{4}',
3053         lambda m: unicode_escape(m.group(0))[0],
3054         s)
3055
3056
3057 def escape_rfc3986(s):
3058     """Escape non-ASCII characters as suggested by RFC 3986"""
3059     return urllib.parse.quote(s, b"%/;:@&=+$,!~*'()?#[]")
3060
3061
3062 def escape_url(url):
3063     """Escape URL as suggested by RFC 3986"""
3064     url_parsed = urllib.parse.urlparse(url)
3065     return url_parsed._replace(
3066         netloc=url_parsed.netloc.encode('idna').decode('ascii'),
3067         path=escape_rfc3986(url_parsed.path),
3068         params=escape_rfc3986(url_parsed.params),
3069         query=escape_rfc3986(url_parsed.query),
3070         fragment=escape_rfc3986(url_parsed.fragment)
3071     ).geturl()
3072
3073
3074 def parse_qs(url):
3075     return urllib.parse.parse_qs(urllib.parse.urlparse(url).query)
3076
3077
3078 def read_batch_urls(batch_fd):
3079     def fixup(url):
3080         if not isinstance(url, str):
3081             url = url.decode('utf-8', 'replace')
3082         BOM_UTF8 = ('\xef\xbb\xbf', '\ufeff')
3083         for bom in BOM_UTF8:
3084             if url.startswith(bom):
3085                 url = url[len(bom):]
3086         url = url.lstrip()
3087         if not url or url.startswith(('#', ';', ']')):
3088             return False
3089         # "#" cannot be stripped out since it is part of the URI
3090         # However, it can be safely stripped out if following a whitespace
3091         return re.split(r'\s#', url, 1)[0].rstrip()
3092
3093     with contextlib.closing(batch_fd) as fd:
3094         return [url for url in map(fixup, fd) if url]
3095
3096
3097 def urlencode_postdata(*args, **kargs):
3098     return urllib.parse.urlencode(*args, **kargs).encode('ascii')
3099
3100
3101 def update_url_query(url, query):
3102     if not query:
3103         return url
3104     parsed_url = urllib.parse.urlparse(url)
3105     qs = urllib.parse.parse_qs(parsed_url.query)
3106     qs.update(query)
3107     return urllib.parse.urlunparse(parsed_url._replace(
3108         query=urllib.parse.urlencode(qs, True)))
3109
3110
3111 def update_Request(req, url=None, data=None, headers=None, query=None):
3112     req_headers = req.headers.copy()
3113     req_headers.update(headers or {})
3114     req_data = data or req.data
3115     req_url = update_url_query(url or req.get_full_url(), query)
3116     req_get_method = req.get_method()
3117     if req_get_method == 'HEAD':
3118         req_type = HEADRequest
3119     elif req_get_method == 'PUT':
3120         req_type = PUTRequest
3121     else:
3122         req_type = urllib.request.Request
3123     new_req = req_type(
3124         req_url, data=req_data, headers=req_headers,
3125         origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
3126     if hasattr(req, 'timeout'):
3127         new_req.timeout = req.timeout
3128     return new_req
3129
3130
3131 def _multipart_encode_impl(data, boundary):
3132     content_type = 'multipart/form-data; boundary=%s' % boundary
3133
3134     out = b''
3135     for k, v in data.items():
3136         out += b'--' + boundary.encode('ascii') + b'\r\n'
3137         if isinstance(k, str):
3138             k = k.encode()
3139         if isinstance(v, str):
3140             v = v.encode()
3141         # RFC 2047 requires non-ASCII field names to be encoded, while RFC 7578
3142         # suggests sending UTF-8 directly. Firefox sends UTF-8, too
3143         content = b'Content-Disposition: form-data; name="' + k + b'"\r\n\r\n' + v + b'\r\n'
3144         if boundary.encode('ascii') in content:
3145             raise ValueError('Boundary overlaps with data')
3146         out += content
3147
3148     out += b'--' + boundary.encode('ascii') + b'--\r\n'
3149
3150     return out, content_type
3151
3152
3153 def multipart_encode(data, boundary=None):
3154     '''
3155     Encode a dict to RFC 7578-compliant form-data
3156
3157     data:
3158         A dict where keys and values can be either Unicode or bytes-like
3159         objects.
3160     boundary:
3161         If specified a Unicode object, it's used as the boundary. Otherwise
3162         a random boundary is generated.
3163
3164     Reference: https://tools.ietf.org/html/rfc7578
3165     '''
3166     has_specified_boundary = boundary is not None
3167
3168     while True:
3169         if boundary is None:
3170             boundary = '---------------' + str(random.randrange(0x0fffffff, 0xffffffff))
3171
3172         try:
3173             out, content_type = _multipart_encode_impl(data, boundary)
3174             break
3175         except ValueError:
3176             if has_specified_boundary:
3177                 raise
3178             boundary = None
3179
3180     return out, content_type
3181
3182
3183 def variadic(x, allowed_types=(str, bytes, dict)):
3184     return x if isinstance(x, collections.abc.Iterable) and not isinstance(x, allowed_types) else (x,)
3185
3186
3187 def dict_get(d, key_or_keys, default=None, skip_false_values=True):
3188     for val in map(d.get, variadic(key_or_keys)):
3189         if val is not None and (val or not skip_false_values):
3190             return val
3191     return default
3192
3193
3194 def try_call(*funcs, expected_type=None, args=[], kwargs={}):
3195     for f in funcs:
3196         try:
3197             val = f(*args, **kwargs)
3198         except (AttributeError, KeyError, TypeError, IndexError, ValueError, ZeroDivisionError):
3199             pass
3200         else:
3201             if expected_type is None or isinstance(val, expected_type):
3202                 return val
3203
3204
3205 def try_get(src, getter, expected_type=None):
3206     return try_call(*variadic(getter), args=(src,), expected_type=expected_type)
3207
3208
3209 def filter_dict(dct, cndn=lambda _, v: v is not None):
3210     return {k: v for k, v in dct.items() if cndn(k, v)}
3211
3212
3213 def merge_dicts(*dicts):
3214     merged = {}
3215     for a_dict in dicts:
3216         for k, v in a_dict.items():
3217             if (v is not None and k not in merged
3218                     or isinstance(v, str) and merged[k] == ''):
3219                 merged[k] = v
3220     return merged
3221
3222
3223 def encode_compat_str(string, encoding=preferredencoding(), errors='strict'):
3224     return string if isinstance(string, str) else str(string, encoding, errors)
3225
3226
3227 US_RATINGS = {
3228     'G': 0,
3229     'PG': 10,
3230     'PG-13': 13,
3231     'R': 16,
3232     'NC': 18,
3233 }
3234
3235
3236 TV_PARENTAL_GUIDELINES = {
3237     'TV-Y': 0,
3238     'TV-Y7': 7,
3239     'TV-G': 0,
3240     'TV-PG': 0,
3241     'TV-14': 14,
3242     'TV-MA': 17,
3243 }
3244
3245
3246 def parse_age_limit(s):
3247     # isinstance(False, int) is True. So type() must be used instead
3248     if type(s) is int:  # noqa: E721
3249         return s if 0 <= s <= 21 else None
3250     elif not isinstance(s, str):
3251         return None
3252     m = re.match(r'^(?P<age>\d{1,2})\+?$', s)
3253     if m:
3254         return int(m.group('age'))
3255     s = s.upper()
3256     if s in US_RATINGS:
3257         return US_RATINGS[s]
3258     m = re.match(r'^TV[_-]?(%s)$' % '|'.join(k[3:] for k in TV_PARENTAL_GUIDELINES), s)
3259     if m:
3260         return TV_PARENTAL_GUIDELINES['TV-' + m.group(1)]
3261     return None
3262
3263
3264 def strip_jsonp(code):
3265     return re.sub(
3266         r'''(?sx)^
3267             (?:window\.)?(?P<func_name>[a-zA-Z0-9_.$]*)
3268             (?:\s*&&\s*(?P=func_name))?
3269             \s*\(\s*(?P<callback_data>.*)\);?
3270             \s*?(?://[^\n]*)*$''',
3271         r'\g<callback_data>', code)
3272
3273
3274 def js_to_json(code, vars={}, *, strict=False):
3275     # vars is a dict of var, val pairs to substitute
3276     COMMENT_RE = r'/\*(?:(?!\*/).)*?\*/|//[^\n]*\n'
3277     SKIP_RE = fr'\s*(?:{COMMENT_RE})?\s*'
3278     INTEGER_TABLE = (
3279         (fr'(?s)^(0[xX][0-9a-fA-F]+){SKIP_RE}:?$', 16),
3280         (fr'(?s)^(0+[0-7]+){SKIP_RE}:?$', 8),
3281     )
3282
3283     def fix_kv(m):
3284         v = m.group(0)
3285         if v in ('true', 'false', 'null'):
3286             return v
3287         elif v in ('undefined', 'void 0'):
3288             return 'null'
3289         elif v.startswith('/*') or v.startswith('//') or v.startswith('!') or v == ',':
3290             return ""
3291
3292         if v[0] in ("'", '"'):
3293             v = re.sub(r'(?s)\\.|"', lambda m: {
3294                 '"': '\\"',
3295                 "\\'": "'",
3296                 '\\\n': '',
3297                 '\\x': '\\u00',
3298             }.get(m.group(0), m.group(0)), v[1:-1])
3299         else:
3300             for regex, base in INTEGER_TABLE:
3301                 im = re.match(regex, v)
3302                 if im:
3303                     i = int(im.group(1), base)
3304                     return '"%d":' % i if v.endswith(':') else '%d' % i
3305
3306             if v in vars:
3307                 return json.dumps(vars[v])
3308             if strict:
3309                 raise ValueError(f'Unknown value: {v}')
3310
3311         return '"%s"' % v
3312
3313     def create_map(mobj):
3314         return json.dumps(dict(json.loads(js_to_json(mobj.group(1) or '[]', vars=vars))))
3315
3316     code = re.sub(r'new Map\((\[.*?\])?\)', create_map, code)
3317     if not strict:
3318         code = re.sub(r'new Date\((".+")\)', r'\g<1>', code)
3319         code = re.sub(r'new \w+\((.*?)\)', lambda m: json.dumps(m.group(0)), code)
3320
3321     return re.sub(r'''(?sx)
3322         "(?:[^"\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^"\\]*"|
3323         '(?:[^'\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^'\\]*'|
3324         {comment}|,(?={skip}[\]}}])|
3325         void\s0|(?:(?<![0-9])[eE]|[a-df-zA-DF-Z_$])[.a-zA-Z_$0-9]*|
3326         \b(?:0[xX][0-9a-fA-F]+|0+[0-7]+)(?:{skip}:)?|
3327         [0-9]+(?={skip}:)|
3328         !+
3329         '''.format(comment=COMMENT_RE, skip=SKIP_RE), fix_kv, code)
3330
3331
3332 def qualities(quality_ids):
3333     """ Get a numeric quality value out of a list of possible values """
3334     def q(qid):
3335         try:
3336             return quality_ids.index(qid)
3337         except ValueError:
3338             return -1
3339     return q
3340
3341
3342 POSTPROCESS_WHEN = ('pre_process', 'after_filter', 'before_dl', 'post_process', 'after_move', 'after_video', 'playlist')
3343
3344
3345 DEFAULT_OUTTMPL = {
3346     'default': '%(title)s [%(id)s].%(ext)s',
3347     'chapter': '%(title)s - %(section_number)03d %(section_title)s [%(id)s].%(ext)s',
3348 }
3349 OUTTMPL_TYPES = {
3350     'chapter': None,
3351     'subtitle': None,
3352     'thumbnail': None,
3353     'description': 'description',
3354     'annotation': 'annotations.xml',
3355     'infojson': 'info.json',
3356     'link': None,
3357     'pl_video': None,
3358     'pl_thumbnail': None,
3359     'pl_description': 'description',
3360     'pl_infojson': 'info.json',
3361 }
3362
3363 # As of [1] format syntax is:
3364 #  %[mapping_key][conversion_flags][minimum_width][.precision][length_modifier]type
3365 # 1. https://docs.python.org/2/library/stdtypes.html#string-formatting
3366 STR_FORMAT_RE_TMPL = r'''(?x)
3367     (?<!%)(?P<prefix>(?:%%)*)
3368     %
3369     (?P<has_key>\((?P<key>{0})\))?
3370     (?P<format>
3371         (?P<conversion>[#0\-+ ]+)?
3372         (?P<min_width>\d+)?
3373         (?P<precision>\.\d+)?
3374         (?P<len_mod>[hlL])?  # unused in python
3375         {1}  # conversion type
3376     )
3377 '''
3378
3379
3380 STR_FORMAT_TYPES = 'diouxXeEfFgGcrs'
3381
3382
3383 def limit_length(s, length):
3384     """ Add ellipses to overly long strings """
3385     if s is None:
3386         return None
3387     ELLIPSES = '...'
3388     if len(s) > length:
3389         return s[:length - len(ELLIPSES)] + ELLIPSES
3390     return s
3391
3392
3393 def version_tuple(v):
3394     return tuple(int(e) for e in re.split(r'[-.]', v))
3395
3396
3397 def is_outdated_version(version, limit, assume_new=True):
3398     if not version:
3399         return not assume_new
3400     try:
3401         return version_tuple(version) < version_tuple(limit)
3402     except ValueError:
3403         return not assume_new
3404
3405
3406 def ytdl_is_updateable():
3407     """ Returns if yt-dlp can be updated with -U """
3408
3409     from .update import is_non_updateable
3410
3411     return not is_non_updateable()
3412
3413
3414 def args_to_str(args):
3415     # Get a short string representation for a subprocess command
3416     return ' '.join(compat_shlex_quote(a) for a in args)
3417
3418
3419 def error_to_compat_str(err):
3420     return str(err)
3421
3422
3423 def error_to_str(err):
3424     return f'{type(err).__name__}: {err}'
3425
3426
3427 def mimetype2ext(mt):
3428     if mt is None:
3429         return None
3430
3431     mt, _, params = mt.partition(';')
3432     mt = mt.strip()
3433
3434     FULL_MAP = {
3435         'audio/mp4': 'm4a',
3436         # Per RFC 3003, audio/mpeg can be .mp1, .mp2 or .mp3. Here use .mp3 as
3437         # it's the most popular one
3438         'audio/mpeg': 'mp3',
3439         'audio/x-wav': 'wav',
3440         'audio/wav': 'wav',
3441         'audio/wave': 'wav',
3442     }
3443
3444     ext = FULL_MAP.get(mt)
3445     if ext is not None:
3446         return ext
3447
3448     SUBTYPE_MAP = {
3449         '3gpp': '3gp',
3450         'smptett+xml': 'tt',
3451         'ttaf+xml': 'dfxp',
3452         'ttml+xml': 'ttml',
3453         'x-flv': 'flv',
3454         'x-mp4-fragmented': 'mp4',
3455         'x-ms-sami': 'sami',
3456         'x-ms-wmv': 'wmv',
3457         'mpegurl': 'm3u8',
3458         'x-mpegurl': 'm3u8',
3459         'vnd.apple.mpegurl': 'm3u8',
3460         'dash+xml': 'mpd',
3461         'f4m+xml': 'f4m',
3462         'hds+xml': 'f4m',
3463         'vnd.ms-sstr+xml': 'ism',
3464         'quicktime': 'mov',
3465         'mp2t': 'ts',
3466         'x-wav': 'wav',
3467         'filmstrip+json': 'fs',
3468         'svg+xml': 'svg',
3469     }
3470
3471     _, _, subtype = mt.rpartition('/')
3472     ext = SUBTYPE_MAP.get(subtype.lower())
3473     if ext is not None:
3474         return ext
3475
3476     SUFFIX_MAP = {
3477         'json': 'json',
3478         'xml': 'xml',
3479         'zip': 'zip',
3480         'gzip': 'gz',
3481     }
3482
3483     _, _, suffix = subtype.partition('+')
3484     ext = SUFFIX_MAP.get(suffix)
3485     if ext is not None:
3486         return ext
3487
3488     return subtype.replace('+', '.')
3489
3490
3491 def ext2mimetype(ext_or_url):
3492     if not ext_or_url:
3493         return None
3494     if '.' not in ext_or_url:
3495         ext_or_url = f'file.{ext_or_url}'
3496     return mimetypes.guess_type(ext_or_url)[0]
3497
3498
3499 def parse_codecs(codecs_str):
3500     # http://tools.ietf.org/html/rfc6381
3501     if not codecs_str:
3502         return {}
3503     split_codecs = list(filter(None, map(
3504         str.strip, codecs_str.strip().strip(',').split(','))))
3505     vcodec, acodec, scodec, hdr = None, None, None, None
3506     for full_codec in split_codecs:
3507         parts = re.sub(r'0+(?=\d)', '', full_codec).split('.')
3508         if parts[0] in ('avc1', 'avc2', 'avc3', 'avc4', 'vp9', 'vp8', 'hev1', 'hev2',
3509                         'h263', 'h264', 'mp4v', 'hvc1', 'av1', 'theora', 'dvh1', 'dvhe'):
3510             if vcodec:
3511                 continue
3512             vcodec = full_codec
3513             if parts[0] in ('dvh1', 'dvhe'):
3514                 hdr = 'DV'
3515             elif parts[0] == 'av1' and traverse_obj(parts, 3) == '10':
3516                 hdr = 'HDR10'
3517             elif parts[:2] == ['vp9', '2']:
3518                 hdr = 'HDR10'
3519         elif parts[0] in ('flac', 'mp4a', 'opus', 'vorbis', 'mp3', 'aac',
3520                           'ac-3', 'ec-3', 'eac3', 'dtsc', 'dtse', 'dtsh', 'dtsl'):
3521             acodec = acodec or full_codec
3522         elif parts[0] in ('stpp', 'wvtt'):
3523             scodec = scodec or full_codec
3524         else:
3525             write_string(f'WARNING: Unknown codec {full_codec}\n')
3526     if vcodec or acodec or scodec:
3527         return {
3528             'vcodec': vcodec or 'none',
3529             'acodec': acodec or 'none',
3530             'dynamic_range': hdr,
3531             **({'scodec': scodec} if scodec is not None else {}),
3532         }
3533     elif len(split_codecs) == 2:
3534         return {
3535             'vcodec': split_codecs[0],
3536             'acodec': split_codecs[1],
3537         }
3538     return {}
3539
3540
3541 def get_compatible_ext(*, vcodecs, acodecs, vexts, aexts, preferences=None):
3542     assert len(vcodecs) == len(vexts) and len(acodecs) == len(aexts)
3543
3544     allow_mkv = not preferences or 'mkv' in preferences
3545
3546     if allow_mkv and max(len(acodecs), len(vcodecs)) > 1:
3547         return 'mkv'  # TODO: any other format allows this?
3548
3549     # TODO: All codecs supported by parse_codecs isn't handled here
3550     COMPATIBLE_CODECS = {
3551         'mp4': {
3552             'av1', 'hevc', 'avc1', 'mp4a',  # fourcc (m3u8, mpd)
3553             'h264', 'aacl', 'ec-3',  # Set in ISM
3554         },
3555         'webm': {
3556             'av1', 'vp9', 'vp8', 'opus', 'vrbs',
3557             'vp9x', 'vp8x',  # in the webm spec
3558         },
3559     }
3560
3561     sanitize_codec = functools.partial(try_get, getter=lambda x: x[0].split('.')[0].replace('0', ''))
3562     vcodec, acodec = sanitize_codec(vcodecs), sanitize_codec(acodecs)
3563
3564     for ext in preferences or COMPATIBLE_CODECS.keys():
3565         codec_set = COMPATIBLE_CODECS.get(ext, set())
3566         if ext == 'mkv' or codec_set.issuperset((vcodec, acodec)):
3567             return ext
3568
3569     COMPATIBLE_EXTS = (
3570         {'mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma', 'mov'},
3571         {'webm'},
3572     )
3573     for ext in preferences or vexts:
3574         current_exts = {ext, *vexts, *aexts}
3575         if ext == 'mkv' or current_exts == {ext} or any(
3576                 ext_sets.issuperset(current_exts) for ext_sets in COMPATIBLE_EXTS):
3577             return ext
3578     return 'mkv' if allow_mkv else preferences[-1]
3579
3580
3581 def urlhandle_detect_ext(url_handle):
3582     getheader = url_handle.headers.get
3583
3584     cd = getheader('Content-Disposition')
3585     if cd:
3586         m = re.match(r'attachment;\s*filename="(?P<filename>[^"]+)"', cd)
3587         if m:
3588             e = determine_ext(m.group('filename'), default_ext=None)
3589             if e:
3590                 return e
3591
3592     return mimetype2ext(getheader('Content-Type'))
3593
3594
3595 def encode_data_uri(data, mime_type):
3596     return 'data:%s;base64,%s' % (mime_type, base64.b64encode(data).decode('ascii'))
3597
3598
3599 def age_restricted(content_limit, age_limit):
3600     """ Returns True iff the content should be blocked """
3601
3602     if age_limit is None:  # No limit set
3603         return False
3604     if content_limit is None:
3605         return False  # Content available for everyone
3606     return age_limit < content_limit
3607
3608
3609 # List of known byte-order-marks (BOM)
3610 BOMS = [
3611     (b'\xef\xbb\xbf', 'utf-8'),
3612     (b'\x00\x00\xfe\xff', 'utf-32-be'),
3613     (b'\xff\xfe\x00\x00', 'utf-32-le'),
3614     (b'\xff\xfe', 'utf-16-le'),
3615     (b'\xfe\xff', 'utf-16-be'),
3616 ]
3617
3618
3619 def is_html(first_bytes):
3620     """ Detect whether a file contains HTML by examining its first bytes. """
3621
3622     encoding = 'utf-8'
3623     for bom, enc in BOMS:
3624         while first_bytes.startswith(bom):
3625             encoding, first_bytes = enc, first_bytes[len(bom):]
3626
3627     return re.match(r'^\s*<', first_bytes.decode(encoding, 'replace'))
3628
3629
3630 def determine_protocol(info_dict):
3631     protocol = info_dict.get('protocol')
3632     if protocol is not None:
3633         return protocol
3634
3635     url = sanitize_url(info_dict['url'])
3636     if url.startswith('rtmp'):
3637         return 'rtmp'
3638     elif url.startswith('mms'):
3639         return 'mms'
3640     elif url.startswith('rtsp'):
3641         return 'rtsp'
3642
3643     ext = determine_ext(url)
3644     if ext == 'm3u8':
3645         return 'm3u8' if info_dict.get('is_live') else 'm3u8_native'
3646     elif ext == 'f4m':
3647         return 'f4m'
3648
3649     return urllib.parse.urlparse(url).scheme
3650
3651
3652 def render_table(header_row, data, delim=False, extra_gap=0, hide_empty=False):
3653     """ Render a list of rows, each as a list of values.
3654     Text after a \t will be right aligned """
3655     def width(string):
3656         return len(remove_terminal_sequences(string).replace('\t', ''))
3657
3658     def get_max_lens(table):
3659         return [max(width(str(v)) for v in col) for col in zip(*table)]
3660
3661     def filter_using_list(row, filterArray):
3662         return [col for take, col in itertools.zip_longest(filterArray, row, fillvalue=True) if take]
3663
3664     max_lens = get_max_lens(data) if hide_empty else []
3665     header_row = filter_using_list(header_row, max_lens)
3666     data = [filter_using_list(row, max_lens) for row in data]
3667
3668     table = [header_row] + data
3669     max_lens = get_max_lens(table)
3670     extra_gap += 1
3671     if delim:
3672         table = [header_row, [delim * (ml + extra_gap) for ml in max_lens]] + data
3673         table[1][-1] = table[1][-1][:-extra_gap * len(delim)]  # Remove extra_gap from end of delimiter
3674     for row in table:
3675         for pos, text in enumerate(map(str, row)):
3676             if '\t' in text:
3677                 row[pos] = text.replace('\t', ' ' * (max_lens[pos] - width(text))) + ' ' * extra_gap
3678             else:
3679                 row[pos] = text + ' ' * (max_lens[pos] - width(text) + extra_gap)
3680     ret = '\n'.join(''.join(row).rstrip() for row in table)
3681     return ret
3682
3683
3684 def _match_one(filter_part, dct, incomplete):
3685     # TODO: Generalize code with YoutubeDL._build_format_filter
3686     STRING_OPERATORS = {
3687         '*=': operator.contains,
3688         '^=': lambda attr, value: attr.startswith(value),
3689         '$=': lambda attr, value: attr.endswith(value),
3690         '~=': lambda attr, value: re.search(value, attr),
3691     }
3692     COMPARISON_OPERATORS = {
3693         **STRING_OPERATORS,
3694         '<=': operator.le,  # "<=" must be defined above "<"
3695         '<': operator.lt,
3696         '>=': operator.ge,
3697         '>': operator.gt,
3698         '=': operator.eq,
3699     }
3700
3701     if isinstance(incomplete, bool):
3702         is_incomplete = lambda _: incomplete
3703     else:
3704         is_incomplete = lambda k: k in incomplete
3705
3706     operator_rex = re.compile(r'''(?x)
3707         (?P<key>[a-z_]+)
3708         \s*(?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
3709         (?:
3710             (?P<quote>["\'])(?P<quotedstrval>.+?)(?P=quote)|
3711             (?P<strval>.+?)
3712         )
3713         ''' % '|'.join(map(re.escape, COMPARISON_OPERATORS.keys())))
3714     m = operator_rex.fullmatch(filter_part.strip())
3715     if m:
3716         m = m.groupdict()
3717         unnegated_op = COMPARISON_OPERATORS[m['op']]
3718         if m['negation']:
3719             op = lambda attr, value: not unnegated_op(attr, value)
3720         else:
3721             op = unnegated_op
3722         comparison_value = m['quotedstrval'] or m['strval'] or m['intval']
3723         if m['quote']:
3724             comparison_value = comparison_value.replace(r'\%s' % m['quote'], m['quote'])
3725         actual_value = dct.get(m['key'])
3726         numeric_comparison = None
3727         if isinstance(actual_value, (int, float)):
3728             # If the original field is a string and matching comparisonvalue is
3729             # a number we should respect the origin of the original field
3730             # and process comparison value as a string (see
3731             # https://github.com/ytdl-org/youtube-dl/issues/11082)
3732             try:
3733                 numeric_comparison = int(comparison_value)
3734             except ValueError:
3735                 numeric_comparison = parse_filesize(comparison_value)
3736                 if numeric_comparison is None:
3737                     numeric_comparison = parse_filesize(f'{comparison_value}B')
3738                 if numeric_comparison is None:
3739                     numeric_comparison = parse_duration(comparison_value)
3740         if numeric_comparison is not None and m['op'] in STRING_OPERATORS:
3741             raise ValueError('Operator %s only supports string values!' % m['op'])
3742         if actual_value is None:
3743             return is_incomplete(m['key']) or m['none_inclusive']
3744         return op(actual_value, comparison_value if numeric_comparison is None else numeric_comparison)
3745
3746     UNARY_OPERATORS = {
3747         '': lambda v: (v is True) if isinstance(v, bool) else (v is not None),
3748         '!': lambda v: (v is False) if isinstance(v, bool) else (v is None),
3749     }
3750     operator_rex = re.compile(r'''(?x)
3751         (?P<op>%s)\s*(?P<key>[a-z_]+)
3752         ''' % '|'.join(map(re.escape, UNARY_OPERATORS.keys())))
3753     m = operator_rex.fullmatch(filter_part.strip())
3754     if m:
3755         op = UNARY_OPERATORS[m.group('op')]
3756         actual_value = dct.get(m.group('key'))
3757         if is_incomplete(m.group('key')) and actual_value is None:
3758             return True
3759         return op(actual_value)
3760
3761     raise ValueError('Invalid filter part %r' % filter_part)
3762
3763
3764 def match_str(filter_str, dct, incomplete=False):
3765     """ Filter a dictionary with a simple string syntax.
3766     @returns           Whether the filter passes
3767     @param incomplete  Set of keys that is expected to be missing from dct.
3768                        Can be True/False to indicate all/none of the keys may be missing.
3769                        All conditions on incomplete keys pass if the key is missing
3770     """
3771     return all(
3772         _match_one(filter_part.replace(r'\&', '&'), dct, incomplete)
3773         for filter_part in re.split(r'(?<!\\)&', filter_str))
3774
3775
3776 def match_filter_func(filters):
3777     if not filters:
3778         return None
3779     filters = set(variadic(filters))
3780
3781     interactive = '-' in filters
3782     if interactive:
3783         filters.remove('-')
3784
3785     def _match_func(info_dict, incomplete=False):
3786         if not filters or any(match_str(f, info_dict, incomplete) for f in filters):
3787             return NO_DEFAULT if interactive and not incomplete else None
3788         else:
3789             video_title = info_dict.get('title') or info_dict.get('id') or 'entry'
3790             filter_str = ') | ('.join(map(str.strip, filters))
3791             return f'{video_title} does not pass filter ({filter_str}), skipping ..'
3792     return _match_func
3793
3794
3795 class download_range_func:
3796     def __init__(self, chapters, ranges):
3797         self.chapters, self.ranges = chapters, ranges
3798
3799     def __call__(self, info_dict, ydl):
3800         if not self.ranges and not self.chapters:
3801             yield {}
3802
3803         warning = ('There are no chapters matching the regex' if info_dict.get('chapters')
3804                    else 'Cannot match chapters since chapter information is unavailable')
3805         for regex in self.chapters or []:
3806             for i, chapter in enumerate(info_dict.get('chapters') or []):
3807                 if re.search(regex, chapter['title']):
3808                     warning = None
3809                     yield {**chapter, 'index': i}
3810         if self.chapters and warning:
3811             ydl.to_screen(f'[info] {info_dict["id"]}: {warning}')
3812
3813         yield from ({'start_time': start, 'end_time': end} for start, end in self.ranges or [])
3814
3815     def __eq__(self, other):
3816         return (isinstance(other, download_range_func)
3817                 and self.chapters == other.chapters and self.ranges == other.ranges)
3818
3819
3820 def parse_dfxp_time_expr(time_expr):
3821     if not time_expr:
3822         return
3823
3824     mobj = re.match(rf'^(?P<time_offset>{NUMBER_RE})s?$', time_expr)
3825     if mobj:
3826         return float(mobj.group('time_offset'))
3827
3828     mobj = re.match(r'^(\d+):(\d\d):(\d\d(?:(?:\.|:)\d+)?)$', time_expr)
3829     if mobj:
3830         return 3600 * int(mobj.group(1)) + 60 * int(mobj.group(2)) + float(mobj.group(3).replace(':', '.'))
3831
3832
3833 def srt_subtitles_timecode(seconds):
3834     return '%02d:%02d:%02d,%03d' % timetuple_from_msec(seconds * 1000)
3835
3836
3837 def ass_subtitles_timecode(seconds):
3838     time = timetuple_from_msec(seconds * 1000)
3839     return '%01d:%02d:%02d.%02d' % (*time[:-1], time.milliseconds / 10)
3840
3841
3842 def dfxp2srt(dfxp_data):
3843     '''
3844     @param dfxp_data A bytes-like object containing DFXP data
3845     @returns A unicode object containing converted SRT data
3846     '''
3847     LEGACY_NAMESPACES = (
3848         (b'http://www.w3.org/ns/ttml', [
3849             b'http://www.w3.org/2004/11/ttaf1',
3850             b'http://www.w3.org/2006/04/ttaf1',
3851             b'http://www.w3.org/2006/10/ttaf1',
3852         ]),
3853         (b'http://www.w3.org/ns/ttml#styling', [
3854             b'http://www.w3.org/ns/ttml#style',
3855         ]),
3856     )
3857
3858     SUPPORTED_STYLING = [
3859         'color',
3860         'fontFamily',
3861         'fontSize',
3862         'fontStyle',
3863         'fontWeight',
3864         'textDecoration'
3865     ]
3866
3867     _x = functools.partial(xpath_with_ns, ns_map={
3868         'xml': 'http://www.w3.org/XML/1998/namespace',
3869         'ttml': 'http://www.w3.org/ns/ttml',
3870         'tts': 'http://www.w3.org/ns/ttml#styling',
3871     })
3872
3873     styles = {}
3874     default_style = {}
3875
3876     class TTMLPElementParser:
3877         _out = ''
3878         _unclosed_elements = []
3879         _applied_styles = []
3880
3881         def start(self, tag, attrib):
3882             if tag in (_x('ttml:br'), 'br'):
3883                 self._out += '\n'
3884             else:
3885                 unclosed_elements = []
3886                 style = {}
3887                 element_style_id = attrib.get('style')
3888                 if default_style:
3889                     style.update(default_style)
3890                 if element_style_id:
3891                     style.update(styles.get(element_style_id, {}))
3892                 for prop in SUPPORTED_STYLING:
3893                     prop_val = attrib.get(_x('tts:' + prop))
3894                     if prop_val:
3895                         style[prop] = prop_val
3896                 if style:
3897                     font = ''
3898                     for k, v in sorted(style.items()):
3899                         if self._applied_styles and self._applied_styles[-1].get(k) == v:
3900                             continue
3901                         if k == 'color':
3902                             font += ' color="%s"' % v
3903                         elif k == 'fontSize':
3904                             font += ' size="%s"' % v
3905                         elif k == 'fontFamily':
3906                             font += ' face="%s"' % v
3907                         elif k == 'fontWeight' and v == 'bold':
3908                             self._out += '<b>'
3909                             unclosed_elements.append('b')
3910                         elif k == 'fontStyle' and v == 'italic':
3911                             self._out += '<i>'
3912                             unclosed_elements.append('i')
3913                         elif k == 'textDecoration' and v == 'underline':
3914                             self._out += '<u>'
3915                             unclosed_elements.append('u')
3916                     if font:
3917                         self._out += '<font' + font + '>'
3918                         unclosed_elements.append('font')
3919                     applied_style = {}
3920                     if self._applied_styles:
3921                         applied_style.update(self._applied_styles[-1])
3922                     applied_style.update(style)
3923                     self._applied_styles.append(applied_style)
3924                 self._unclosed_elements.append(unclosed_elements)
3925
3926         def end(self, tag):
3927             if tag not in (_x('ttml:br'), 'br'):
3928                 unclosed_elements = self._unclosed_elements.pop()
3929                 for element in reversed(unclosed_elements):
3930                     self._out += '</%s>' % element
3931                 if unclosed_elements and self._applied_styles:
3932                     self._applied_styles.pop()
3933
3934         def data(self, data):
3935             self._out += data
3936
3937         def close(self):
3938             return self._out.strip()
3939
3940     def parse_node(node):
3941         target = TTMLPElementParser()
3942         parser = xml.etree.ElementTree.XMLParser(target=target)
3943         parser.feed(xml.etree.ElementTree.tostring(node))
3944         return parser.close()
3945
3946     for k, v in LEGACY_NAMESPACES:
3947         for ns in v:
3948             dfxp_data = dfxp_data.replace(ns, k)
3949
3950     dfxp = compat_etree_fromstring(dfxp_data)
3951     out = []
3952     paras = dfxp.findall(_x('.//ttml:p')) or dfxp.findall('.//p')
3953
3954     if not paras:
3955         raise ValueError('Invalid dfxp/TTML subtitle')
3956
3957     repeat = False
3958     while True:
3959         for style in dfxp.findall(_x('.//ttml:style')):
3960             style_id = style.get('id') or style.get(_x('xml:id'))
3961             if not style_id:
3962                 continue
3963             parent_style_id = style.get('style')
3964             if parent_style_id:
3965                 if parent_style_id not in styles:
3966                     repeat = True
3967                     continue
3968                 styles[style_id] = styles[parent_style_id].copy()
3969             for prop in SUPPORTED_STYLING:
3970                 prop_val = style.get(_x('tts:' + prop))
3971                 if prop_val:
3972                     styles.setdefault(style_id, {})[prop] = prop_val
3973         if repeat:
3974             repeat = False
3975         else:
3976             break
3977
3978     for p in ('body', 'div'):
3979         ele = xpath_element(dfxp, [_x('.//ttml:' + p), './/' + p])
3980         if ele is None:
3981             continue
3982         style = styles.get(ele.get('style'))
3983         if not style:
3984             continue
3985         default_style.update(style)
3986
3987     for para, index in zip(paras, itertools.count(1)):
3988         begin_time = parse_dfxp_time_expr(para.attrib.get('begin'))
3989         end_time = parse_dfxp_time_expr(para.attrib.get('end'))
3990         dur = parse_dfxp_time_expr(para.attrib.get('dur'))
3991         if begin_time is None:
3992             continue
3993         if not end_time:
3994             if not dur:
3995                 continue
3996             end_time = begin_time + dur
3997         out.append('%d\n%s --> %s\n%s\n\n' % (
3998             index,
3999             srt_subtitles_timecode(begin_time),
4000             srt_subtitles_timecode(end_time),
4001             parse_node(para)))
4002
4003     return ''.join(out)
4004
4005
4006 def cli_option(params, command_option, param, separator=None):
4007     param = params.get(param)
4008     return ([] if param is None
4009             else [command_option, str(param)] if separator is None
4010             else [f'{command_option}{separator}{param}'])
4011
4012
4013 def cli_bool_option(params, command_option, param, true_value='true', false_value='false', separator=None):
4014     param = params.get(param)
4015     assert param in (True, False, None)
4016     return cli_option({True: true_value, False: false_value}, command_option, param, separator)
4017
4018
4019 def cli_valueless_option(params, command_option, param, expected_value=True):
4020     return [command_option] if params.get(param) == expected_value else []
4021
4022
4023 def cli_configuration_args(argdict, keys, default=[], use_compat=True):
4024     if isinstance(argdict, (list, tuple)):  # for backward compatibility
4025         if use_compat:
4026             return argdict
4027         else:
4028             argdict = None
4029     if argdict is None:
4030         return default
4031     assert isinstance(argdict, dict)
4032
4033     assert isinstance(keys, (list, tuple))
4034     for key_list in keys:
4035         arg_list = list(filter(
4036             lambda x: x is not None,
4037             [argdict.get(key.lower()) for key in variadic(key_list)]))
4038         if arg_list:
4039             return [arg for args in arg_list for arg in args]
4040     return default
4041
4042
4043 def _configuration_args(main_key, argdict, exe, keys=None, default=[], use_compat=True):
4044     main_key, exe = main_key.lower(), exe.lower()
4045     root_key = exe if main_key == exe else f'{main_key}+{exe}'
4046     keys = [f'{root_key}{k}' for k in (keys or [''])]
4047     if root_key in keys:
4048         if main_key != exe:
4049             keys.append((main_key, exe))
4050         keys.append('default')
4051     else:
4052         use_compat = False
4053     return cli_configuration_args(argdict, keys, default, use_compat)
4054
4055
4056 class ISO639Utils:
4057     # See http://www.loc.gov/standards/iso639-2/ISO-639-2_utf-8.txt
4058     _lang_map = {
4059         'aa': 'aar',
4060         'ab': 'abk',
4061         'ae': 'ave',
4062         'af': 'afr',
4063         'ak': 'aka',
4064         'am': 'amh',
4065         'an': 'arg',
4066         'ar': 'ara',
4067         'as': 'asm',
4068         'av': 'ava',
4069         'ay': 'aym',
4070         'az': 'aze',
4071         'ba': 'bak',
4072         'be': 'bel',
4073         'bg': 'bul',
4074         'bh': 'bih',
4075         'bi': 'bis',
4076         'bm': 'bam',
4077         'bn': 'ben',
4078         'bo': 'bod',
4079         'br': 'bre',
4080         'bs': 'bos',
4081         'ca': 'cat',
4082         'ce': 'che',
4083         'ch': 'cha',
4084         'co': 'cos',
4085         'cr': 'cre',
4086         'cs': 'ces',
4087         'cu': 'chu',
4088         'cv': 'chv',
4089         'cy': 'cym',
4090         'da': 'dan',
4091         'de': 'deu',
4092         'dv': 'div',
4093         'dz': 'dzo',
4094         'ee': 'ewe',
4095         'el': 'ell',
4096         'en': 'eng',
4097         'eo': 'epo',
4098         'es': 'spa',
4099         'et': 'est',
4100         'eu': 'eus',
4101         'fa': 'fas',
4102         'ff': 'ful',
4103         'fi': 'fin',
4104         'fj': 'fij',
4105         'fo': 'fao',
4106         'fr': 'fra',
4107         'fy': 'fry',
4108         'ga': 'gle',
4109         'gd': 'gla',
4110         'gl': 'glg',
4111         'gn': 'grn',
4112         'gu': 'guj',
4113         'gv': 'glv',
4114         'ha': 'hau',
4115         'he': 'heb',
4116         'iw': 'heb',  # Replaced by he in 1989 revision
4117         'hi': 'hin',
4118         'ho': 'hmo',
4119         'hr': 'hrv',
4120         'ht': 'hat',
4121         'hu': 'hun',
4122         'hy': 'hye',
4123         'hz': 'her',
4124         'ia': 'ina',
4125         'id': 'ind',
4126         'in': 'ind',  # Replaced by id in 1989 revision
4127         'ie': 'ile',
4128         'ig': 'ibo',
4129         'ii': 'iii',
4130         'ik': 'ipk',
4131         'io': 'ido',
4132         'is': 'isl',
4133         'it': 'ita',
4134         'iu': 'iku',
4135         'ja': 'jpn',
4136         'jv': 'jav',
4137         'ka': 'kat',
4138         'kg': 'kon',
4139         'ki': 'kik',
4140         'kj': 'kua',
4141         'kk': 'kaz',
4142         'kl': 'kal',
4143         'km': 'khm',
4144         'kn': 'kan',
4145         'ko': 'kor',
4146         'kr': 'kau',
4147         'ks': 'kas',
4148         'ku': 'kur',
4149         'kv': 'kom',
4150         'kw': 'cor',
4151         'ky': 'kir',
4152         'la': 'lat',
4153         'lb': 'ltz',
4154         'lg': 'lug',
4155         'li': 'lim',
4156         'ln': 'lin',
4157         'lo': 'lao',
4158         'lt': 'lit',
4159         'lu': 'lub',
4160         'lv': 'lav',
4161         'mg': 'mlg',
4162         'mh': 'mah',
4163         'mi': 'mri',
4164         'mk': 'mkd',
4165         'ml': 'mal',
4166         'mn': 'mon',
4167         'mr': 'mar',
4168         'ms': 'msa',
4169         'mt': 'mlt',
4170         'my': 'mya',
4171         'na': 'nau',
4172         'nb': 'nob',
4173         'nd': 'nde',
4174         'ne': 'nep',
4175         'ng': 'ndo',
4176         'nl': 'nld',
4177         'nn': 'nno',
4178         'no': 'nor',
4179         'nr': 'nbl',
4180         'nv': 'nav',
4181         'ny': 'nya',
4182         'oc': 'oci',
4183         'oj': 'oji',
4184         'om': 'orm',
4185         'or': 'ori',
4186         'os': 'oss',
4187         'pa': 'pan',
4188         'pi': 'pli',
4189         'pl': 'pol',
4190         'ps': 'pus',
4191         'pt': 'por',
4192         'qu': 'que',
4193         'rm': 'roh',
4194         'rn': 'run',
4195         'ro': 'ron',
4196         'ru': 'rus',
4197         'rw': 'kin',
4198         'sa': 'san',
4199         'sc': 'srd',
4200         'sd': 'snd',
4201         'se': 'sme',
4202         'sg': 'sag',
4203         'si': 'sin',
4204         'sk': 'slk',
4205         'sl': 'slv',
4206         'sm': 'smo',
4207         'sn': 'sna',
4208         'so': 'som',
4209         'sq': 'sqi',
4210         'sr': 'srp',
4211         'ss': 'ssw',
4212         'st': 'sot',
4213         'su': 'sun',
4214         'sv': 'swe',
4215         'sw': 'swa',
4216         'ta': 'tam',
4217         'te': 'tel',
4218         'tg': 'tgk',
4219         'th': 'tha',
4220         'ti': 'tir',
4221         'tk': 'tuk',
4222         'tl': 'tgl',
4223         'tn': 'tsn',
4224         'to': 'ton',
4225         'tr': 'tur',
4226         'ts': 'tso',
4227         'tt': 'tat',
4228         'tw': 'twi',
4229         'ty': 'tah',
4230         'ug': 'uig',
4231         'uk': 'ukr',
4232         'ur': 'urd',
4233         'uz': 'uzb',
4234         've': 'ven',
4235         'vi': 'vie',
4236         'vo': 'vol',
4237         'wa': 'wln',
4238         'wo': 'wol',
4239         'xh': 'xho',
4240         'yi': 'yid',
4241         'ji': 'yid',  # Replaced by yi in 1989 revision
4242         'yo': 'yor',
4243         'za': 'zha',
4244         'zh': 'zho',
4245         'zu': 'zul',
4246     }
4247
4248     @classmethod
4249     def short2long(cls, code):
4250         """Convert language code from ISO 639-1 to ISO 639-2/T"""
4251         return cls._lang_map.get(code[:2])
4252
4253     @classmethod
4254     def long2short(cls, code):
4255         """Convert language code from ISO 639-2/T to ISO 639-1"""
4256         for short_name, long_name in cls._lang_map.items():
4257             if long_name == code:
4258                 return short_name
4259
4260
4261 class ISO3166Utils:
4262     # From http://data.okfn.org/data/core/country-list
4263     _country_map = {
4264         'AF': 'Afghanistan',
4265         'AX': 'Åland Islands',
4266         'AL': 'Albania',
4267         'DZ': 'Algeria',
4268         'AS': 'American Samoa',
4269         'AD': 'Andorra',
4270         'AO': 'Angola',
4271         'AI': 'Anguilla',
4272         'AQ': 'Antarctica',
4273         'AG': 'Antigua and Barbuda',
4274         'AR': 'Argentina',
4275         'AM': 'Armenia',
4276         'AW': 'Aruba',
4277         'AU': 'Australia',
4278         'AT': 'Austria',
4279         'AZ': 'Azerbaijan',
4280         'BS': 'Bahamas',
4281         'BH': 'Bahrain',
4282         'BD': 'Bangladesh',
4283         'BB': 'Barbados',
4284         'BY': 'Belarus',
4285         'BE': 'Belgium',
4286         'BZ': 'Belize',
4287         'BJ': 'Benin',
4288         'BM': 'Bermuda',
4289         'BT': 'Bhutan',
4290         'BO': 'Bolivia, Plurinational State of',
4291         'BQ': 'Bonaire, Sint Eustatius and Saba',
4292         'BA': 'Bosnia and Herzegovina',
4293         'BW': 'Botswana',
4294         'BV': 'Bouvet Island',
4295         'BR': 'Brazil',
4296         'IO': 'British Indian Ocean Territory',
4297         'BN': 'Brunei Darussalam',
4298         'BG': 'Bulgaria',
4299         'BF': 'Burkina Faso',
4300         'BI': 'Burundi',
4301         'KH': 'Cambodia',
4302         'CM': 'Cameroon',
4303         'CA': 'Canada',
4304         'CV': 'Cape Verde',
4305         'KY': 'Cayman Islands',
4306         'CF': 'Central African Republic',
4307         'TD': 'Chad',
4308         'CL': 'Chile',
4309         'CN': 'China',
4310         'CX': 'Christmas Island',
4311         'CC': 'Cocos (Keeling) Islands',
4312         'CO': 'Colombia',
4313         'KM': 'Comoros',
4314         'CG': 'Congo',
4315         'CD': 'Congo, the Democratic Republic of the',
4316         'CK': 'Cook Islands',
4317         'CR': 'Costa Rica',
4318         'CI': 'Côte d\'Ivoire',
4319         'HR': 'Croatia',
4320         'CU': 'Cuba',
4321         'CW': 'Curaçao',
4322         'CY': 'Cyprus',
4323         'CZ': 'Czech Republic',
4324         'DK': 'Denmark',
4325         'DJ': 'Djibouti',
4326         'DM': 'Dominica',
4327         'DO': 'Dominican Republic',
4328         'EC': 'Ecuador',
4329         'EG': 'Egypt',
4330         'SV': 'El Salvador',
4331         'GQ': 'Equatorial Guinea',
4332         'ER': 'Eritrea',
4333         'EE': 'Estonia',
4334         'ET': 'Ethiopia',
4335         'FK': 'Falkland Islands (Malvinas)',
4336         'FO': 'Faroe Islands',
4337         'FJ': 'Fiji',
4338         'FI': 'Finland',
4339         'FR': 'France',
4340         'GF': 'French Guiana',
4341         'PF': 'French Polynesia',
4342         'TF': 'French Southern Territories',
4343         'GA': 'Gabon',
4344         'GM': 'Gambia',
4345         'GE': 'Georgia',
4346         'DE': 'Germany',
4347         'GH': 'Ghana',
4348         'GI': 'Gibraltar',
4349         'GR': 'Greece',
4350         'GL': 'Greenland',
4351         'GD': 'Grenada',
4352         'GP': 'Guadeloupe',
4353         'GU': 'Guam',
4354         'GT': 'Guatemala',
4355         'GG': 'Guernsey',
4356         'GN': 'Guinea',
4357         'GW': 'Guinea-Bissau',
4358         'GY': 'Guyana',
4359         'HT': 'Haiti',
4360         'HM': 'Heard Island and McDonald Islands',
4361         'VA': 'Holy See (Vatican City State)',
4362         'HN': 'Honduras',
4363         'HK': 'Hong Kong',
4364         'HU': 'Hungary',
4365         'IS': 'Iceland',
4366         'IN': 'India',
4367         'ID': 'Indonesia',
4368         'IR': 'Iran, Islamic Republic of',
4369         'IQ': 'Iraq',
4370         'IE': 'Ireland',
4371         'IM': 'Isle of Man',
4372         'IL': 'Israel',
4373         'IT': 'Italy',
4374         'JM': 'Jamaica',
4375         'JP': 'Japan',
4376         'JE': 'Jersey',
4377         'JO': 'Jordan',
4378         'KZ': 'Kazakhstan',
4379         'KE': 'Kenya',
4380         'KI': 'Kiribati',
4381         'KP': 'Korea, Democratic People\'s Republic of',
4382         'KR': 'Korea, Republic of',
4383         'KW': 'Kuwait',
4384         'KG': 'Kyrgyzstan',
4385         'LA': 'Lao People\'s Democratic Republic',
4386         'LV': 'Latvia',
4387         'LB': 'Lebanon',
4388         'LS': 'Lesotho',
4389         'LR': 'Liberia',
4390         'LY': 'Libya',
4391         'LI': 'Liechtenstein',
4392         'LT': 'Lithuania',
4393         'LU': 'Luxembourg',
4394         'MO': 'Macao',
4395         'MK': 'Macedonia, the Former Yugoslav Republic of',
4396         'MG': 'Madagascar',
4397         'MW': 'Malawi',
4398         'MY': 'Malaysia',
4399         'MV': 'Maldives',
4400         'ML': 'Mali',
4401         'MT': 'Malta',
4402         'MH': 'Marshall Islands',
4403         'MQ': 'Martinique',
4404         'MR': 'Mauritania',
4405         'MU': 'Mauritius',
4406         'YT': 'Mayotte',
4407         'MX': 'Mexico',
4408         'FM': 'Micronesia, Federated States of',
4409         'MD': 'Moldova, Republic of',
4410         'MC': 'Monaco',
4411         'MN': 'Mongolia',
4412         'ME': 'Montenegro',
4413         'MS': 'Montserrat',
4414         'MA': 'Morocco',
4415         'MZ': 'Mozambique',
4416         'MM': 'Myanmar',
4417         'NA': 'Namibia',
4418         'NR': 'Nauru',
4419         'NP': 'Nepal',
4420         'NL': 'Netherlands',
4421         'NC': 'New Caledonia',
4422         'NZ': 'New Zealand',
4423         'NI': 'Nicaragua',
4424         'NE': 'Niger',
4425         'NG': 'Nigeria',
4426         'NU': 'Niue',
4427         'NF': 'Norfolk Island',
4428         'MP': 'Northern Mariana Islands',
4429         'NO': 'Norway',
4430         'OM': 'Oman',
4431         'PK': 'Pakistan',
4432         'PW': 'Palau',
4433         'PS': 'Palestine, State of',
4434         'PA': 'Panama',
4435         'PG': 'Papua New Guinea',
4436         'PY': 'Paraguay',
4437         'PE': 'Peru',
4438         'PH': 'Philippines',
4439         'PN': 'Pitcairn',
4440         'PL': 'Poland',
4441         'PT': 'Portugal',
4442         'PR': 'Puerto Rico',
4443         'QA': 'Qatar',
4444         'RE': 'Réunion',
4445         'RO': 'Romania',
4446         'RU': 'Russian Federation',
4447         'RW': 'Rwanda',
4448         'BL': 'Saint Barthélemy',
4449         'SH': 'Saint Helena, Ascension and Tristan da Cunha',
4450         'KN': 'Saint Kitts and Nevis',
4451         'LC': 'Saint Lucia',
4452         'MF': 'Saint Martin (French part)',
4453         'PM': 'Saint Pierre and Miquelon',
4454         'VC': 'Saint Vincent and the Grenadines',
4455         'WS': 'Samoa',
4456         'SM': 'San Marino',
4457         'ST': 'Sao Tome and Principe',
4458         'SA': 'Saudi Arabia',
4459         'SN': 'Senegal',
4460         'RS': 'Serbia',
4461         'SC': 'Seychelles',
4462         'SL': 'Sierra Leone',
4463         'SG': 'Singapore',
4464         'SX': 'Sint Maarten (Dutch part)',
4465         'SK': 'Slovakia',
4466         'SI': 'Slovenia',
4467         'SB': 'Solomon Islands',
4468         'SO': 'Somalia',
4469         'ZA': 'South Africa',
4470         'GS': 'South Georgia and the South Sandwich Islands',
4471         'SS': 'South Sudan',
4472         'ES': 'Spain',
4473         'LK': 'Sri Lanka',
4474         'SD': 'Sudan',
4475         'SR': 'Suriname',
4476         'SJ': 'Svalbard and Jan Mayen',
4477         'SZ': 'Swaziland',
4478         'SE': 'Sweden',
4479         'CH': 'Switzerland',
4480         'SY': 'Syrian Arab Republic',
4481         'TW': 'Taiwan, Province of China',
4482         'TJ': 'Tajikistan',
4483         'TZ': 'Tanzania, United Republic of',
4484         'TH': 'Thailand',
4485         'TL': 'Timor-Leste',
4486         'TG': 'Togo',
4487         'TK': 'Tokelau',
4488         'TO': 'Tonga',
4489         'TT': 'Trinidad and Tobago',
4490         'TN': 'Tunisia',
4491         'TR': 'Turkey',
4492         'TM': 'Turkmenistan',
4493         'TC': 'Turks and Caicos Islands',
4494         'TV': 'Tuvalu',
4495         'UG': 'Uganda',
4496         'UA': 'Ukraine',
4497         'AE': 'United Arab Emirates',
4498         'GB': 'United Kingdom',
4499         'US': 'United States',
4500         'UM': 'United States Minor Outlying Islands',
4501         'UY': 'Uruguay',
4502         'UZ': 'Uzbekistan',
4503         'VU': 'Vanuatu',
4504         'VE': 'Venezuela, Bolivarian Republic of',
4505         'VN': 'Viet Nam',
4506         'VG': 'Virgin Islands, British',
4507         'VI': 'Virgin Islands, U.S.',
4508         'WF': 'Wallis and Futuna',
4509         'EH': 'Western Sahara',
4510         'YE': 'Yemen',
4511         'ZM': 'Zambia',
4512         'ZW': 'Zimbabwe',
4513         # Not ISO 3166 codes, but used for IP blocks
4514         'AP': 'Asia/Pacific Region',
4515         'EU': 'Europe',
4516     }
4517
4518     @classmethod
4519     def short2full(cls, code):
4520         """Convert an ISO 3166-2 country code to the corresponding full name"""
4521         return cls._country_map.get(code.upper())
4522
4523
4524 class GeoUtils:
4525     # Major IPv4 address blocks per country
4526     _country_ip_map = {
4527         'AD': '46.172.224.0/19',
4528         'AE': '94.200.0.0/13',
4529         'AF': '149.54.0.0/17',
4530         'AG': '209.59.64.0/18',
4531         'AI': '204.14.248.0/21',
4532         'AL': '46.99.0.0/16',
4533         'AM': '46.70.0.0/15',
4534         'AO': '105.168.0.0/13',
4535         'AP': '182.50.184.0/21',
4536         'AQ': '23.154.160.0/24',
4537         'AR': '181.0.0.0/12',
4538         'AS': '202.70.112.0/20',
4539         'AT': '77.116.0.0/14',
4540         'AU': '1.128.0.0/11',
4541         'AW': '181.41.0.0/18',
4542         'AX': '185.217.4.0/22',
4543         'AZ': '5.197.0.0/16',
4544         'BA': '31.176.128.0/17',
4545         'BB': '65.48.128.0/17',
4546         'BD': '114.130.0.0/16',
4547         'BE': '57.0.0.0/8',
4548         'BF': '102.178.0.0/15',
4549         'BG': '95.42.0.0/15',
4550         'BH': '37.131.0.0/17',
4551         'BI': '154.117.192.0/18',
4552         'BJ': '137.255.0.0/16',
4553         'BL': '185.212.72.0/23',
4554         'BM': '196.12.64.0/18',
4555         'BN': '156.31.0.0/16',
4556         'BO': '161.56.0.0/16',
4557         'BQ': '161.0.80.0/20',
4558         'BR': '191.128.0.0/12',
4559         'BS': '24.51.64.0/18',
4560         'BT': '119.2.96.0/19',
4561         'BW': '168.167.0.0/16',
4562         'BY': '178.120.0.0/13',
4563         'BZ': '179.42.192.0/18',
4564         'CA': '99.224.0.0/11',
4565         'CD': '41.243.0.0/16',
4566         'CF': '197.242.176.0/21',
4567         'CG': '160.113.0.0/16',
4568         'CH': '85.0.0.0/13',
4569         'CI': '102.136.0.0/14',
4570         'CK': '202.65.32.0/19',
4571         'CL': '152.172.0.0/14',
4572         'CM': '102.244.0.0/14',
4573         'CN': '36.128.0.0/10',
4574         'CO': '181.240.0.0/12',
4575         'CR': '201.192.0.0/12',
4576         'CU': '152.206.0.0/15',
4577         'CV': '165.90.96.0/19',
4578         'CW': '190.88.128.0/17',
4579         'CY': '31.153.0.0/16',
4580         'CZ': '88.100.0.0/14',
4581         'DE': '53.0.0.0/8',
4582         'DJ': '197.241.0.0/17',
4583         'DK': '87.48.0.0/12',
4584         'DM': '192.243.48.0/20',
4585         'DO': '152.166.0.0/15',
4586         'DZ': '41.96.0.0/12',
4587         'EC': '186.68.0.0/15',
4588         'EE': '90.190.0.0/15',
4589         'EG': '156.160.0.0/11',
4590         'ER': '196.200.96.0/20',
4591         'ES': '88.0.0.0/11',
4592         'ET': '196.188.0.0/14',
4593         'EU': '2.16.0.0/13',
4594         'FI': '91.152.0.0/13',
4595         'FJ': '144.120.0.0/16',
4596         'FK': '80.73.208.0/21',
4597         'FM': '119.252.112.0/20',
4598         'FO': '88.85.32.0/19',
4599         'FR': '90.0.0.0/9',
4600         'GA': '41.158.0.0/15',
4601         'GB': '25.0.0.0/8',
4602         'GD': '74.122.88.0/21',
4603         'GE': '31.146.0.0/16',
4604         'GF': '161.22.64.0/18',
4605         'GG': '62.68.160.0/19',
4606         'GH': '154.160.0.0/12',
4607         'GI': '95.164.0.0/16',
4608         'GL': '88.83.0.0/19',
4609         'GM': '160.182.0.0/15',
4610         'GN': '197.149.192.0/18',
4611         'GP': '104.250.0.0/19',
4612         'GQ': '105.235.224.0/20',
4613         'GR': '94.64.0.0/13',
4614         'GT': '168.234.0.0/16',
4615         'GU': '168.123.0.0/16',
4616         'GW': '197.214.80.0/20',
4617         'GY': '181.41.64.0/18',
4618         'HK': '113.252.0.0/14',
4619         'HN': '181.210.0.0/16',
4620         'HR': '93.136.0.0/13',
4621         'HT': '148.102.128.0/17',
4622         'HU': '84.0.0.0/14',
4623         'ID': '39.192.0.0/10',
4624         'IE': '87.32.0.0/12',
4625         'IL': '79.176.0.0/13',
4626         'IM': '5.62.80.0/20',
4627         'IN': '117.192.0.0/10',
4628         'IO': '203.83.48.0/21',
4629         'IQ': '37.236.0.0/14',
4630         'IR': '2.176.0.0/12',
4631         'IS': '82.221.0.0/16',
4632         'IT': '79.0.0.0/10',
4633         'JE': '87.244.64.0/18',
4634         'JM': '72.27.0.0/17',
4635         'JO': '176.29.0.0/16',
4636         'JP': '133.0.0.0/8',
4637         'KE': '105.48.0.0/12',
4638         'KG': '158.181.128.0/17',
4639         'KH': '36.37.128.0/17',
4640         'KI': '103.25.140.0/22',
4641         'KM': '197.255.224.0/20',
4642         'KN': '198.167.192.0/19',
4643         'KP': '175.45.176.0/22',
4644         'KR': '175.192.0.0/10',
4645         'KW': '37.36.0.0/14',
4646         'KY': '64.96.0.0/15',
4647         'KZ': '2.72.0.0/13',
4648         'LA': '115.84.64.0/18',
4649         'LB': '178.135.0.0/16',
4650         'LC': '24.92.144.0/20',
4651         'LI': '82.117.0.0/19',
4652         'LK': '112.134.0.0/15',
4653         'LR': '102.183.0.0/16',
4654         'LS': '129.232.0.0/17',
4655         'LT': '78.56.0.0/13',
4656         'LU': '188.42.0.0/16',
4657         'LV': '46.109.0.0/16',
4658         'LY': '41.252.0.0/14',
4659         'MA': '105.128.0.0/11',
4660         'MC': '88.209.64.0/18',
4661         'MD': '37.246.0.0/16',
4662         'ME': '178.175.0.0/17',
4663         'MF': '74.112.232.0/21',
4664         'MG': '154.126.0.0/17',
4665         'MH': '117.103.88.0/21',
4666         'MK': '77.28.0.0/15',
4667         'ML': '154.118.128.0/18',
4668         'MM': '37.111.0.0/17',
4669         'MN': '49.0.128.0/17',
4670         'MO': '60.246.0.0/16',
4671         'MP': '202.88.64.0/20',
4672         'MQ': '109.203.224.0/19',
4673         'MR': '41.188.64.0/18',
4674         'MS': '208.90.112.0/22',
4675         'MT': '46.11.0.0/16',
4676         'MU': '105.16.0.0/12',
4677         'MV': '27.114.128.0/18',
4678         'MW': '102.70.0.0/15',
4679         'MX': '187.192.0.0/11',
4680         'MY': '175.136.0.0/13',
4681         'MZ': '197.218.0.0/15',
4682         'NA': '41.182.0.0/16',
4683         'NC': '101.101.0.0/18',
4684         'NE': '197.214.0.0/18',
4685         'NF': '203.17.240.0/22',
4686         'NG': '105.112.0.0/12',
4687         'NI': '186.76.0.0/15',
4688         'NL': '145.96.0.0/11',
4689         'NO': '84.208.0.0/13',
4690         'NP': '36.252.0.0/15',
4691         'NR': '203.98.224.0/19',
4692         'NU': '49.156.48.0/22',
4693         'NZ': '49.224.0.0/14',
4694         'OM': '5.36.0.0/15',
4695         'PA': '186.72.0.0/15',
4696         'PE': '186.160.0.0/14',
4697         'PF': '123.50.64.0/18',
4698         'PG': '124.240.192.0/19',
4699         'PH': '49.144.0.0/13',
4700         'PK': '39.32.0.0/11',
4701         'PL': '83.0.0.0/11',
4702         'PM': '70.36.0.0/20',
4703         'PR': '66.50.0.0/16',
4704         'PS': '188.161.0.0/16',
4705         'PT': '85.240.0.0/13',
4706         'PW': '202.124.224.0/20',
4707         'PY': '181.120.0.0/14',
4708         'QA': '37.210.0.0/15',
4709         'RE': '102.35.0.0/16',
4710         'RO': '79.112.0.0/13',
4711         'RS': '93.86.0.0/15',
4712         'RU': '5.136.0.0/13',
4713         'RW': '41.186.0.0/16',
4714         'SA': '188.48.0.0/13',
4715         'SB': '202.1.160.0/19',
4716         'SC': '154.192.0.0/11',
4717         'SD': '102.120.0.0/13',
4718         'SE': '78.64.0.0/12',
4719         'SG': '8.128.0.0/10',
4720         'SI': '188.196.0.0/14',
4721         'SK': '78.98.0.0/15',
4722         'SL': '102.143.0.0/17',
4723         'SM': '89.186.32.0/19',
4724         'SN': '41.82.0.0/15',
4725         'SO': '154.115.192.0/18',
4726         'SR': '186.179.128.0/17',
4727         'SS': '105.235.208.0/21',
4728         'ST': '197.159.160.0/19',
4729         'SV': '168.243.0.0/16',
4730         'SX': '190.102.0.0/20',
4731         'SY': '5.0.0.0/16',
4732         'SZ': '41.84.224.0/19',
4733         'TC': '65.255.48.0/20',
4734         'TD': '154.68.128.0/19',
4735         'TG': '196.168.0.0/14',
4736         'TH': '171.96.0.0/13',
4737         'TJ': '85.9.128.0/18',
4738         'TK': '27.96.24.0/21',
4739         'TL': '180.189.160.0/20',
4740         'TM': '95.85.96.0/19',
4741         'TN': '197.0.0.0/11',
4742         'TO': '175.176.144.0/21',
4743         'TR': '78.160.0.0/11',
4744         'TT': '186.44.0.0/15',
4745         'TV': '202.2.96.0/19',
4746         'TW': '120.96.0.0/11',
4747         'TZ': '156.156.0.0/14',
4748         'UA': '37.52.0.0/14',
4749         'UG': '102.80.0.0/13',
4750         'US': '6.0.0.0/8',
4751         'UY': '167.56.0.0/13',
4752         'UZ': '84.54.64.0/18',
4753         'VA': '212.77.0.0/19',
4754         'VC': '207.191.240.0/21',
4755         'VE': '186.88.0.0/13',
4756         'VG': '66.81.192.0/20',
4757         'VI': '146.226.0.0/16',
4758         'VN': '14.160.0.0/11',
4759         'VU': '202.80.32.0/20',
4760         'WF': '117.20.32.0/21',
4761         'WS': '202.4.32.0/19',
4762         'YE': '134.35.0.0/16',
4763         'YT': '41.242.116.0/22',
4764         'ZA': '41.0.0.0/11',
4765         'ZM': '102.144.0.0/13',
4766         'ZW': '102.177.192.0/18',
4767     }
4768
4769     @classmethod
4770     def random_ipv4(cls, code_or_block):
4771         if len(code_or_block) == 2:
4772             block = cls._country_ip_map.get(code_or_block.upper())
4773             if not block:
4774                 return None
4775         else:
4776             block = code_or_block
4777         addr, preflen = block.split('/')
4778         addr_min = struct.unpack('!L', socket.inet_aton(addr))[0]
4779         addr_max = addr_min | (0xffffffff >> int(preflen))
4780         return str(socket.inet_ntoa(
4781             struct.pack('!L', random.randint(addr_min, addr_max))))
4782
4783
4784 class PerRequestProxyHandler(urllib.request.ProxyHandler):
4785     def __init__(self, proxies=None):
4786         # Set default handlers
4787         for type in ('http', 'https'):
4788             setattr(self, '%s_open' % type,
4789                     lambda r, proxy='__noproxy__', type=type, meth=self.proxy_open:
4790                         meth(r, proxy, type))
4791         urllib.request.ProxyHandler.__init__(self, proxies)
4792
4793     def proxy_open(self, req, proxy, type):
4794         req_proxy = req.headers.get('Ytdl-request-proxy')
4795         if req_proxy is not None:
4796             proxy = req_proxy
4797             del req.headers['Ytdl-request-proxy']
4798
4799         if proxy == '__noproxy__':
4800             return None  # No Proxy
4801         if urllib.parse.urlparse(proxy).scheme.lower() in ('socks', 'socks4', 'socks4a', 'socks5'):
4802             req.add_header('Ytdl-socks-proxy', proxy)
4803             # yt-dlp's http/https handlers do wrapping the socket with socks
4804             return None
4805         return urllib.request.ProxyHandler.proxy_open(
4806             self, req, proxy, type)
4807
4808
4809 # Both long_to_bytes and bytes_to_long are adapted from PyCrypto, which is
4810 # released into Public Domain
4811 # https://github.com/dlitz/pycrypto/blob/master/lib/Crypto/Util/number.py#L387
4812
4813 def long_to_bytes(n, blocksize=0):
4814     """long_to_bytes(n:long, blocksize:int) : string
4815     Convert a long integer to a byte string.
4816
4817     If optional blocksize is given and greater than zero, pad the front of the
4818     byte string with binary zeros so that the length is a multiple of
4819     blocksize.
4820     """
4821     # after much testing, this algorithm was deemed to be the fastest
4822     s = b''
4823     n = int(n)
4824     while n > 0:
4825         s = struct.pack('>I', n & 0xffffffff) + s
4826         n = n >> 32
4827     # strip off leading zeros
4828     for i in range(len(s)):
4829         if s[i] != b'\000'[0]:
4830             break
4831     else:
4832         # only happens when n == 0
4833         s = b'\000'
4834         i = 0
4835     s = s[i:]
4836     # add back some pad bytes.  this could be done more efficiently w.r.t. the
4837     # de-padding being done above, but sigh...
4838     if blocksize > 0 and len(s) % blocksize:
4839         s = (blocksize - len(s) % blocksize) * b'\000' + s
4840     return s
4841
4842
4843 def bytes_to_long(s):
4844     """bytes_to_long(string) : long
4845     Convert a byte string to a long integer.
4846
4847     This is (essentially) the inverse of long_to_bytes().
4848     """
4849     acc = 0
4850     length = len(s)
4851     if length % 4:
4852         extra = (4 - length % 4)
4853         s = b'\000' * extra + s
4854         length = length + extra
4855     for i in range(0, length, 4):
4856         acc = (acc << 32) + struct.unpack('>I', s[i:i + 4])[0]
4857     return acc
4858
4859
4860 def ohdave_rsa_encrypt(data, exponent, modulus):
4861     '''
4862     Implement OHDave's RSA algorithm. See http://www.ohdave.com/rsa/
4863
4864     Input:
4865         data: data to encrypt, bytes-like object
4866         exponent, modulus: parameter e and N of RSA algorithm, both integer
4867     Output: hex string of encrypted data
4868
4869     Limitation: supports one block encryption only
4870     '''
4871
4872     payload = int(binascii.hexlify(data[::-1]), 16)
4873     encrypted = pow(payload, exponent, modulus)
4874     return '%x' % encrypted
4875
4876
4877 def pkcs1pad(data, length):
4878     """
4879     Padding input data with PKCS#1 scheme
4880
4881     @param {int[]} data        input data
4882     @param {int}   length      target length
4883     @returns {int[]}           padded data
4884     """
4885     if len(data) > length - 11:
4886         raise ValueError('Input data too long for PKCS#1 padding')
4887
4888     pseudo_random = [random.randint(0, 254) for _ in range(length - len(data) - 3)]
4889     return [0, 2] + pseudo_random + [0] + data
4890
4891
4892 def _base_n_table(n, table):
4893     if not table and not n:
4894         raise ValueError('Either table or n must be specified')
4895     table = (table or '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ')[:n]
4896
4897     if n and n != len(table):
4898         raise ValueError(f'base {n} exceeds table length {len(table)}')
4899     return table
4900
4901
4902 def encode_base_n(num, n=None, table=None):
4903     """Convert given int to a base-n string"""
4904     table = _base_n_table(n, table)
4905     if not num:
4906         return table[0]
4907
4908     result, base = '', len(table)
4909     while num:
4910         result = table[num % base] + result
4911         num = num // base
4912     return result
4913
4914
4915 def decode_base_n(string, n=None, table=None):
4916     """Convert given base-n string to int"""
4917     table = {char: index for index, char in enumerate(_base_n_table(n, table))}
4918     result, base = 0, len(table)
4919     for char in string:
4920         result = result * base + table[char]
4921     return result
4922
4923
4924 def decode_base(value, digits):
4925     deprecation_warning(f'{__name__}.decode_base is deprecated and may be removed '
4926                         f'in a future version. Use {__name__}.decode_base_n instead')
4927     return decode_base_n(value, table=digits)
4928
4929
4930 def decode_packed_codes(code):
4931     mobj = re.search(PACKED_CODES_RE, code)
4932     obfuscated_code, base, count, symbols = mobj.groups()
4933     base = int(base)
4934     count = int(count)
4935     symbols = symbols.split('|')
4936     symbol_table = {}
4937
4938     while count:
4939         count -= 1
4940         base_n_count = encode_base_n(count, base)
4941         symbol_table[base_n_count] = symbols[count] or base_n_count
4942
4943     return re.sub(
4944         r'\b(\w+)\b', lambda mobj: symbol_table[mobj.group(0)],
4945         obfuscated_code)
4946
4947
4948 def caesar(s, alphabet, shift):
4949     if shift == 0:
4950         return s
4951     l = len(alphabet)
4952     return ''.join(
4953         alphabet[(alphabet.index(c) + shift) % l] if c in alphabet else c
4954         for c in s)
4955
4956
4957 def rot47(s):
4958     return caesar(s, r'''!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~''', 47)
4959
4960
4961 def parse_m3u8_attributes(attrib):
4962     info = {}
4963     for (key, val) in re.findall(r'(?P<key>[A-Z0-9-]+)=(?P<val>"[^"]+"|[^",]+)(?:,|$)', attrib):
4964         if val.startswith('"'):
4965             val = val[1:-1]
4966         info[key] = val
4967     return info
4968
4969
4970 def urshift(val, n):
4971     return val >> n if val >= 0 else (val + 0x100000000) >> n
4972
4973
4974 # Based on png2str() written by @gdkchan and improved by @yokrysty
4975 # Originally posted at https://github.com/ytdl-org/youtube-dl/issues/9706
4976 def decode_png(png_data):
4977     # Reference: https://www.w3.org/TR/PNG/
4978     header = png_data[8:]
4979
4980     if png_data[:8] != b'\x89PNG\x0d\x0a\x1a\x0a' or header[4:8] != b'IHDR':
4981         raise OSError('Not a valid PNG file.')
4982
4983     int_map = {1: '>B', 2: '>H', 4: '>I'}
4984     unpack_integer = lambda x: struct.unpack(int_map[len(x)], x)[0]
4985
4986     chunks = []
4987
4988     while header:
4989         length = unpack_integer(header[:4])
4990         header = header[4:]
4991
4992         chunk_type = header[:4]
4993         header = header[4:]
4994
4995         chunk_data = header[:length]
4996         header = header[length:]
4997
4998         header = header[4:]  # Skip CRC
4999
5000         chunks.append({
5001             'type': chunk_type,
5002             'length': length,
5003             'data': chunk_data
5004         })
5005
5006     ihdr = chunks[0]['data']
5007
5008     width = unpack_integer(ihdr[:4])
5009     height = unpack_integer(ihdr[4:8])
5010
5011     idat = b''
5012
5013     for chunk in chunks:
5014         if chunk['type'] == b'IDAT':
5015             idat += chunk['data']
5016
5017     if not idat:
5018         raise OSError('Unable to read PNG data.')
5019
5020     decompressed_data = bytearray(zlib.decompress(idat))
5021
5022     stride = width * 3
5023     pixels = []
5024
5025     def _get_pixel(idx):
5026         x = idx % stride
5027         y = idx // stride
5028         return pixels[y][x]
5029
5030     for y in range(height):
5031         basePos = y * (1 + stride)
5032         filter_type = decompressed_data[basePos]
5033
5034         current_row = []
5035
5036         pixels.append(current_row)
5037
5038         for x in range(stride):
5039             color = decompressed_data[1 + basePos + x]
5040             basex = y * stride + x
5041             left = 0
5042             up = 0
5043
5044             if x > 2:
5045                 left = _get_pixel(basex - 3)
5046             if y > 0:
5047                 up = _get_pixel(basex - stride)
5048
5049             if filter_type == 1:  # Sub
5050                 color = (color + left) & 0xff
5051             elif filter_type == 2:  # Up
5052                 color = (color + up) & 0xff
5053             elif filter_type == 3:  # Average
5054                 color = (color + ((left + up) >> 1)) & 0xff
5055             elif filter_type == 4:  # Paeth
5056                 a = left
5057                 b = up
5058                 c = 0
5059
5060                 if x > 2 and y > 0:
5061                     c = _get_pixel(basex - stride - 3)
5062
5063                 p = a + b - c
5064
5065                 pa = abs(p - a)
5066                 pb = abs(p - b)
5067                 pc = abs(p - c)
5068
5069                 if pa <= pb and pa <= pc:
5070                     color = (color + a) & 0xff
5071                 elif pb <= pc:
5072                     color = (color + b) & 0xff
5073                 else:
5074                     color = (color + c) & 0xff
5075
5076             current_row.append(color)
5077
5078     return width, height, pixels
5079
5080
5081 def write_xattr(path, key, value):
5082     # Windows: Write xattrs to NTFS Alternate Data Streams:
5083     # http://en.wikipedia.org/wiki/NTFS#Alternate_data_streams_.28ADS.29
5084     if compat_os_name == 'nt':
5085         assert ':' not in key
5086         assert os.path.exists(path)
5087
5088         try:
5089             with open(f'{path}:{key}', 'wb') as f:
5090                 f.write(value)
5091         except OSError as e:
5092             raise XAttrMetadataError(e.errno, e.strerror)
5093         return
5094
5095     # UNIX Method 1. Use xattrs/pyxattrs modules
5096
5097     setxattr = None
5098     if getattr(xattr, '_yt_dlp__identifier', None) == 'pyxattr':
5099         # Unicode arguments are not supported in pyxattr until version 0.5.0
5100         # See https://github.com/ytdl-org/youtube-dl/issues/5498
5101         if version_tuple(xattr.__version__) >= (0, 5, 0):
5102             setxattr = xattr.set
5103     elif xattr:
5104         setxattr = xattr.setxattr
5105
5106     if setxattr:
5107         try:
5108             setxattr(path, key, value)
5109         except OSError as e:
5110             raise XAttrMetadataError(e.errno, e.strerror)
5111         return
5112
5113     # UNIX Method 2. Use setfattr/xattr executables
5114     exe = ('setfattr' if check_executable('setfattr', ['--version'])
5115            else 'xattr' if check_executable('xattr', ['-h']) else None)
5116     if not exe:
5117         raise XAttrUnavailableError(
5118             'Couldn\'t find a tool to set the xattrs. Install either the python "xattr" or "pyxattr" modules or the '
5119             + ('"xattr" binary' if sys.platform != 'linux' else 'GNU "attr" package (which contains the "setfattr" tool)'))
5120
5121     value = value.decode()
5122     try:
5123         _, stderr, returncode = Popen.run(
5124             [exe, '-w', key, value, path] if exe == 'xattr' else [exe, '-n', key, '-v', value, path],
5125             text=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE)
5126     except OSError as e:
5127         raise XAttrMetadataError(e.errno, e.strerror)
5128     if returncode:
5129         raise XAttrMetadataError(returncode, stderr)
5130
5131
5132 def random_birthday(year_field, month_field, day_field):
5133     start_date = datetime.date(1950, 1, 1)
5134     end_date = datetime.date(1995, 12, 31)
5135     offset = random.randint(0, (end_date - start_date).days)
5136     random_date = start_date + datetime.timedelta(offset)
5137     return {
5138         year_field: str(random_date.year),
5139         month_field: str(random_date.month),
5140         day_field: str(random_date.day),
5141     }
5142
5143
5144 # Templates for internet shortcut files, which are plain text files.
5145 DOT_URL_LINK_TEMPLATE = '''\
5146 [InternetShortcut]
5147 URL=%(url)s
5148 '''
5149
5150 DOT_WEBLOC_LINK_TEMPLATE = '''\
5151 <?xml version="1.0" encoding="UTF-8"?>
5152 <!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
5153 <plist version="1.0">
5154 <dict>
5155 \t<key>URL</key>
5156 \t<string>%(url)s</string>
5157 </dict>
5158 </plist>
5159 '''
5160
5161 DOT_DESKTOP_LINK_TEMPLATE = '''\
5162 [Desktop Entry]
5163 Encoding=UTF-8
5164 Name=%(filename)s
5165 Type=Link
5166 URL=%(url)s
5167 Icon=text-html
5168 '''
5169
5170 LINK_TEMPLATES = {
5171     'url': DOT_URL_LINK_TEMPLATE,
5172     'desktop': DOT_DESKTOP_LINK_TEMPLATE,
5173     'webloc': DOT_WEBLOC_LINK_TEMPLATE,
5174 }
5175
5176
5177 def iri_to_uri(iri):
5178     """
5179     Converts an IRI (Internationalized Resource Identifier, allowing Unicode characters) to a URI (Uniform Resource Identifier, ASCII-only).
5180
5181     The function doesn't add an additional layer of escaping; e.g., it doesn't escape `%3C` as `%253C`. Instead, it percent-escapes characters with an underlying UTF-8 encoding *besides* those already escaped, leaving the URI intact.
5182     """
5183
5184     iri_parts = urllib.parse.urlparse(iri)
5185
5186     if '[' in iri_parts.netloc:
5187         raise ValueError('IPv6 URIs are not, yet, supported.')
5188         # Querying `.netloc`, when there's only one bracket, also raises a ValueError.
5189
5190     # The `safe` argument values, that the following code uses, contain the characters that should not be percent-encoded. Everything else but letters, digits and '_.-' will be percent-encoded with an underlying UTF-8 encoding. Everything already percent-encoded will be left as is.
5191
5192     net_location = ''
5193     if iri_parts.username:
5194         net_location += urllib.parse.quote(iri_parts.username, safe=r"!$%&'()*+,~")
5195         if iri_parts.password is not None:
5196             net_location += ':' + urllib.parse.quote(iri_parts.password, safe=r"!$%&'()*+,~")
5197         net_location += '@'
5198
5199     net_location += iri_parts.hostname.encode('idna').decode()  # Punycode for Unicode hostnames.
5200     # The 'idna' encoding produces ASCII text.
5201     if iri_parts.port is not None and iri_parts.port != 80:
5202         net_location += ':' + str(iri_parts.port)
5203
5204     return urllib.parse.urlunparse(
5205         (iri_parts.scheme,
5206             net_location,
5207
5208             urllib.parse.quote_plus(iri_parts.path, safe=r"!$%&'()*+,/:;=@|~"),
5209
5210             # Unsure about the `safe` argument, since this is a legacy way of handling parameters.
5211             urllib.parse.quote_plus(iri_parts.params, safe=r"!$%&'()*+,/:;=@|~"),
5212
5213             # Not totally sure about the `safe` argument, since the source does not explicitly mention the query URI component.
5214             urllib.parse.quote_plus(iri_parts.query, safe=r"!$%&'()*+,/:;=?@{|}~"),
5215
5216             urllib.parse.quote_plus(iri_parts.fragment, safe=r"!#$%&'()*+,/:;=?@{|}~")))
5217
5218     # Source for `safe` arguments: https://url.spec.whatwg.org/#percent-encoded-bytes.
5219
5220
5221 def to_high_limit_path(path):
5222     if sys.platform in ['win32', 'cygwin']:
5223         # Work around MAX_PATH limitation on Windows. The maximum allowed length for the individual path segments may still be quite limited.
5224         return '\\\\?\\' + os.path.abspath(path)
5225
5226     return path
5227
5228
5229 def format_field(obj, field=None, template='%s', ignore=NO_DEFAULT, default='', func=IDENTITY):
5230     val = traverse_obj(obj, *variadic(field))
5231     if (not val and val != 0) if ignore is NO_DEFAULT else val in variadic(ignore):
5232         return default
5233     return template % func(val)
5234
5235
5236 def clean_podcast_url(url):
5237     return re.sub(r'''(?x)
5238         (?:
5239             (?:
5240                 chtbl\.com/track|
5241                 media\.blubrry\.com| # https://create.blubrry.com/resources/podcast-media-download-statistics/getting-started/
5242                 play\.podtrac\.com
5243             )/[^/]+|
5244             (?:dts|www)\.podtrac\.com/(?:pts/)?redirect\.[0-9a-z]{3,4}| # http://analytics.podtrac.com/how-to-measure
5245             flex\.acast\.com|
5246             pd(?:
5247                 cn\.co| # https://podcorn.com/analytics-prefix/
5248                 st\.fm # https://podsights.com/docs/
5249             )/e
5250         )/''', '', url)
5251
5252
5253 _HEX_TABLE = '0123456789abcdef'
5254
5255
5256 def random_uuidv4():
5257     return re.sub(r'[xy]', lambda x: _HEX_TABLE[random.randint(0, 15)], 'xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx')
5258
5259
5260 def make_dir(path, to_screen=None):
5261     try:
5262         dn = os.path.dirname(path)
5263         if dn and not os.path.exists(dn):
5264             os.makedirs(dn)
5265         return True
5266     except OSError as err:
5267         if callable(to_screen) is not None:
5268             to_screen('unable to create directory ' + error_to_compat_str(err))
5269         return False
5270
5271
5272 def get_executable_path():
5273     from .update import _get_variant_and_executable_path
5274
5275     return os.path.dirname(os.path.abspath(_get_variant_and_executable_path()[1]))
5276
5277
5278 def load_plugins(name, suffix, namespace):
5279     classes = {}
5280     with contextlib.suppress(FileNotFoundError):
5281         plugins_spec = importlib.util.spec_from_file_location(
5282             name, os.path.join(get_executable_path(), 'ytdlp_plugins', name, '__init__.py'))
5283         plugins = importlib.util.module_from_spec(plugins_spec)
5284         sys.modules[plugins_spec.name] = plugins
5285         plugins_spec.loader.exec_module(plugins)
5286         for name in dir(plugins):
5287             if name in namespace:
5288                 continue
5289             if not name.endswith(suffix):
5290                 continue
5291             klass = getattr(plugins, name)
5292             classes[name] = namespace[name] = klass
5293     return classes
5294
5295
5296 def traverse_obj(
5297         obj, *paths, default=None, expected_type=None, get_all=True,
5298         casesense=True, is_user_input=False, traverse_string=False):
5299     """
5300     Safely traverse nested `dict`s and `Sequence`s
5301
5302     >>> obj = [{}, {"key": "value"}]
5303     >>> traverse_obj(obj, (1, "key"))
5304     "value"
5305
5306     Each of the provided `paths` is tested and the first producing a valid result will be returned.
5307     A value of None is treated as the absence of a value.
5308
5309     The paths will be wrapped in `variadic`, so that `'key'` is conveniently the same as `('key', )`.
5310
5311     The keys in the path can be one of:
5312         - `None`:           Return the current object.
5313         - `str`/`int`:      Return `obj[key]`.
5314         - `slice`:          Branch out and return all values in `obj[key]`.
5315         - `Ellipsis`:       Branch out and return a list of all values.
5316         - `tuple`/`list`:   Branch out and return a list of all matching values.
5317                             Read as: `[traverse_obj(obj, branch) for branch in branches]`.
5318         - `function`:       Branch out and return values filtered by the function.
5319                             Read as: `[value for key, value in obj if function(key, value)]`.
5320                             For `Sequence`s, `key` is the index of the value.
5321         - `dict`            Transform the current object and return a matching dict.
5322                             Read as: `{key: traverse_obj(obj, path) for key, path in dct.items()}`.
5323
5324         `tuple`, `list`, and `dict` all support nested paths and branches
5325
5326     @params paths           Paths which to traverse by.
5327     @param default          Value to return if the paths do not match.
5328     @param expected_type    If a `type`, only accept final values of this type.
5329                             If any other callable, try to call the function on each result.
5330     @param get_all          If `False`, return the first matching result, otherwise all matching ones.
5331     @param casesense        If `False`, consider string dictionary keys as case insensitive.
5332
5333     The following are only meant to be used by YoutubeDL.prepare_outtmpl and are not part of the API
5334
5335     @param is_user_input    Whether the keys are generated from user input.
5336                             If `True` strings get converted to `int`/`slice` if needed.
5337     @param traverse_string  Whether to traverse into objects as strings.
5338                             If `True`, any non-compatible object will first be
5339                             converted into a string and then traversed into.
5340
5341
5342     @returns                The result of the object traversal.
5343                             If successful, `get_all=True`, and the path branches at least once,
5344                             then a list of results is returned instead.
5345     """
5346     is_sequence = lambda x: isinstance(x, collections.abc.Sequence) and not isinstance(x, (str, bytes))
5347     casefold = lambda k: k.casefold() if isinstance(k, str) else k
5348
5349     if isinstance(expected_type, type):
5350         type_test = lambda val: val if isinstance(val, expected_type) else None
5351     else:
5352         type_test = lambda val: try_call(expected_type or IDENTITY, args=(val,))
5353
5354     def apply_key(key, obj):
5355         if obj is None:
5356             return
5357
5358         elif key is None:
5359             yield obj
5360
5361         elif isinstance(key, (list, tuple)):
5362             for branch in key:
5363                 _, result = apply_path(obj, branch)
5364                 yield from result
5365
5366         elif key is ...:
5367             if isinstance(obj, collections.abc.Mapping):
5368                 yield from obj.values()
5369             elif is_sequence(obj):
5370                 yield from obj
5371             elif traverse_string:
5372                 yield from str(obj)
5373
5374         elif callable(key):
5375             if is_sequence(obj):
5376                 iter_obj = enumerate(obj)
5377             elif isinstance(obj, collections.abc.Mapping):
5378                 iter_obj = obj.items()
5379             elif traverse_string:
5380                 iter_obj = enumerate(str(obj))
5381             else:
5382                 return
5383             yield from (v for k, v in iter_obj if try_call(key, args=(k, v)))
5384
5385         elif isinstance(key, dict):
5386             iter_obj = ((k, _traverse_obj(obj, v)) for k, v in key.items())
5387             yield {k: v if v is not None else default for k, v in iter_obj
5388                    if v is not None or default is not None}
5389
5390         elif isinstance(obj, dict):
5391             yield (obj.get(key) if casesense or (key in obj)
5392                    else next((v for k, v in obj.items() if casefold(k) == key), None))
5393
5394         else:
5395             if is_user_input:
5396                 key = (int_or_none(key) if ':' not in key
5397                        else slice(*map(int_or_none, key.split(':'))))
5398
5399             if not isinstance(key, (int, slice)):
5400                 return
5401
5402             if not is_sequence(obj):
5403                 if not traverse_string:
5404                     return
5405                 obj = str(obj)
5406
5407             with contextlib.suppress(IndexError):
5408                 yield obj[key]
5409
5410     def apply_path(start_obj, path):
5411         objs = (start_obj,)
5412         has_branched = False
5413
5414         for key in variadic(path):
5415             if is_user_input and key == ':':
5416                 key = ...
5417
5418             if not casesense and isinstance(key, str):
5419                 key = key.casefold()
5420
5421             if key is ... or isinstance(key, (list, tuple)) or callable(key):
5422                 has_branched = True
5423
5424             key_func = functools.partial(apply_key, key)
5425             objs = itertools.chain.from_iterable(map(key_func, objs))
5426
5427         return has_branched, objs
5428
5429     def _traverse_obj(obj, path):
5430         has_branched, results = apply_path(obj, path)
5431         results = LazyList(x for x in map(type_test, results) if x is not None)
5432         if results:
5433             return results.exhaust() if get_all and has_branched else results[0]
5434
5435     for path in paths:
5436         result = _traverse_obj(obj, path)
5437         if result is not None:
5438             return result
5439
5440     return default
5441
5442
5443 def traverse_dict(dictn, keys, casesense=True):
5444     deprecation_warning(f'"{__name__}.traverse_dict" is deprecated and may be removed '
5445                         f'in a future version. Use "{__name__}.traverse_obj" instead')
5446     return traverse_obj(dictn, keys, casesense=casesense, is_user_input=True, traverse_string=True)
5447
5448
5449 def get_first(obj, keys, **kwargs):
5450     return traverse_obj(obj, (..., *variadic(keys)), **kwargs, get_all=False)
5451
5452
5453 def time_seconds(**kwargs):
5454     t = datetime.datetime.now(datetime.timezone(datetime.timedelta(**kwargs)))
5455     return t.timestamp()
5456
5457
5458 # create a JSON Web Signature (jws) with HS256 algorithm
5459 # the resulting format is in JWS Compact Serialization
5460 # implemented following JWT https://www.rfc-editor.org/rfc/rfc7519.html
5461 # implemented following JWS https://www.rfc-editor.org/rfc/rfc7515.html
5462 def jwt_encode_hs256(payload_data, key, headers={}):
5463     header_data = {
5464         'alg': 'HS256',
5465         'typ': 'JWT',
5466     }
5467     if headers:
5468         header_data.update(headers)
5469     header_b64 = base64.b64encode(json.dumps(header_data).encode())
5470     payload_b64 = base64.b64encode(json.dumps(payload_data).encode())
5471     h = hmac.new(key.encode(), header_b64 + b'.' + payload_b64, hashlib.sha256)
5472     signature_b64 = base64.b64encode(h.digest())
5473     token = header_b64 + b'.' + payload_b64 + b'.' + signature_b64
5474     return token
5475
5476
5477 # can be extended in future to verify the signature and parse header and return the algorithm used if it's not HS256
5478 def jwt_decode_hs256(jwt):
5479     header_b64, payload_b64, signature_b64 = jwt.split('.')
5480     payload_data = json.loads(base64.urlsafe_b64decode(payload_b64))
5481     return payload_data
5482
5483
5484 WINDOWS_VT_MODE = False if compat_os_name == 'nt' else None
5485
5486
5487 @functools.cache
5488 def supports_terminal_sequences(stream):
5489     if compat_os_name == 'nt':
5490         if not WINDOWS_VT_MODE:
5491             return False
5492     elif not os.getenv('TERM'):
5493         return False
5494     try:
5495         return stream.isatty()
5496     except BaseException:
5497         return False
5498
5499
5500 def windows_enable_vt_mode():  # TODO: Do this the proper way https://bugs.python.org/issue30075
5501     if get_windows_version() < (10, 0, 10586):
5502         return
5503     global WINDOWS_VT_MODE
5504     try:
5505         Popen.run('', shell=True)
5506     except Exception:
5507         return
5508
5509     WINDOWS_VT_MODE = True
5510     supports_terminal_sequences.cache_clear()
5511
5512
5513 _terminal_sequences_re = re.compile('\033\\[[^m]+m')
5514
5515
5516 def remove_terminal_sequences(string):
5517     return _terminal_sequences_re.sub('', string)
5518
5519
5520 def number_of_digits(number):
5521     return len('%d' % number)
5522
5523
5524 def join_nonempty(*values, delim='-', from_dict=None):
5525     if from_dict is not None:
5526         values = (traverse_obj(from_dict, variadic(v)) for v in values)
5527     return delim.join(map(str, filter(None, values)))
5528
5529
5530 def scale_thumbnails_to_max_format_width(formats, thumbnails, url_width_re):
5531     """
5532     Find the largest format dimensions in terms of video width and, for each thumbnail:
5533     * Modify the URL: Match the width with the provided regex and replace with the former width
5534     * Update dimensions
5535
5536     This function is useful with video services that scale the provided thumbnails on demand
5537     """
5538     _keys = ('width', 'height')
5539     max_dimensions = max(
5540         (tuple(format.get(k) or 0 for k in _keys) for format in formats),
5541         default=(0, 0))
5542     if not max_dimensions[0]:
5543         return thumbnails
5544     return [
5545         merge_dicts(
5546             {'url': re.sub(url_width_re, str(max_dimensions[0]), thumbnail['url'])},
5547             dict(zip(_keys, max_dimensions)), thumbnail)
5548         for thumbnail in thumbnails
5549     ]
5550
5551
5552 def parse_http_range(range):
5553     """ Parse value of "Range" or "Content-Range" HTTP header into tuple. """
5554     if not range:
5555         return None, None, None
5556     crg = re.search(r'bytes[ =](\d+)-(\d+)?(?:/(\d+))?', range)
5557     if not crg:
5558         return None, None, None
5559     return int(crg.group(1)), int_or_none(crg.group(2)), int_or_none(crg.group(3))
5560
5561
5562 def read_stdin(what):
5563     eof = 'Ctrl+Z' if compat_os_name == 'nt' else 'Ctrl+D'
5564     write_string(f'Reading {what} from STDIN - EOF ({eof}) to end:\n')
5565     return sys.stdin
5566
5567
5568 def determine_file_encoding(data):
5569     """
5570     Detect the text encoding used
5571     @returns (encoding, bytes to skip)
5572     """
5573
5574     # BOM marks are given priority over declarations
5575     for bom, enc in BOMS:
5576         if data.startswith(bom):
5577             return enc, len(bom)
5578
5579     # Strip off all null bytes to match even when UTF-16 or UTF-32 is used.
5580     # We ignore the endianness to get a good enough match
5581     data = data.replace(b'\0', b'')
5582     mobj = re.match(rb'(?m)^#\s*coding\s*:\s*(\S+)\s*$', data)
5583     return mobj.group(1).decode() if mobj else None, 0
5584
5585
5586 class Config:
5587     own_args = None
5588     parsed_args = None
5589     filename = None
5590     __initialized = False
5591
5592     def __init__(self, parser, label=None):
5593         self.parser, self.label = parser, label
5594         self._loaded_paths, self.configs = set(), []
5595
5596     def init(self, args=None, filename=None):
5597         assert not self.__initialized
5598         self.own_args, self.filename = args, filename
5599         return self.load_configs()
5600
5601     def load_configs(self):
5602         directory = ''
5603         if self.filename:
5604             location = os.path.realpath(self.filename)
5605             directory = os.path.dirname(location)
5606             if location in self._loaded_paths:
5607                 return False
5608             self._loaded_paths.add(location)
5609
5610         self.__initialized = True
5611         opts, _ = self.parser.parse_known_args(self.own_args)
5612         self.parsed_args = self.own_args
5613         for location in opts.config_locations or []:
5614             if location == '-':
5615                 if location in self._loaded_paths:
5616                     continue
5617                 self._loaded_paths.add(location)
5618                 self.append_config(shlex.split(read_stdin('options'), comments=True), label='stdin')
5619                 continue
5620             location = os.path.join(directory, expand_path(location))
5621             if os.path.isdir(location):
5622                 location = os.path.join(location, 'yt-dlp.conf')
5623             if not os.path.exists(location):
5624                 self.parser.error(f'config location {location} does not exist')
5625             self.append_config(self.read_file(location), location)
5626         return True
5627
5628     def __str__(self):
5629         label = join_nonempty(
5630             self.label, 'config', f'"{self.filename}"' if self.filename else '',
5631             delim=' ')
5632         return join_nonempty(
5633             self.own_args is not None and f'{label[0].upper()}{label[1:]}: {self.hide_login_info(self.own_args)}',
5634             *(f'\n{c}'.replace('\n', '\n| ')[1:] for c in self.configs),
5635             delim='\n')
5636
5637     @staticmethod
5638     def read_file(filename, default=[]):
5639         try:
5640             optionf = open(filename, 'rb')
5641         except OSError:
5642             return default  # silently skip if file is not present
5643         try:
5644             enc, skip = determine_file_encoding(optionf.read(512))
5645             optionf.seek(skip, io.SEEK_SET)
5646         except OSError:
5647             enc = None  # silently skip read errors
5648         try:
5649             # FIXME: https://github.com/ytdl-org/youtube-dl/commit/dfe5fa49aed02cf36ba9f743b11b0903554b5e56
5650             contents = optionf.read().decode(enc or preferredencoding())
5651             res = shlex.split(contents, comments=True)
5652         except Exception as err:
5653             raise ValueError(f'Unable to parse "{filename}": {err}')
5654         finally:
5655             optionf.close()
5656         return res
5657
5658     @staticmethod
5659     def hide_login_info(opts):
5660         PRIVATE_OPTS = {'-p', '--password', '-u', '--username', '--video-password', '--ap-password', '--ap-username'}
5661         eqre = re.compile('^(?P<key>' + ('|'.join(re.escape(po) for po in PRIVATE_OPTS)) + ')=.+$')
5662
5663         def _scrub_eq(o):
5664             m = eqre.match(o)
5665             if m:
5666                 return m.group('key') + '=PRIVATE'
5667             else:
5668                 return o
5669
5670         opts = list(map(_scrub_eq, opts))
5671         for idx, opt in enumerate(opts):
5672             if opt in PRIVATE_OPTS and idx + 1 < len(opts):
5673                 opts[idx + 1] = 'PRIVATE'
5674         return opts
5675
5676     def append_config(self, *args, label=None):
5677         config = type(self)(self.parser, label)
5678         config._loaded_paths = self._loaded_paths
5679         if config.init(*args):
5680             self.configs.append(config)
5681
5682     @property
5683     def all_args(self):
5684         for config in reversed(self.configs):
5685             yield from config.all_args
5686         yield from self.parsed_args or []
5687
5688     def parse_known_args(self, **kwargs):
5689         return self.parser.parse_known_args(self.all_args, **kwargs)
5690
5691     def parse_args(self):
5692         return self.parser.parse_args(self.all_args)
5693
5694
5695 class WebSocketsWrapper():
5696     """Wraps websockets module to use in non-async scopes"""
5697     pool = None
5698
5699     def __init__(self, url, headers=None, connect=True):
5700         self.loop = asyncio.new_event_loop()
5701         # XXX: "loop" is deprecated
5702         self.conn = websockets.connect(
5703             url, extra_headers=headers, ping_interval=None,
5704             close_timeout=float('inf'), loop=self.loop, ping_timeout=float('inf'))
5705         if connect:
5706             self.__enter__()
5707         atexit.register(self.__exit__, None, None, None)
5708
5709     def __enter__(self):
5710         if not self.pool:
5711             self.pool = self.run_with_loop(self.conn.__aenter__(), self.loop)
5712         return self
5713
5714     def send(self, *args):
5715         self.run_with_loop(self.pool.send(*args), self.loop)
5716
5717     def recv(self, *args):
5718         return self.run_with_loop(self.pool.recv(*args), self.loop)
5719
5720     def __exit__(self, type, value, traceback):
5721         try:
5722             return self.run_with_loop(self.conn.__aexit__(type, value, traceback), self.loop)
5723         finally:
5724             self.loop.close()
5725             self._cancel_all_tasks(self.loop)
5726
5727     # taken from https://github.com/python/cpython/blob/3.9/Lib/asyncio/runners.py with modifications
5728     # for contributors: If there's any new library using asyncio needs to be run in non-async, move these function out of this class
5729     @staticmethod
5730     def run_with_loop(main, loop):
5731         if not asyncio.iscoroutine(main):
5732             raise ValueError(f'a coroutine was expected, got {main!r}')
5733
5734         try:
5735             return loop.run_until_complete(main)
5736         finally:
5737             loop.run_until_complete(loop.shutdown_asyncgens())
5738             if hasattr(loop, 'shutdown_default_executor'):
5739                 loop.run_until_complete(loop.shutdown_default_executor())
5740
5741     @staticmethod
5742     def _cancel_all_tasks(loop):
5743         to_cancel = asyncio.all_tasks(loop)
5744
5745         if not to_cancel:
5746             return
5747
5748         for task in to_cancel:
5749             task.cancel()
5750
5751         # XXX: "loop" is removed in python 3.10+
5752         loop.run_until_complete(
5753             asyncio.gather(*to_cancel, loop=loop, return_exceptions=True))
5754
5755         for task in to_cancel:
5756             if task.cancelled():
5757                 continue
5758             if task.exception() is not None:
5759                 loop.call_exception_handler({
5760                     'message': 'unhandled exception during asyncio.run() shutdown',
5761                     'exception': task.exception(),
5762                     'task': task,
5763                 })
5764
5765
5766 def merge_headers(*dicts):
5767     """Merge dicts of http headers case insensitively, prioritizing the latter ones"""
5768     return {k.title(): v for k, v in itertools.chain.from_iterable(map(dict.items, dicts))}
5769
5770
5771 def cached_method(f):
5772     """Cache a method"""
5773     signature = inspect.signature(f)
5774
5775     @functools.wraps(f)
5776     def wrapper(self, *args, **kwargs):
5777         bound_args = signature.bind(self, *args, **kwargs)
5778         bound_args.apply_defaults()
5779         key = tuple(bound_args.arguments.values())
5780
5781         if not hasattr(self, '__cached_method__cache'):
5782             self.__cached_method__cache = {}
5783         cache = self.__cached_method__cache.setdefault(f.__name__, {})
5784         if key not in cache:
5785             cache[key] = f(self, *args, **kwargs)
5786         return cache[key]
5787     return wrapper
5788
5789
5790 class classproperty:
5791     """property access for class methods"""
5792
5793     def __init__(self, func):
5794         functools.update_wrapper(self, func)
5795         self.func = func
5796
5797     def __get__(self, _, cls):
5798         return self.func(cls)
5799
5800
5801 class Namespace(types.SimpleNamespace):
5802     """Immutable namespace"""
5803
5804     def __iter__(self):
5805         return iter(self.__dict__.values())
5806
5807     @property
5808     def items_(self):
5809         return self.__dict__.items()
5810
5811
5812 MEDIA_EXTENSIONS = Namespace(
5813     common_video=('avi', 'flv', 'mkv', 'mov', 'mp4', 'webm'),
5814     video=('3g2', '3gp', 'f4v', 'mk3d', 'divx', 'mpg', 'ogv', 'm4v', 'wmv'),
5815     common_audio=('aiff', 'alac', 'flac', 'm4a', 'mka', 'mp3', 'ogg', 'opus', 'wav'),
5816     audio=('aac', 'ape', 'asf', 'f4a', 'f4b', 'm4b', 'm4p', 'm4r', 'oga', 'ogx', 'spx', 'vorbis', 'wma'),
5817     thumbnails=('jpg', 'png', 'webp'),
5818     storyboards=('mhtml', ),
5819     subtitles=('srt', 'vtt', 'ass', 'lrc'),
5820     manifests=('f4f', 'f4m', 'm3u8', 'smil', 'mpd'),
5821 )
5822 MEDIA_EXTENSIONS.video += MEDIA_EXTENSIONS.common_video
5823 MEDIA_EXTENSIONS.audio += MEDIA_EXTENSIONS.common_audio
5824
5825 KNOWN_EXTENSIONS = (*MEDIA_EXTENSIONS.video, *MEDIA_EXTENSIONS.audio, *MEDIA_EXTENSIONS.manifests)
5826
5827
5828 class RetryManager:
5829     """Usage:
5830         for retry in RetryManager(...):
5831             try:
5832                 ...
5833             except SomeException as err:
5834                 retry.error = err
5835                 continue
5836     """
5837     attempt, _error = 0, None
5838
5839     def __init__(self, _retries, _error_callback, **kwargs):
5840         self.retries = _retries or 0
5841         self.error_callback = functools.partial(_error_callback, **kwargs)
5842
5843     def _should_retry(self):
5844         return self._error is not NO_DEFAULT and self.attempt <= self.retries
5845
5846     @property
5847     def error(self):
5848         if self._error is NO_DEFAULT:
5849             return None
5850         return self._error
5851
5852     @error.setter
5853     def error(self, value):
5854         self._error = value
5855
5856     def __iter__(self):
5857         while self._should_retry():
5858             self.error = NO_DEFAULT
5859             self.attempt += 1
5860             yield self
5861             if self.error:
5862                 self.error_callback(self.error, self.attempt, self.retries)
5863
5864     @staticmethod
5865     def report_retry(e, count, retries, *, sleep_func, info, warn, error=None, suffix=None):
5866         """Utility function for reporting retries"""
5867         if count > retries:
5868             if error:
5869                 return error(f'{e}. Giving up after {count - 1} retries') if count > 1 else error(str(e))
5870             raise e
5871
5872         if not count:
5873             return warn(e)
5874         elif isinstance(e, ExtractorError):
5875             e = remove_end(str_or_none(e.cause) or e.orig_msg, '.')
5876         warn(f'{e}. Retrying{format_field(suffix, None, " %s")} ({count}/{retries})...')
5877
5878         delay = float_or_none(sleep_func(n=count - 1)) if callable(sleep_func) else sleep_func
5879         if delay:
5880             info(f'Sleeping {delay:.2f} seconds ...')
5881             time.sleep(delay)
5882
5883
5884 def make_archive_id(ie, video_id):
5885     ie_key = ie if isinstance(ie, str) else ie.ie_key()
5886     return f'{ie_key.lower()} {video_id}'
5887
5888
5889 def truncate_string(s, left, right=0):
5890     assert left > 3 and right >= 0
5891     if s is None or len(s) <= left + right:
5892         return s
5893     return f'{s[:left-3]}...{s[-right:]}'
5894
5895
5896 def orderedSet_from_options(options, alias_dict, *, use_regex=False, start=None):
5897     assert 'all' in alias_dict, '"all" alias is required'
5898     requested = list(start or [])
5899     for val in options:
5900         discard = val.startswith('-')
5901         if discard:
5902             val = val[1:]
5903
5904         if val in alias_dict:
5905             val = alias_dict[val] if not discard else [
5906                 i[1:] if i.startswith('-') else f'-{i}' for i in alias_dict[val]]
5907             # NB: Do not allow regex in aliases for performance
5908             requested = orderedSet_from_options(val, alias_dict, start=requested)
5909             continue
5910
5911         current = (filter(re.compile(val, re.I).fullmatch, alias_dict['all']) if use_regex
5912                    else [val] if val in alias_dict['all'] else None)
5913         if current is None:
5914             raise ValueError(val)
5915
5916         if discard:
5917             for item in current:
5918                 while item in requested:
5919                     requested.remove(item)
5920         else:
5921             requested.extend(current)
5922
5923     return orderedSet(requested)
5924
5925
5926 # Deprecated
5927 has_certifi = bool(certifi)
5928 has_websockets = bool(websockets)