#!/usr/bin/env python3
import asyncio
import atexit
import base64
import binascii
import calendar
import codecs
import collections
import contextlib
import ctypes
import datetime
import email.header
import email.utils
import errno
import functools
import gzip
import hashlib
import hmac
import importlib.util
import io
import itertools
import json
import locale
import math
import mimetypes
import operator
import os
import platform
import random
import re
import shlex
import socket
import ssl
import subprocess
import sys
import tempfile
import time
import traceback
import urllib.parse
import xml.etree.ElementTree
import zlib

from .compat import (
    compat_brotli,
    compat_chr,
    compat_cookiejar,
    compat_etree_fromstring,
    compat_expanduser,
    compat_html_entities,
    compat_html_entities_html5,
    compat_HTMLParseError,
    compat_HTMLParser,
    compat_http_client,
    compat_HTTPError,
    compat_os_name,
    compat_parse_qs,
    compat_shlex_quote,
    compat_str,
    compat_struct_pack,
    compat_struct_unpack,
    compat_urllib_error,
    compat_urllib_parse_unquote_plus,
    compat_urllib_parse_urlencode,
    compat_urllib_parse_urlparse,
    compat_urllib_request,
    compat_urlparse,
    compat_websockets,
)
from .socks import ProxyType, sockssocket

try:
    import certifi
    has_certifi = True
except ImportError:
    has_certifi = False


def register_socks_protocols():
    # "Register" SOCKS protocols
    # In Python < 2.6.5, urlsplit() suffers from bug https://bugs.python.org/issue7904
    # URLs with protocols not in urlparse.uses_netloc are not handled correctly
    for scheme in ('socks', 'socks4', 'socks4a', 'socks5'):
        if scheme not in compat_urlparse.uses_netloc:
            compat_urlparse.uses_netloc.append(scheme)


# This is not clearly defined otherwise
compiled_regex_type = type(re.compile(''))


def random_user_agent():
    _USER_AGENT_TPL = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/%s Safari/537.36'
    _CHROME_VERSIONS = (
        '90.0.4430.212',
        '90.0.4430.24',
        '90.0.4430.70',
        '90.0.4430.72',
        '90.0.4430.85',
        '90.0.4430.93',
        '91.0.4472.101',
        '91.0.4472.106',
        '91.0.4472.114',
        '91.0.4472.124',
        '91.0.4472.164',
        '91.0.4472.19',
        '91.0.4472.77',
        '92.0.4515.107',
        '92.0.4515.115',
        '92.0.4515.131',
        '92.0.4515.159',
        '92.0.4515.43',
        '93.0.4556.0',
        '93.0.4577.15',
        '93.0.4577.63',
        '93.0.4577.82',
        '94.0.4606.41',
        '94.0.4606.54',
        '94.0.4606.61',
        '94.0.4606.71',
        '94.0.4606.81',
        '94.0.4606.85',
        '95.0.4638.17',
        '95.0.4638.50',
        '95.0.4638.54',
        '95.0.4638.69',
        '95.0.4638.74',
        '96.0.4664.18',
        '96.0.4664.45',
        '96.0.4664.55',
        '96.0.4664.93',
        '97.0.4692.20',
    )
    return _USER_AGENT_TPL % random.choice(_CHROME_VERSIONS)


SUPPORTED_ENCODINGS = [
    'gzip', 'deflate'
]
if compat_brotli:
    SUPPORTED_ENCODINGS.append('br')

std_headers = {
    'User-Agent': random_user_agent(),
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'Accept-Language': 'en-us,en;q=0.5',
    'Sec-Fetch-Mode': 'navigate',
}


USER_AGENTS = {
    'Safari': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27',
}


NO_DEFAULT = object()

ENGLISH_MONTH_NAMES = [
    'January', 'February', 'March', 'April', 'May', 'June',
    'July', 'August', 'September', 'October', 'November', 'December']

MONTH_NAMES = {
    'en': ENGLISH_MONTH_NAMES,
    'fr': [
        'janvier', 'février', 'mars', 'avril', 'mai', 'juin',
        'juillet', 'août', 'septembre', 'octobre', 'novembre', 'décembre'],
}

KNOWN_EXTENSIONS = (
    'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'aac',
    'flv', 'f4v', 'f4a', 'f4b',
    'webm', 'ogg', 'ogv', 'oga', 'ogx', 'spx', 'opus',
    'mkv', 'mka', 'mk3d',
    'avi', 'divx',
    'mov',
    'asf', 'wmv', 'wma',
    '3gp', '3g2',
    'mp3',
    'flac',
    'ape',
    'wav',
    'f4f', 'f4m', 'm3u8', 'smil')

# needed for sanitizing filenames in restricted mode
ACCENT_CHARS = dict(zip('ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖŐØŒÙÚÛÜŰÝÞßàáâãäåæçèéêëìíîïðñòóôõöőøœùúûüűýþÿ',
                        itertools.chain('AAAAAA', ['AE'], 'CEEEEIIIIDNOOOOOOO', ['OE'], 'UUUUUY', ['TH', 'ss'],
                                        'aaaaaa', ['ae'], 'ceeeeiiiionooooooo', ['oe'], 'uuuuuy', ['th'], 'y')))

DATE_FORMATS = (
    '%d %B %Y',
    '%d %b %Y',
    '%B %d %Y',
    '%B %dst %Y',
    '%B %dnd %Y',
    '%B %drd %Y',
    '%B %dth %Y',
    '%b %d %Y',
    '%b %dst %Y',
    '%b %dnd %Y',
    '%b %drd %Y',
    '%b %dth %Y',
    '%b %dst %Y %I:%M',
    '%b %dnd %Y %I:%M',
    '%b %drd %Y %I:%M',
    '%b %dth %Y %I:%M',
    '%Y %m %d',
    '%Y-%m-%d',
    '%Y.%m.%d.',
    '%Y/%m/%d',
    '%Y/%m/%d %H:%M',
    '%Y/%m/%d %H:%M:%S',
    '%Y%m%d%H%M',
    '%Y%m%d%H%M%S',
    '%Y%m%d',
    '%Y-%m-%d %H:%M',
    '%Y-%m-%d %H:%M:%S',
    '%Y-%m-%d %H:%M:%S.%f',
    '%Y-%m-%d %H:%M:%S:%f',
    '%d.%m.%Y %H:%M',
    '%d.%m.%Y %H.%M',
    '%Y-%m-%dT%H:%M:%SZ',
    '%Y-%m-%dT%H:%M:%S.%fZ',
    '%Y-%m-%dT%H:%M:%S.%f0Z',
    '%Y-%m-%dT%H:%M:%S',
    '%Y-%m-%dT%H:%M:%S.%f',
    '%Y-%m-%dT%H:%M',
    '%b %d %Y at %H:%M',
    '%b %d %Y at %H:%M:%S',
    '%B %d %Y at %H:%M',
    '%B %d %Y at %H:%M:%S',
    '%H:%M %d-%b-%Y',
)

DATE_FORMATS_DAY_FIRST = list(DATE_FORMATS)
DATE_FORMATS_DAY_FIRST.extend([
    '%d-%m-%Y',
    '%d.%m.%Y',
    '%d.%m.%y',
    '%d/%m/%Y',
    '%d/%m/%y',
    '%d/%m/%Y %H:%M:%S',
])

DATE_FORMATS_MONTH_FIRST = list(DATE_FORMATS)
DATE_FORMATS_MONTH_FIRST.extend([
    '%m-%d-%Y',
    '%m.%d.%Y',
    '%m/%d/%Y',
    '%m/%d/%y',
    '%m/%d/%Y %H:%M:%S',
])

PACKED_CODES_RE = r"}\('(.+)',(\d+),(\d+),'([^']+)'\.split\('\|'\)"
JSON_LD_RE = r'(?is)<script[^>]+type=(["\']?)application/ld\+json\1[^>]*>(?P<json_ld>.+?)</script>'


def preferredencoding():
    """Get preferred encoding.

    Returns the best encoding scheme for the system, based on
    locale.getpreferredencoding() and some further tweaks.
    """
    try:
        pref = locale.getpreferredencoding()
        'TEST'.encode(pref)
    except Exception:
        pref = 'UTF-8'

    return pref


def write_json_file(obj, fn):
    """ Encode obj as JSON and write it to fn, atomically if possible """

    tf = tempfile.NamedTemporaryFile(
        prefix=f'{os.path.basename(fn)}.', dir=os.path.dirname(fn),
        suffix='.tmp', delete=False, mode='w', encoding='utf-8')

    try:
        with tf:
            json.dump(obj, tf, ensure_ascii=False)
        if sys.platform == 'win32':
            # Need to remove existing file on Windows, else os.rename raises
            # WindowsError or FileExistsError.
            try:
                os.unlink(fn)
            except OSError:
                pass
        try:
            mask = os.umask(0)
            os.umask(mask)
            os.chmod(tf.name, 0o666 & ~mask)
        except OSError:
            pass
        os.rename(tf.name, fn)
    except Exception:
        try:
            os.remove(tf.name)
        except OSError:
            pass
        raise


def find_xpath_attr(node, xpath, key, val=None):
    """ Find the xpath xpath[@key=val] """
    assert re.match(r'^[a-zA-Z_-]+$', key)
    expr = xpath + ('[@%s]' % key if val is None else f"[@{key}='{val}']")
    return node.find(expr)

# On python2.6 the xml.etree.ElementTree.Element methods don't support
# the namespace parameter


def xpath_with_ns(path, ns_map):
    components = [c.split(':') for c in path.split('/')]
    replaced = []
    for c in components:
        if len(c) == 1:
            replaced.append(c[0])
        else:
            ns, tag = c
            replaced.append('{%s}%s' % (ns_map[ns], tag))
    return '/'.join(replaced)
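
# Illustrative usage of xpath_with_ns (a sketch; the namespace URL below is
# hypothetical, not taken from this module):
#   >>> xpath_with_ns('media:content/media:title', {'media': 'http://example.com/ns'})
#   '{http://example.com/ns}content/{http://example.com/ns}title'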


def xpath_element(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
    def _find_xpath(xpath):
        return node.find(xpath)

    if isinstance(xpath, (str, compat_str)):
        n = _find_xpath(xpath)
    else:
        for xp in xpath:
            n = _find_xpath(xp)
            if n is not None:
                break

    if n is None:
        if default is not NO_DEFAULT:
            return default
        elif fatal:
            name = xpath if name is None else name
            raise ExtractorError('Could not find XML element %s' % name)
        else:
            return None
    return n


def xpath_text(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
    n = xpath_element(node, xpath, name, fatal=fatal, default=default)
    if n is None or n == default:
        return n
    if n.text is None:
        if default is not NO_DEFAULT:
            return default
        elif fatal:
            name = xpath if name is None else name
            raise ExtractorError('Could not find XML element\'s text %s' % name)
        else:
            return None
    return n.text


def xpath_attr(node, xpath, key, name=None, fatal=False, default=NO_DEFAULT):
    n = find_xpath_attr(node, xpath, key)
    if n is None:
        if default is not NO_DEFAULT:
            return default
        elif fatal:
            name = f'{xpath}[@{key}]' if name is None else name
            raise ExtractorError('Could not find XML attribute %s' % name)
        else:
            return None
    return n.attrib[key]


def get_element_by_id(id, html):
    """Return the content of the tag with the specified ID in the passed HTML document"""
    return get_element_by_attribute('id', id, html)


def get_element_html_by_id(id, html):
    """Return the html of the tag with the specified ID in the passed HTML document"""
    return get_element_html_by_attribute('id', id, html)


def get_element_by_class(class_name, html):
    """Return the content of the first tag with the specified class in the passed HTML document"""
    retval = get_elements_by_class(class_name, html)
    return retval[0] if retval else None
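
# Illustrative usage (a sketch with made-up markup): the class only needs to be
# one of the element's classes:
#   >>> get_element_by_class('foo', '<div class="foo bar">nice</div>')
#   'nice'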


def get_element_html_by_class(class_name, html):
    """Return the html of the first tag with the specified class in the passed HTML document"""
    retval = get_elements_html_by_class(class_name, html)
    return retval[0] if retval else None


def get_element_by_attribute(attribute, value, html, escape_value=True):
    retval = get_elements_by_attribute(attribute, value, html, escape_value)
    return retval[0] if retval else None


def get_element_html_by_attribute(attribute, value, html, escape_value=True):
    retval = get_elements_html_by_attribute(attribute, value, html, escape_value)
    return retval[0] if retval else None


def get_elements_by_class(class_name, html):
    """Return the content of all tags with the specified class in the passed HTML document as a list"""
    return get_elements_by_attribute(
        'class', r'[^\'"]*\b%s\b[^\'"]*' % re.escape(class_name),
        html, escape_value=False)


def get_elements_html_by_class(class_name, html):
    """Return the html of all tags with the specified class in the passed HTML document as a list"""
    return get_elements_html_by_attribute(
        'class', r'[^\'"]*\b%s\b[^\'"]*' % re.escape(class_name),
        html, escape_value=False)


def get_elements_by_attribute(*args, **kwargs):
    """Return the content of the tag with the specified attribute in the passed HTML document"""
    return [content for content, _ in get_elements_text_and_html_by_attribute(*args, **kwargs)]


def get_elements_html_by_attribute(*args, **kwargs):
    """Return the html of the tag with the specified attribute in the passed HTML document"""
    return [whole for _, whole in get_elements_text_and_html_by_attribute(*args, **kwargs)]


def get_elements_text_and_html_by_attribute(attribute, value, html, escape_value=True):
    """
    Return the text (content) and the html (whole) of the tag with the specified
    attribute in the passed HTML document
    """

    quote = '' if re.match(r'''[\s"'`=<>]''', value) else '?'

    value = re.escape(value) if escape_value else value

    partial_element_re = rf'''(?x)
        <(?P<tag>[a-zA-Z0-9:._-]+)
         (?:\s(?:[^>"']|"[^"]*"|'[^']*')*)?
         \s{re.escape(attribute)}\s*=\s*(?P<_q>['"]{quote})(?-x:{value})(?P=_q)
        '''

    for m in re.finditer(partial_element_re, html):
        content, whole = get_element_text_and_html_by_tag(m.group('tag'), html[m.start():])

        yield (
            unescapeHTML(re.sub(r'^(?P<q>["\'])(?P<content>.*)(?P=q)$', r'\g<content>', content, flags=re.DOTALL)),
            whole
        )


class HTMLBreakOnClosingTagParser(compat_HTMLParser):
    """
    HTML parser which raises HTMLBreakOnClosingTagException upon reaching the
    closing tag for the first opening tag it has encountered, and can be used
    as a context manager
    """

    class HTMLBreakOnClosingTagException(Exception):
        pass

    def __init__(self):
        self.tagstack = collections.deque()
        compat_HTMLParser.__init__(self)

    def __enter__(self):
        return self

    def __exit__(self, *_):
        self.close()

    def close(self):
        # handle_endtag does not return upon raising HTMLBreakOnClosingTagException,
        # so data remains buffered; we no longer have any interest in it, thus
        # override this method to discard it
        pass

    def handle_starttag(self, tag, _):
        self.tagstack.append(tag)

    def handle_endtag(self, tag):
        if not self.tagstack:
            raise compat_HTMLParseError('no tags in the stack')
        while self.tagstack:
            inner_tag = self.tagstack.pop()
            if inner_tag == tag:
                break
        else:
            raise compat_HTMLParseError(f'matching opening tag for closing {tag} tag not found')
        if not self.tagstack:
            raise self.HTMLBreakOnClosingTagException()


def get_element_text_and_html_by_tag(tag, html):
    """
    For the first element with the specified tag in the passed HTML document
    return its content (text) and the whole element (html)
503 """
504 def find_or_raise(haystack, needle, exc):
505 try:
506 return haystack.index(needle)
507 except ValueError:
508 raise exc
509 closing_tag = f'</{tag}>'
510 whole_start = find_or_raise(
511 html, f'<{tag}', compat_HTMLParseError(f'opening {tag} tag not found'))
512 content_start = find_or_raise(
513 html[whole_start:], '>', compat_HTMLParseError(f'malformed opening {tag} tag'))
514 content_start += whole_start + 1
515 with HTMLBreakOnClosingTagParser() as parser:
516 parser.feed(html[whole_start:content_start])
517 if not parser.tagstack or parser.tagstack[0] != tag:
518 raise compat_HTMLParseError(f'parser did not match opening {tag} tag')
519 offset = content_start
520 while offset < len(html):
521 next_closing_tag_start = find_or_raise(
522 html[offset:], closing_tag,
523 compat_HTMLParseError(f'closing {tag} tag not found'))
524 next_closing_tag_end = next_closing_tag_start + len(closing_tag)
525 try:
526 parser.feed(html[offset:offset + next_closing_tag_end])
527 offset += next_closing_tag_end
528 except HTMLBreakOnClosingTagParser.HTMLBreakOnClosingTagException:
529 return html[content_start:offset + next_closing_tag_start], \
530 html[whole_start:offset + next_closing_tag_end]
531 raise compat_HTMLParseError('unexpected end of html')
532
533
534 class HTMLAttributeParser(compat_HTMLParser):
535 """Trivial HTML parser to gather the attributes for a single element"""
536
537 def __init__(self):
538 self.attrs = {}
539 compat_HTMLParser.__init__(self)
540
541 def handle_starttag(self, tag, attrs):
542 self.attrs = dict(attrs)
543
544
545 class HTMLListAttrsParser(compat_HTMLParser):
546 """HTML parser to gather the attributes for the elements of a list"""
547
548 def __init__(self):
549 compat_HTMLParser.__init__(self)
550 self.items = []
551 self._level = 0
552
553 def handle_starttag(self, tag, attrs):
554 if tag == 'li' and self._level == 0:
555 self.items.append(dict(attrs))
556 self._level += 1
557
558 def handle_endtag(self, tag):
559 self._level -= 1
560
561
562 def extract_attributes(html_element):
563 """Given a string for an HTML element such as
564 <el
565 a="foo" B="bar" c="&98;az" d=boz
566 empty= noval entity="&amp;"
567 sq='"' dq="'"
568 >
569 Decode and return a dictionary of attributes.
570 {
571 'a': 'foo', 'b': 'bar', c: 'baz', d: 'boz',
572 'empty': '', 'noval': None, 'entity': '&',
573 'sq': '"', 'dq': '\''
574 }.
575 """
576 parser = HTMLAttributeParser()
577 try:
578 parser.feed(html_element)
579 parser.close()
580 # Older Python may throw HTMLParseError in case of malformed HTML
581 except compat_HTMLParseError:
582 pass
583 return parser.attrs
584
585
586 def parse_list(webpage):
587 """Given a string for an series of HTML <li> elements,
588 return a dictionary of their attributes"""
    parser = HTMLListAttrsParser()
    parser.feed(webpage)
    parser.close()
    return parser.items


def clean_html(html):
    """Clean an HTML snippet into a readable string"""

    if html is None:  # Convenience for sanitizing descriptions etc.
        return html

    html = re.sub(r'\s+', ' ', html)
    html = re.sub(r'(?u)\s?<\s?br\s?/?\s?>\s?', '\n', html)
    html = re.sub(r'(?u)<\s?/\s?p\s?>\s?<\s?p[^>]*>', '\n', html)
    # Strip html tags
    html = re.sub('<.*?>', '', html)
    # Replace html entities
    html = unescapeHTML(html)
    return html.strip()
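
# Illustrative usage (a sketch): tags are stripped, <br> becomes a newline and
# entities are decoded:
#   >>> clean_html('<p>a<br/>b &amp; c</p>')
#   'a\nb & c'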


def sanitize_open(filename, open_mode):
    """Try to open the given filename, and slightly tweak it if this fails.

    Attempts to open the given filename. If this fails, it tries to change
    the filename slightly, step by step, until it's either able to open it
    or it fails and raises a final exception, like the standard open()
    function.

    It returns the tuple (stream, definitive_file_name).
    """
    if filename == '-':
        if sys.platform == 'win32':
            import msvcrt
            msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
        return (sys.stdout.buffer if hasattr(sys.stdout, 'buffer') else sys.stdout, filename)

    for attempt in range(2):
        try:
            try:
                if sys.platform == 'win32':
                    # FIXME: An exclusive lock also locks the file from being read.
                    # Since windows locks are mandatory, don't lock the file on windows (for now).
                    # Ref: https://github.com/yt-dlp/yt-dlp/issues/3124
                    raise LockingUnsupportedError()
                stream = locked_file(filename, open_mode, block=False).__enter__()
            except LockingUnsupportedError:
                stream = open(filename, open_mode)
            return (stream, filename)
        except OSError as err:
            if attempt or err.errno in (errno.EACCES,):
                raise
            old_filename, filename = filename, sanitize_path(filename)
            if old_filename == filename:
                raise


def timeconvert(timestr):
    """Convert RFC 2822 defined time string into system timestamp"""
    timestamp = None
    timetuple = email.utils.parsedate_tz(timestr)
    if timetuple is not None:
        timestamp = email.utils.mktime_tz(timetuple)
    return timestamp
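
# Illustrative usage (a sketch): the epoch in RFC 2822 form maps to timestamp 0,
# and unparseable input yields None:
#   >>> timeconvert('Thu, 01 Jan 1970 00:00:00 +0000')
#   0
#   >>> timeconvert('not a date') is None
#   True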


def sanitize_filename(s, restricted=False, is_id=NO_DEFAULT):
    """Sanitizes a string so it could be used as part of a filename.
    @param restricted   Use a stricter subset of allowed characters
    @param is_id        Whether this is an ID that should be kept unchanged if possible.
                        If unset, yt-dlp's new sanitization rules are in effect
    """
    if s == '':
        return ''

    def replace_insane(char):
        if restricted and char in ACCENT_CHARS:
            return ACCENT_CHARS[char]
        elif not restricted and char == '\n':
            return '\0 '
        elif char == '?' or ord(char) < 32 or ord(char) == 127:
            return ''
        elif char == '"':
            return '' if restricted else '\''
        elif char == ':':
            return '\0_\0-' if restricted else '\0 \0-'
        elif char in '\\/|*<>':
            return '\0_'
        if restricted and (char in '!&\'()[]{}$;`^,#' or char.isspace() or ord(char) > 127):
            return '\0_'
        return char

    s = re.sub(r'[0-9]+(?::[0-9]+)+', lambda m: m.group(0).replace(':', '_'), s)  # Handle timestamps
    result = ''.join(map(replace_insane, s))
    if is_id is NO_DEFAULT:
        result = re.sub('(\0.)(?:(?=\\1)..)+', r'\1', result)  # Remove repeated substitute chars
        STRIP_RE = '(?:\0.|[ _-])*'
        result = re.sub(f'^\0.{STRIP_RE}|{STRIP_RE}\0.$', '', result)  # Remove substitute chars from start/end
    result = result.replace('\0', '') or '_'

    if not is_id:
        while '__' in result:
            result = result.replace('__', '_')
        result = result.strip('_')
        # Common case of "Foreign band name - English song title"
        if restricted and result.startswith('-_'):
            result = result[2:]
        if result.startswith('-'):
            result = '_' + result[len('-'):]
        result = result.lstrip('.')
        if not result:
            result = '_'
    return result
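
# Illustrative usage (a sketch, traced by hand against the rules above): colons
# in timestamps become underscores, and in restricted mode accents are folded
# while runs of non-ASCII characters collapse to a single '_':
#   >>> sanitize_filename('New World record at 0:12:34')
#   'New World record at 0_12_34'
#   >>> sanitize_filename('aäb中国的c', restricted=True)
#   'aab_c'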


def sanitize_path(s, force=False):
    """Sanitizes and normalizes path on Windows"""
    if sys.platform == 'win32':
        force = False
        drive_or_unc, _ = os.path.splitdrive(s)
    elif force:
        drive_or_unc = ''
    else:
        return s

    norm_path = os.path.normpath(remove_start(s, drive_or_unc)).split(os.path.sep)
    if drive_or_unc:
        norm_path.pop(0)
    sanitized_path = [
        path_part if path_part in ['.', '..'] else re.sub(r'(?:[/<>:"\|\\?\*]|[\s.]$)', '#', path_part)
        for path_part in norm_path]
    if drive_or_unc:
        sanitized_path.insert(0, drive_or_unc + os.path.sep)
    elif force and s and s[0] == os.path.sep:
        sanitized_path.insert(0, os.path.sep)
    return os.path.join(*sanitized_path)


def sanitize_url(url):
    # Prepend protocol-less URLs with `http:` scheme in order to mitigate
    # the number of unwanted failures due to missing protocol
    if url.startswith('//'):
        return 'http:%s' % url
    # Fix some common typos seen so far
    COMMON_TYPOS = (
        # https://github.com/ytdl-org/youtube-dl/issues/15649
        (r'^httpss://', r'https://'),
        # https://bx1.be/lives/direct-tv/
        (r'^rmtp([es]?)://', r'rtmp\1://'),
    )
    for mistake, fixup in COMMON_TYPOS:
        if re.match(mistake, url):
            return re.sub(mistake, fixup, url)
    return url
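
# Illustrative usage (a sketch with made-up URLs):
#   >>> sanitize_url('//example.com/video')
#   'http://example.com/video'
#   >>> sanitize_url('httpss://example.com/video')
#   'https://example.com/video'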


def extract_basic_auth(url):
    parts = compat_urlparse.urlsplit(url)
    if parts.username is None:
        return url, None
    url = compat_urlparse.urlunsplit(parts._replace(netloc=(
        parts.hostname if parts.port is None
        else '%s:%d' % (parts.hostname, parts.port))))
    auth_payload = base64.b64encode(
        ('%s:%s' % (parts.username, parts.password or '')).encode('utf-8'))
    return url, 'Basic ' + auth_payload.decode('utf-8')
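
# Illustrative usage (a sketch; the credentials are made up): the userinfo is
# stripped from the URL and returned as a ready-to-use Authorization value:
#   >>> extract_basic_auth('http://user:pass@example.com/x')
#   ('http://example.com/x', 'Basic dXNlcjpwYXNz')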


def sanitized_Request(url, *args, **kwargs):
    url, auth_header = extract_basic_auth(escape_url(sanitize_url(url)))
    if auth_header is not None:
        headers = args[1] if len(args) >= 2 else kwargs.setdefault('headers', {})
        headers['Authorization'] = auth_header
    return compat_urllib_request.Request(url, *args, **kwargs)


def expand_path(s):
    """Expand shell variables and ~"""
    return os.path.expandvars(compat_expanduser(s))


def orderedSet(iterable):
    """ Remove all duplicates from the input iterable """
    res = []
    for el in iterable:
        if el not in res:
            res.append(el)
    return res
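
# Illustrative usage (a sketch): order is preserved, later duplicates dropped:
#   >>> orderedSet([1, 2, 1, 3, 2])
#   [1, 2, 3]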


def _htmlentity_transform(entity_with_semicolon):
    """Transforms an HTML entity to a character."""
    entity = entity_with_semicolon[:-1]

    # Known non-numeric HTML entity
    if entity in compat_html_entities.name2codepoint:
        return compat_chr(compat_html_entities.name2codepoint[entity])

    # TODO: HTML5 allows entities without a semicolon. For example,
    # '&Eacuteric' should be decoded as 'Éric'.
    if entity_with_semicolon in compat_html_entities_html5:
        return compat_html_entities_html5[entity_with_semicolon]

    mobj = re.match(r'#(x[0-9a-fA-F]+|[0-9]+)', entity)
    if mobj is not None:
        numstr = mobj.group(1)
        if numstr.startswith('x'):
            base = 16
            numstr = '0%s' % numstr
        else:
            base = 10
        # See https://github.com/ytdl-org/youtube-dl/issues/7518
        try:
            return compat_chr(int(numstr, base))
        except ValueError:
            pass

    # Unknown entity in name, return its literal representation
    return '&%s;' % entity


def unescapeHTML(s):
    if s is None:
        return None
    assert type(s) == compat_str

    return re.sub(
        r'&([^&;]+;)', lambda m: _htmlentity_transform(m.group(1)), s)


def escapeHTML(text):
    return (
        text
        .replace('&', '&amp;')
        .replace('<', '&lt;')
        .replace('>', '&gt;')
        .replace('"', '&quot;')
        .replace("'", '&#39;')
    )


def process_communicate_or_kill(p, *args, **kwargs):
    try:
        return p.communicate(*args, **kwargs)
    except BaseException:  # Including KeyboardInterrupt
        p.kill()
        p.wait()
        raise


class Popen(subprocess.Popen):
    if sys.platform == 'win32':
        _startupinfo = subprocess.STARTUPINFO()
        _startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW
    else:
        _startupinfo = None

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs, startupinfo=self._startupinfo)

    def communicate_or_kill(self, *args, **kwargs):
        return process_communicate_or_kill(self, *args, **kwargs)


def get_subprocess_encoding():
    if sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
        # For subprocess calls, encode with locale encoding
        # Refer to http://stackoverflow.com/a/9951851/35070
        encoding = preferredencoding()
    else:
        encoding = sys.getfilesystemencoding()
    if encoding is None:
        encoding = 'utf-8'
    return encoding


def encodeFilename(s, for_subprocess=False):
    assert type(s) == str
    return s


def decodeFilename(b, for_subprocess=False):
    return b


def encodeArgument(s):
    # Legacy code that uses byte strings
    # Uncomment the following line after fixing all post processors
    # assert isinstance(s, str), 'Internal error: %r should be of type %r, is %r' % (s, compat_str, type(s))
    return s if isinstance(s, str) else s.decode('ascii')


def decodeArgument(b):
    return b


def decodeOption(optval):
    if optval is None:
        return optval
    if isinstance(optval, bytes):
        optval = optval.decode(preferredencoding())

    assert isinstance(optval, compat_str)
    return optval


_timetuple = collections.namedtuple('Time', ('hours', 'minutes', 'seconds', 'milliseconds'))


def timetuple_from_msec(msec):
    secs, msec = divmod(msec, 1000)
    mins, secs = divmod(secs, 60)
    hrs, mins = divmod(mins, 60)
    return _timetuple(hrs, mins, secs, msec)


def formatSeconds(secs, delim=':', msec=False):
    time = timetuple_from_msec(secs * 1000)
    if time.hours:
        ret = '%d%s%02d%s%02d' % (time.hours, delim, time.minutes, delim, time.seconds)
    elif time.minutes:
        ret = '%d%s%02d' % (time.minutes, delim, time.seconds)
    else:
        ret = '%d' % time.seconds
    return '%s.%03d' % (ret, time.milliseconds) if msec else ret
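
# Illustrative usage (a sketch): leading zero fields are omitted, and msec=True
# appends milliseconds:
#   >>> formatSeconds(3661)
#   '1:01:01'
#   >>> formatSeconds(61.5, msec=True)
#   '1:01.500'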


def _ssl_load_windows_store_certs(ssl_context, storename):
    # Code adapted from _load_windows_store_certs in https://github.com/python/cpython/blob/main/Lib/ssl.py
    try:
        certs = [cert for cert, encoding, trust in ssl.enum_certificates(storename)
                 if encoding == 'x509_asn' and (
                     trust is True or ssl.Purpose.SERVER_AUTH.oid in trust)]
    except PermissionError:
        return
    for cert in certs:
        try:
            ssl_context.load_verify_locations(cadata=cert)
        except ssl.SSLError:
            pass


def make_HTTPS_handler(params, **kwargs):
    opts_check_certificate = not params.get('nocheckcertificate')
    context = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT)
    context.check_hostname = opts_check_certificate
    if params.get('legacyserverconnect'):
        context.options |= 4  # SSL_OP_LEGACY_SERVER_CONNECT
    context.verify_mode = ssl.CERT_REQUIRED if opts_check_certificate else ssl.CERT_NONE
    if opts_check_certificate:
        if has_certifi and 'no-certifi' not in params.get('compat_opts', []):
            context.load_verify_locations(cafile=certifi.where())
        else:
            try:
                context.load_default_certs()
            # Work around the issue in load_default_certs when there are bad certificates. See:
            # https://github.com/yt-dlp/yt-dlp/issues/1060,
            # https://bugs.python.org/issue35665, https://bugs.python.org/issue45312
            except ssl.SSLError:
                # enum_certificates is not present in mingw python. See https://github.com/yt-dlp/yt-dlp/issues/1151
                if sys.platform == 'win32' and hasattr(ssl, 'enum_certificates'):
                    # Create a new context to discard any certificates that were already loaded
                    context = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT)
                    context.check_hostname, context.verify_mode = True, ssl.CERT_REQUIRED
                    for storename in ('CA', 'ROOT'):
                        _ssl_load_windows_store_certs(context, storename)
                context.set_default_verify_paths()
    return YoutubeDLHTTPSHandler(params, context=context, **kwargs)


def bug_reports_message(before=';'):
    msg = ('please report this issue on https://github.com/yt-dlp/yt-dlp/issues?q= , '
           'filling out the appropriate issue template. '
           'Confirm you are on the latest version using yt-dlp -U')

    before = before.rstrip()
    if not before or before.endswith(('.', '!', '?')):
        msg = msg[0].title() + msg[1:]

    return (before + ' ' if before else '') + msg


class YoutubeDLError(Exception):
    """Base exception for YoutubeDL errors."""
    msg = None

    def __init__(self, msg=None):
        if msg is not None:
            self.msg = msg
        elif self.msg is None:
            self.msg = type(self).__name__
        super().__init__(self.msg)


network_exceptions = [compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error]
if hasattr(ssl, 'CertificateError'):
    network_exceptions.append(ssl.CertificateError)
network_exceptions = tuple(network_exceptions)


class ExtractorError(YoutubeDLError):
    """Error during info extraction."""

    def __init__(self, msg, tb=None, expected=False, cause=None, video_id=None, ie=None):
        """ tb, if given, is the original traceback (so that it can be printed out).
        If expected is set, this is a normal error message and most likely not a bug in yt-dlp.
        """
        if sys.exc_info()[0] in network_exceptions:
            expected = True

        self.orig_msg = str(msg)
        self.traceback = tb
        self.expected = expected
        self.cause = cause
        self.video_id = video_id
        self.ie = ie
        self.exc_info = sys.exc_info()  # preserve original exception

        super().__init__(''.join((
            format_field(ie, template='[%s] '),
            format_field(video_id, template='%s: '),
            msg,
            format_field(cause, template=' (caused by %r)'),
            '' if expected else bug_reports_message())))

    def format_traceback(self):
        return join_nonempty(
            self.traceback and ''.join(traceback.format_tb(self.traceback)),
            self.cause and ''.join(traceback.format_exception(None, self.cause, self.cause.__traceback__)[1:]),
            delim='\n') or None


class UnsupportedError(ExtractorError):
    def __init__(self, url):
        super().__init__(
            'Unsupported URL: %s' % url, expected=True)
        self.url = url


class RegexNotFoundError(ExtractorError):
    """Error when a regex didn't match"""
    pass


class GeoRestrictedError(ExtractorError):
    """Geographic restriction Error exception.

    This exception may be thrown when a video is not available from your
    geographic location due to geographic restrictions imposed by a website.
    """

    def __init__(self, msg, countries=None, **kwargs):
        kwargs['expected'] = True
        super().__init__(msg, **kwargs)
        self.countries = countries


class DownloadError(YoutubeDLError):
    """Download Error exception.

    This exception may be thrown by FileDownloader objects if they are not
    configured to continue on errors. They will contain the appropriate
    error message.
    """

    def __init__(self, msg, exc_info=None):
        """ exc_info, if given, is the original exception that caused the trouble (as returned by sys.exc_info()). """
        super().__init__(msg)
        self.exc_info = exc_info


class EntryNotInPlaylist(YoutubeDLError):
    """Entry not in playlist exception.

    This exception will be thrown by YoutubeDL when a requested entry
    is not found in the playlist info_dict
    """
    msg = 'Entry not found in info'


class SameFileError(YoutubeDLError):
    """Same File exception.

    This exception will be thrown by FileDownloader objects if they detect
    multiple files would have to be downloaded to the same file on disk.
    """
    msg = 'Fixed output name but more than one file to download'

    def __init__(self, filename=None):
        if filename is not None:
            self.msg += f': {filename}'
        super().__init__(self.msg)


class PostProcessingError(YoutubeDLError):
    """Post Processing exception.

    This exception may be raised by PostProcessor's .run() method to
    indicate an error in the postprocessing task.
    """


class DownloadCancelled(YoutubeDLError):
    """ Exception raised when the download queue should be interrupted """
    msg = 'The download was cancelled'


class ExistingVideoReached(DownloadCancelled):
    """ --break-on-existing triggered """
    msg = 'Encountered a video that is already in the archive, stopping due to --break-on-existing'


class RejectedVideoReached(DownloadCancelled):
    """ --break-on-reject triggered """
    msg = 'Encountered a video that did not match filter, stopping due to --break-on-reject'


class MaxDownloadsReached(DownloadCancelled):
    """ --max-downloads limit has been reached. """
    msg = 'Maximum number of downloads reached, stopping due to --max-downloads'


class ReExtractInfo(YoutubeDLError):
    """ Video info needs to be re-extracted. """

    def __init__(self, msg, expected=False):
        super().__init__(msg)
        self.expected = expected


class ThrottledDownload(ReExtractInfo):
    """ Download speed below --throttled-rate. """
    msg = 'The download speed is below throttle limit'

    def __init__(self):
        super().__init__(self.msg, expected=False)


class UnavailableVideoError(YoutubeDLError):
    """Unavailable Format exception.

    This exception will be thrown when a video is requested
    in a format that is not available for that video.
    """
    msg = 'Unable to download video'

    def __init__(self, err=None):
        if err is not None:
            self.msg += f': {err}'
        super().__init__(self.msg)


class ContentTooShortError(YoutubeDLError):
    """Content Too Short exception.

    This exception may be raised by FileDownloader objects when a file they
    download is too small for what the server announced first, indicating
    the connection was probably interrupted.
    """

    def __init__(self, downloaded, expected):
        super().__init__(f'Downloaded {downloaded} bytes, expected {expected} bytes')
        # Both in bytes
        self.downloaded = downloaded
        self.expected = expected


class XAttrMetadataError(YoutubeDLError):
    def __init__(self, code=None, msg='Unknown error'):
        super().__init__(msg)
        self.code = code
        self.msg = msg

        # Parsing code and msg
        if (self.code in (errno.ENOSPC, errno.EDQUOT)
                or 'No space left' in self.msg or 'Disk quota exceeded' in self.msg):
            self.reason = 'NO_SPACE'
        elif self.code == errno.E2BIG or 'Argument list too long' in self.msg:
            self.reason = 'VALUE_TOO_LONG'
        else:
            self.reason = 'NOT_SUPPORTED'


class XAttrUnavailableError(YoutubeDLError):
    pass


def _create_http_connection(ydl_handler, http_class, is_https, *args, **kwargs):
    hc = http_class(*args, **kwargs)
    source_address = ydl_handler._params.get('source_address')

    if source_address is not None:
        # This is to workaround _create_connection() from socket where it will try all
        # address data from getaddrinfo() including IPv6. This filters the result from
        # getaddrinfo() based on the source_address value.
        # This is based on the cpython socket.create_connection() function.
        # https://github.com/python/cpython/blob/master/Lib/socket.py#L691
        def _create_connection(address, timeout=socket._GLOBAL_DEFAULT_TIMEOUT, source_address=None):
            host, port = address
            err = None
            addrs = socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM)
            af = socket.AF_INET if '.' in source_address[0] else socket.AF_INET6
            ip_addrs = [addr for addr in addrs if addr[0] == af]
            if addrs and not ip_addrs:
                ip_version = 'v4' if af == socket.AF_INET else 'v6'
                raise OSError(
                    "No remote IP%s addresses available for connect, can't use '%s' as source address"
                    % (ip_version, source_address[0]))
            for res in ip_addrs:
                af, socktype, proto, canonname, sa = res
                sock = None
                try:
                    sock = socket.socket(af, socktype, proto)
                    if timeout is not socket._GLOBAL_DEFAULT_TIMEOUT:
                        sock.settimeout(timeout)
                    sock.bind(source_address)
                    sock.connect(sa)
                    err = None  # Explicitly break reference cycle
                    return sock
                except OSError as _:
                    err = _
                    if sock is not None:
                        sock.close()
            if err is not None:
                raise err
            else:
                raise OSError('getaddrinfo returns an empty list')
        if hasattr(hc, '_create_connection'):
            hc._create_connection = _create_connection
        hc.source_address = (source_address, 0)

    return hc


def handle_youtubedl_headers(headers):
    filtered_headers = headers

    if 'Youtubedl-no-compression' in filtered_headers:
        filtered_headers = {k: v for k, v in filtered_headers.items() if k.lower() != 'accept-encoding'}
        del filtered_headers['Youtubedl-no-compression']

    return filtered_headers


class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
    """Handler for HTTP requests and responses.

    This class, when installed with an OpenerDirector, automatically adds
    the standard headers to every HTTP request and handles gzipped and
    deflated responses from web servers. If compression is to be avoided in
    a particular request, the original request in the program code only has
    to include the HTTP header "Youtubedl-no-compression", which will be
    removed before making the real request.

    Part of this code was copied from:

    http://techknack.net/python-urllib2-handlers/

    Andrew Rowls, the author of that code, agreed to release it to the
    public domain.
    """

    def __init__(self, params, *args, **kwargs):
        compat_urllib_request.HTTPHandler.__init__(self, *args, **kwargs)
        self._params = params

    def http_open(self, req):
        conn_class = compat_http_client.HTTPConnection

        socks_proxy = req.headers.get('Ytdl-socks-proxy')
        if socks_proxy:
            conn_class = make_socks_conn_class(conn_class, socks_proxy)
            del req.headers['Ytdl-socks-proxy']

        return self.do_open(functools.partial(
            _create_http_connection, self, conn_class, False),
            req)

    @staticmethod
    def deflate(data):
        if not data:
            return data
        try:
            return zlib.decompress(data, -zlib.MAX_WBITS)
        except zlib.error:
            return zlib.decompress(data)

    @staticmethod
    def brotli(data):
        if not data:
            return data
        return compat_brotli.decompress(data)

    def http_request(self, req):
        # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
        # always respected by websites, some tend to give out URLs with non percent-encoded
        # non-ASCII characters (see telemb.py, ard.py [#3412])
        # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
        # To work around aforementioned issue we will replace request's original URL with
        # percent-encoded one
        # Since redirects are also affected (e.g. http://www.southpark.de/alle-episoden/s18e09)
        # the code of this workaround has been moved here from YoutubeDL.urlopen()
        url = req.get_full_url()
        url_escaped = escape_url(url)

        # Substitute URL if any change after escaping
        if url != url_escaped:
            req = update_Request(req, url=url_escaped)

        for h, v in self._params.get('http_headers', std_headers).items():
            # Capitalize is needed because of Python bug 2275: http://bugs.python.org/issue2275
            # The dict keys are capitalized because of this bug by urllib
            if h.capitalize() not in req.headers:
                req.add_header(h, v)

        if 'Accept-encoding' not in req.headers:
            req.add_header('Accept-encoding', ', '.join(SUPPORTED_ENCODINGS))

        req.headers = handle_youtubedl_headers(req.headers)

        return req

    def http_response(self, req, resp):
        old_resp = resp
        # gzip
        if resp.headers.get('Content-encoding', '') == 'gzip':
            content = resp.read()
            gz = gzip.GzipFile(fileobj=io.BytesIO(content), mode='rb')
            try:
                uncompressed = io.BytesIO(gz.read())
            except OSError as original_ioerror:
                # There may be junk at the end of the file
                # See http://stackoverflow.com/q/4928560/35070 for details
                for i in range(1, 1024):
                    try:
                        gz = gzip.GzipFile(fileobj=io.BytesIO(content[:-i]), mode='rb')
                        uncompressed = io.BytesIO(gz.read())
                    except OSError:
                        continue
                    break
                else:
                    raise original_ioerror
            resp = compat_urllib_request.addinfourl(uncompressed, old_resp.headers, old_resp.url, old_resp.code)
            resp.msg = old_resp.msg
            del resp.headers['Content-encoding']
        # deflate
        if resp.headers.get('Content-encoding', '') == 'deflate':
            gz = io.BytesIO(self.deflate(resp.read()))
            resp = compat_urllib_request.addinfourl(gz, old_resp.headers, old_resp.url, old_resp.code)
            resp.msg = old_resp.msg
            del resp.headers['Content-encoding']
        # brotli
        if resp.headers.get('Content-encoding', '') == 'br':
            resp = compat_urllib_request.addinfourl(
                io.BytesIO(self.brotli(resp.read())), old_resp.headers, old_resp.url, old_resp.code)
            resp.msg = old_resp.msg
            del resp.headers['Content-encoding']
        # Percent-encode redirect URL of Location HTTP header to satisfy RFC 3986 (see
        # https://github.com/ytdl-org/youtube-dl/issues/6457).
        if 300 <= resp.code < 400:
            location = resp.headers.get('Location')
            if location:
                # As of RFC 2616 default charset is iso-8859-1 that is respected by python 3
                location = location.encode('iso-8859-1').decode('utf-8')
                location_escaped = escape_url(location)
                if location != location_escaped:
                    del resp.headers['Location']
                    resp.headers['Location'] = location_escaped
        return resp

    https_request = http_request
    https_response = http_response


def make_socks_conn_class(base_class, socks_proxy):
    assert issubclass(base_class, (
        compat_http_client.HTTPConnection, compat_http_client.HTTPSConnection))

    url_components = compat_urlparse.urlparse(socks_proxy)
    if url_components.scheme.lower() == 'socks5':
        socks_type = ProxyType.SOCKS5
    elif url_components.scheme.lower() in ('socks', 'socks4'):
        socks_type = ProxyType.SOCKS4
    elif url_components.scheme.lower() == 'socks4a':
        socks_type = ProxyType.SOCKS4A

    def unquote_if_non_empty(s):
        if not s:
            return s
        return compat_urllib_parse_unquote_plus(s)

    proxy_args = (
        socks_type,
        url_components.hostname, url_components.port or 1080,
        True,  # Remote DNS
        unquote_if_non_empty(url_components.username),
        unquote_if_non_empty(url_components.password),
    )

    class SocksConnection(base_class):
        def connect(self):
            self.sock = sockssocket()
            self.sock.setproxy(*proxy_args)
            if type(self.timeout) in (int, float):
                self.sock.settimeout(self.timeout)
            self.sock.connect((self.host, self.port))

            if isinstance(self, compat_http_client.HTTPSConnection):
                if hasattr(self, '_context'):  # Python > 2.6
                    self.sock = self._context.wrap_socket(
                        self.sock, server_hostname=self.host)
                else:
                    self.sock = ssl.wrap_socket(self.sock)

    return SocksConnection


class YoutubeDLHTTPSHandler(compat_urllib_request.HTTPSHandler):
    def __init__(self, params, https_conn_class=None, *args, **kwargs):
        compat_urllib_request.HTTPSHandler.__init__(self, *args, **kwargs)
        self._https_conn_class = https_conn_class or compat_http_client.HTTPSConnection
        self._params = params

    def https_open(self, req):
        kwargs = {}
        conn_class = self._https_conn_class

        if hasattr(self, '_context'):  # python > 2.6
            kwargs['context'] = self._context
        if hasattr(self, '_check_hostname'):  # python 3.x
            kwargs['check_hostname'] = self._check_hostname

        socks_proxy = req.headers.get('Ytdl-socks-proxy')
        if socks_proxy:
            conn_class = make_socks_conn_class(conn_class, socks_proxy)
            del req.headers['Ytdl-socks-proxy']

        return self.do_open(functools.partial(
            _create_http_connection, self, conn_class, True),
            req, **kwargs)


class YoutubeDLCookieJar(compat_cookiejar.MozillaCookieJar):
    """
    See [1] for cookie file format.

    1. https://curl.haxx.se/docs/http-cookies.html
    """
    _HTTPONLY_PREFIX = '#HttpOnly_'
    _ENTRY_LEN = 7
    _HEADER = '''# Netscape HTTP Cookie File
# This file is generated by yt-dlp. Do not edit.

'''
    _CookieFileEntry = collections.namedtuple(
        'CookieFileEntry',
        ('domain_name', 'include_subdomains', 'path', 'https_only', 'expires_at', 'name', 'value'))

    def save(self, filename=None, ignore_discard=False, ignore_expires=False):
        """
        Save cookies to a file.

        Most of the code is taken from CPython 3.8 and slightly adapted
        to support cookie files with UTF-8 in both python 2 and 3.
        """
        if filename is None:
            if self.filename is not None:
                filename = self.filename
            else:
                raise ValueError(compat_cookiejar.MISSING_FILENAME_TEXT)

        # Store session cookies with `expires` set to 0 instead of an empty
        # string
        for cookie in self:
            if cookie.expires is None:
                cookie.expires = 0

        with open(filename, 'w', encoding='utf-8') as f:
            f.write(self._HEADER)
            now = time.time()
            for cookie in self:
                if not ignore_discard and cookie.discard:
                    continue
                if not ignore_expires and cookie.is_expired(now):
                    continue
                if cookie.secure:
                    secure = 'TRUE'
                else:
                    secure = 'FALSE'
                if cookie.domain.startswith('.'):
                    initial_dot = 'TRUE'
                else:
                    initial_dot = 'FALSE'
                if cookie.expires is not None:
                    expires = compat_str(cookie.expires)
                else:
                    expires = ''
                if cookie.value is None:
                    # cookies.txt regards 'Set-Cookie: foo' as a cookie
                    # with no name, whereas http.cookiejar regards it as a
                    # cookie with no value.
                    name = ''
                    value = cookie.name
                else:
                    name = cookie.name
                    value = cookie.value
                f.write(
                    '\t'.join([cookie.domain, initial_dot, cookie.path,
                               secure, expires, name, value]) + '\n')

    def load(self, filename=None, ignore_discard=False, ignore_expires=False):
        """Load cookies from a file."""
        if filename is None:
            if self.filename is not None:
                filename = self.filename
            else:
                raise ValueError(compat_cookiejar.MISSING_FILENAME_TEXT)

        def prepare_line(line):
            if line.startswith(self._HTTPONLY_PREFIX):
                line = line[len(self._HTTPONLY_PREFIX):]
            # comments and empty lines are fine
            if line.startswith('#') or not line.strip():
                return line
            cookie_list = line.split('\t')
            if len(cookie_list) != self._ENTRY_LEN:
                raise compat_cookiejar.LoadError('invalid length %d' % len(cookie_list))
            cookie = self._CookieFileEntry(*cookie_list)
            if cookie.expires_at and not cookie.expires_at.isdigit():
                raise compat_cookiejar.LoadError('invalid expires at %s' % cookie.expires_at)
            return line

        cf = io.StringIO()
        with open(filename, encoding='utf-8') as f:
            for line in f:
                try:
                    cf.write(prepare_line(line))
                except compat_cookiejar.LoadError as e:
                    write_string(
                        'WARNING: skipping cookie file entry due to %s: %r\n'
                        % (e, line), sys.stderr)
                    continue
        cf.seek(0)
        self._really_load(cf, filename, ignore_discard, ignore_expires)
        # Session cookies are denoted by either `expires` field set to
        # an empty string or 0. MozillaCookieJar only recognizes the former
        # (see [1]). So we need to force the latter to be recognized as session
        # cookies on our own.
        # Session cookies may be important for cookies-based authentication,
        # e.g. usually, when user does not check 'Remember me' check box while
        # logging in on a site, some important cookies are stored as session
        # cookies so that not recognizing them will result in failed login.
        # 1. https://bugs.python.org/issue17164
        for cookie in self:
            # Treat `expires=0` cookies as session cookies
            if cookie.expires == 0:
                cookie.expires = None
                cookie.discard = True


class YoutubeDLCookieProcessor(compat_urllib_request.HTTPCookieProcessor):
    def __init__(self, cookiejar=None):
        compat_urllib_request.HTTPCookieProcessor.__init__(self, cookiejar)

    def http_response(self, request, response):
        return compat_urllib_request.HTTPCookieProcessor.http_response(self, request, response)

    https_request = compat_urllib_request.HTTPCookieProcessor.http_request
    https_response = http_response


class YoutubeDLRedirectHandler(compat_urllib_request.HTTPRedirectHandler):
    """YoutubeDL redirect handler

    The code is based on HTTPRedirectHandler implementation from CPython [1].

    This redirect handler solves two issues:
     - ensures redirect URL is always unicode under python 2
     - introduces support for experimental HTTP response status code
       308 Permanent Redirect [2] used by some sites [3]

    1. https://github.com/python/cpython/blob/master/Lib/urllib/request.py
    2. https://developer.mozilla.org/en-US/docs/Web/HTTP/Status/308
    3. https://github.com/ytdl-org/youtube-dl/issues/28768
    """

    http_error_301 = http_error_303 = http_error_307 = http_error_308 = compat_urllib_request.HTTPRedirectHandler.http_error_302

    def redirect_request(self, req, fp, code, msg, headers, newurl):
        """Return a Request or None in response to a redirect.

        This is called by the http_error_30x methods when a
        redirection response is received. If a redirection should
        take place, return a new Request to allow http_error_30x to
        perform the redirect. Otherwise, raise HTTPError if no-one
        else should try to handle this url. Return None if you can't
        but another Handler might.
        """
        m = req.get_method()
        if (not (code in (301, 302, 303, 307, 308) and m in ("GET", "HEAD")
                 or code in (301, 302, 303) and m == "POST")):
            raise compat_HTTPError(req.full_url, code, msg, headers, fp)
        # Strictly (according to RFC 2616), 301 or 302 in response to
        # a POST MUST NOT cause a redirection without confirmation
        # from the user (of urllib.request, in this case). In practice,
        # essentially all clients do redirect in this case, so we do
        # the same.

        # Be conciliant with URIs containing a space. This is mainly
        # redundant with the more complete encoding done in http_error_302(),
        # but it is kept for compatibility with other callers.
        newurl = newurl.replace(' ', '%20')

        CONTENT_HEADERS = ("content-length", "content-type")
        # NB: don't use dict comprehension for python 2.6 compatibility
        newheaders = {k: v for k, v in req.headers.items() if k.lower() not in CONTENT_HEADERS}
        return compat_urllib_request.Request(
            newurl, headers=newheaders, origin_req_host=req.origin_req_host,
            unverifiable=True)


def extract_timezone(date_str):
    m = re.search(
        r'''(?x)
            ^.{8,}?                                          # >=8 char non-TZ prefix, if present
            (?P<tz>Z|                                        # just the UTC Z, or
                (?:(?<=.\b\d{4}|\b\d{2}:\d\d)|               # preceded by 4 digits or hh:mm or
                   (?<!.\b[a-zA-Z]{3}|[a-zA-Z]{4}|..\b\d\d)) # not preceded by 3 alpha word or >= 4 alpha or 2 digits
                [ ]?                                         # optional space
                (?P<sign>\+|-)                               # +/-
                (?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2})   # hh[:]mm
            $)
        ''', date_str)
    if not m:
        timezone = datetime.timedelta()
    else:
        date_str = date_str[:-len(m.group('tz'))]
        if not m.group('sign'):
            timezone = datetime.timedelta()
        else:
            sign = 1 if m.group('sign') == '+' else -1
            timezone = datetime.timedelta(
                hours=sign * int(m.group('hours')),
                minutes=sign * int(m.group('minutes')))
    return timezone, date_str


def parse_iso8601(date_str, delimiter='T', timezone=None):
    """ Return a UNIX timestamp from the given date """

    if date_str is None:
        return None

    date_str = re.sub(r'\.[0-9]+', '', date_str)

    if timezone is None:
        timezone, date_str = extract_timezone(date_str)

    try:
        date_format = f'%Y-%m-%d{delimiter}%H:%M:%S'
        dt = datetime.datetime.strptime(date_str, date_format) - timezone
        return calendar.timegm(dt.timetuple())
    except ValueError:
        pass
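
# Illustrative usage (a sketch): the timezone offset is applied before the
# conversion to a UNIX timestamp:
#   >>> parse_iso8601('1970-01-01T00:00:00Z')
#   0
#   >>> parse_iso8601('1970-01-01T01:00:00+01:00')
#   0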


def date_formats(day_first=True):
    return DATE_FORMATS_DAY_FIRST if day_first else DATE_FORMATS_MONTH_FIRST


def unified_strdate(date_str, day_first=True):
    """Return a string with the date in the format YYYYMMDD"""

    if date_str is None:
        return None
    upload_date = None
    # Replace commas
    date_str = date_str.replace(',', ' ')
    # Remove AM/PM + timezone
    date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)
    _, date_str = extract_timezone(date_str)

    for expression in date_formats(day_first):
        try:
            upload_date = datetime.datetime.strptime(date_str, expression).strftime('%Y%m%d')
        except ValueError:
            pass
    if upload_date is None:
        timetuple = email.utils.parsedate_tz(date_str)
        if timetuple:
            try:
                upload_date = datetime.datetime(*timetuple[:6]).strftime('%Y%m%d')
            except ValueError:
                pass
    if upload_date is not None:
        return compat_str(upload_date)
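
# Illustrative usage (a sketch): various human-readable forms normalize to
# YYYYMMDD:
#   >>> unified_strdate('December 21, 2010')
#   '20101221'
#   >>> unified_strdate('1968-12-10')
#   '19681210'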


def unified_timestamp(date_str, day_first=True):
    if date_str is None:
        return None

    date_str = re.sub(r'[,|]', '', date_str)

    pm_delta = 12 if re.search(r'(?i)PM', date_str) else 0
    timezone, date_str = extract_timezone(date_str)

    # Remove AM/PM + timezone
    date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)

    # Remove unrecognized timezones from ISO 8601 alike timestamps
    m = re.search(r'\d{1,2}:\d{1,2}(?:\.\d+)?(?P<tz>\s*[A-Z]+)$', date_str)
    if m:
        date_str = date_str[:-len(m.group('tz'))]

    # Python only supports microseconds, so remove nanoseconds
    m = re.search(r'^([0-9]{4,}-[0-9]{1,2}-[0-9]{1,2}T[0-9]{1,2}:[0-9]{1,2}:[0-9]{1,2}\.[0-9]{6})[0-9]+$', date_str)
    if m:
        date_str = m.group(1)

    for expression in date_formats(day_first):
        try:
            dt = datetime.datetime.strptime(date_str, expression) - timezone + datetime.timedelta(hours=pm_delta)
            return calendar.timegm(dt.timetuple())
        except ValueError:
            pass
    timetuple = email.utils.parsedate_tz(date_str)
    if timetuple:
        return calendar.timegm(timetuple) + pm_delta * 3600


def determine_ext(url, default_ext='unknown_video'):
    if url is None or '.' not in url:
        return default_ext
    guess = url.partition('?')[0].rpartition('.')[2]
    if re.match(r'^[A-Za-z0-9]+$', guess):
        return guess
    # Try extract ext from URLs like http://example.com/foo/bar.mp4/?download
    elif guess.rstrip('/') in KNOWN_EXTENSIONS:
        return guess.rstrip('/')
    else:
        return default_ext
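
# Illustrative usage (a sketch with made-up URLs): a known extension survives a
# trailing '/?query', and unrecognizable URLs fall back to default_ext:
#   >>> determine_ext('http://example.com/foo/bar.mp4/?download')
#   'mp4'
#   >>> determine_ext('http://example.com/watch')
#   'unknown_video'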
1732
1733
1734 def subtitles_filename(filename, sub_lang, sub_format, expected_real_ext=None):
1735 return replace_extension(filename, sub_lang + '.' + sub_format, expected_real_ext)
1736
1737
1738 def datetime_from_str(date_str, precision='auto', format='%Y%m%d'):
1739 """
1740 Return a datetime object from a string in the format YYYYMMDD or
1741 (now|today|yesterday|date)[+-][0-9]+(microsecond|second|minute|hour|day|week|month|year)(s)?
1742
1743 format: string date format used to parse date_str into a datetime object
1744 precision: round the time portion of a datetime object.
1745 auto|microsecond|second|minute|hour|day.
1746 auto: round to the unit provided in date_str (if applicable).
1747 """
1748 auto_precision = False
1749 if precision == 'auto':
1750 auto_precision = True
1751 precision = 'microsecond'
1752 today = datetime_round(datetime.datetime.utcnow(), precision)
1753 if date_str in ('now', 'today'):
1754 return today
1755 if date_str == 'yesterday':
1756 return today - datetime.timedelta(days=1)
1757 match = re.match(
1758 r'(?P<start>.+)(?P<sign>[+-])(?P<time>\d+)(?P<unit>microsecond|second|minute|hour|day|week|month|year)(s)?',
1759 date_str)
1760 if match is not None:
1761 start_time = datetime_from_str(match.group('start'), precision, format)
1762 time = int(match.group('time')) * (-1 if match.group('sign') == '-' else 1)
1763 unit = match.group('unit')
1764 if unit == 'month' or unit == 'year':
1765 new_date = datetime_add_months(start_time, time * 12 if unit == 'year' else time)
1766 unit = 'day'
1767 else:
1768 if unit == 'week':
1769 unit = 'day'
1770 time *= 7
1771 delta = datetime.timedelta(**{unit + 's': time})
1772 new_date = start_time + delta
1773 if auto_precision:
1774 return datetime_round(new_date, unit)
1775 return new_date
1776
1777 return datetime_round(datetime.datetime.strptime(date_str, format), precision)
1778
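# Illustrative use of datetime_from_str (comment-only sketch; relative
# results depend on the current UTC time, so no exact values are shown):
#
#   datetime_from_str('now-1week')   # one week ago, rounded to the day
#   datetime_from_str('today+2months', precision='day')
#   datetime_from_str('20200229')    # datetime.datetime(2020, 2, 29, 0, 0)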
1779
1780 def date_from_str(date_str, format='%Y%m%d', strict=False):
1781 """
1782 Return a datetime object from a string in the format YYYYMMDD or
1783 (now|today|yesterday|date)[+-][0-9]+(microsecond|second|minute|hour|day|week|month|year)(s)?
1784
1785 If "strict", only (now|today)[+-][0-9]+(day|week|month|year)(s)? is allowed
1786
1787 format: string date format used to parse date_str into a datetime object
1788 """
1789 if strict and not re.fullmatch(r'\d{8}|(now|today)[+-]\d+(day|week|month|year)(s)?', date_str):
1790 raise ValueError(f'Invalid date format {date_str}')
1791 return datetime_from_str(date_str, precision='microsecond', format=format).date()
1792
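# Illustrative use of date_from_str (comment-only sketch):
#
#   date_from_str('now-3days', strict=True)       # the date three days ago
#   date_from_str('yesterday-1day', strict=True)  # raises ValueError in strict mode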
1793
1794 def datetime_add_months(dt, months):
1795 """Increment/Decrement a datetime object by months."""
1796 month = dt.month + months - 1
1797 year = dt.year + month // 12
1798 month = month % 12 + 1
1799 day = min(dt.day, calendar.monthrange(year, month)[1])
1800 return dt.replace(year, month, day)
1801
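# Illustrative use of datetime_add_months (comment-only sketch); note how the
# day is clamped to the length of the target month:
#
#   datetime_add_months(datetime.datetime(2020, 1, 31), 1)   # 2020-02-29 00:00
#   datetime_add_months(datetime.datetime(2020, 3, 15), -3)  # 2019-12-15 00:00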
1802
1803 def datetime_round(dt, precision='day'):
1804 """
1805 Round a datetime object's time to a specific precision
1806 """
1807 if precision == 'microsecond':
1808 return dt
1809
1810 unit_seconds = {
1811 'day': 86400,
1812 'hour': 3600,
1813 'minute': 60,
1814 'second': 1,
1815 }
1816 roundto = lambda x, n: ((x + n / 2) // n) * n
1817 timestamp = calendar.timegm(dt.timetuple())
1818 return datetime.datetime.utcfromtimestamp(roundto(timestamp, unit_seconds[precision]))
1819
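# Illustrative use of datetime_round (comment-only sketch); values are
# rounded to the nearest unit, not truncated:
#
#   datetime_round(datetime.datetime(2020, 1, 1, 13, 0), 'day')   # 2020-01-02 00:00
#   datetime_round(datetime.datetime(2020, 1, 1, 11, 59), 'day')  # 2020-01-01 00:00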
1820
1821 def hyphenate_date(date_str):
1822 """
1823 Convert a date in 'YYYYMMDD' format to 'YYYY-MM-DD' format"""
1824 match = re.match(r'^(\d\d\d\d)(\d\d)(\d\d)$', date_str)
1825 if match is not None:
1826 return '-'.join(match.groups())
1827 else:
1828 return date_str
1829
1830
1831 class DateRange:
1832 """Represents a time interval between two dates"""
1833
1834 def __init__(self, start=None, end=None):
1835 """start and end must be strings in the format accepted by date"""
1836 if start is not None:
1837 self.start = date_from_str(start, strict=True)
1838 else:
1839 self.start = datetime.datetime.min.date()
1840 if end is not None:
1841 self.end = date_from_str(end, strict=True)
1842 else:
1843 self.end = datetime.datetime.max.date()
1844 if self.start > self.end:
1845 raise ValueError('Date range: "%s": the start date must be before the end date' % self)
1846
1847 @classmethod
1848 def day(cls, day):
1849 """Returns a range that only contains the given day"""
1850 return cls(day, day)
1851
1852 def __contains__(self, date):
1853 """Check if the date is in the range"""
1854 if not isinstance(date, datetime.date):
1855 date = date_from_str(date)
1856 return self.start <= date <= self.end
1857
1858 def __str__(self):
1859 return f'{self.start.isoformat()} - {self.end.isoformat()}'
1860
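# Illustrative use of DateRange (comment-only sketch):
#
#   '20200101' in DateRange('20191231', '20200102')     # True
#   datetime.date(2021, 5, 1) in DateRange('20210101')  # True (no end date)
#   DateRange.day('20200101')  # a range containing only 2020-01-01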
1861
1862 def platform_name():
1863 """ Returns the platform name as a compat_str """
1864 res = platform.platform()
1865 if isinstance(res, bytes):
1866 res = res.decode(preferredencoding())
1867
1868 assert isinstance(res, compat_str)
1869 return res
1870
1871
1872 def get_windows_version():
1873 ''' Get Windows version. None if it's not running on Windows '''
1874 if compat_os_name == 'nt':
1875 return version_tuple(platform.win32_ver()[1])
1876 else:
1877 return None
1878
1879
1880 def write_string(s, out=None, encoding=None):
1881 if out is None:
1882 out = sys.stderr
1883 assert type(s) == compat_str
1884
1885 if 'b' in getattr(out, 'mode', ''):
1886 byt = s.encode(encoding or preferredencoding(), 'ignore')
1887 out.write(byt)
1888 elif hasattr(out, 'buffer'):
1889 enc = encoding or getattr(out, 'encoding', None) or preferredencoding()
1890 byt = s.encode(enc, 'ignore')
1891 out.buffer.write(byt)
1892 else:
1893 out.write(s)
1894 out.flush()
1895
1896
1897 def bytes_to_intlist(bs):
1898 if not bs:
1899 return []
1900 if isinstance(bs[0], int): # Python 3
1901 return list(bs)
1902 else:
1903 return [ord(c) for c in bs]
1904
1905
1906 def intlist_to_bytes(xs):
1907 if not xs:
1908 return b''
1909 return compat_struct_pack('%dB' % len(xs), *xs)
1910
1911
1912 class LockingUnsupportedError(IOError):
1913 msg = 'File locking is not supported on this platform'
1914
1915 def __init__(self):
1916 super().__init__(self.msg)
1917
1918
1919 # Cross-platform file locking
1920 if sys.platform == 'win32':
1921 import ctypes.wintypes
1922 import msvcrt
1923
1924 class OVERLAPPED(ctypes.Structure):
1925 _fields_ = [
1926 ('Internal', ctypes.wintypes.LPVOID),
1927 ('InternalHigh', ctypes.wintypes.LPVOID),
1928 ('Offset', ctypes.wintypes.DWORD),
1929 ('OffsetHigh', ctypes.wintypes.DWORD),
1930 ('hEvent', ctypes.wintypes.HANDLE),
1931 ]
1932
1933 kernel32 = ctypes.windll.kernel32
1934 LockFileEx = kernel32.LockFileEx
1935 LockFileEx.argtypes = [
1936 ctypes.wintypes.HANDLE, # hFile
1937 ctypes.wintypes.DWORD, # dwFlags
1938 ctypes.wintypes.DWORD, # dwReserved
1939 ctypes.wintypes.DWORD, # nNumberOfBytesToLockLow
1940 ctypes.wintypes.DWORD, # nNumberOfBytesToLockHigh
1941 ctypes.POINTER(OVERLAPPED) # Overlapped
1942 ]
1943 LockFileEx.restype = ctypes.wintypes.BOOL
1944 UnlockFileEx = kernel32.UnlockFileEx
1945 UnlockFileEx.argtypes = [
1946 ctypes.wintypes.HANDLE, # hFile
1947 ctypes.wintypes.DWORD, # dwReserved
1948 ctypes.wintypes.DWORD, # nNumberOfBytesToLockLow
1949 ctypes.wintypes.DWORD, # nNumberOfBytesToLockHigh
1950 ctypes.POINTER(OVERLAPPED) # Overlapped
1951 ]
1952 UnlockFileEx.restype = ctypes.wintypes.BOOL
1953 whole_low = 0xffffffff
1954 whole_high = 0x7fffffff
1955
1956 def _lock_file(f, exclusive, block):
1957 overlapped = OVERLAPPED()
1958 overlapped.Offset = 0
1959 overlapped.OffsetHigh = 0
1960 overlapped.hEvent = 0
1961 f._lock_file_overlapped_p = ctypes.pointer(overlapped)
1962
1963 if not LockFileEx(msvcrt.get_osfhandle(f.fileno()),
1964 (0x2 if exclusive else 0x0) | (0x0 if block else 0x1),
1965 0, whole_low, whole_high, f._lock_file_overlapped_p):
1966 raise BlockingIOError('Locking file failed: %r' % ctypes.FormatError())
1967
1968 def _unlock_file(f):
1969 assert f._lock_file_overlapped_p
1970 handle = msvcrt.get_osfhandle(f.fileno())
1971 if not UnlockFileEx(handle, 0, whole_low, whole_high, f._lock_file_overlapped_p):
1972 raise OSError('Unlocking file failed: %r' % ctypes.FormatError())
1973
1974 else:
1975 try:
1976 import fcntl
1977
1978 def _lock_file(f, exclusive, block):
1979 flags = fcntl.LOCK_EX if exclusive else fcntl.LOCK_SH
1980 if not block:
1981 flags |= fcntl.LOCK_NB
1982 try:
1983 fcntl.flock(f, flags)
1984 except BlockingIOError:
1985 raise
1986 except OSError: # AOSP does not have flock()
1987 fcntl.lockf(f, flags)
1988
1989 def _unlock_file(f):
1990 try:
1991 fcntl.flock(f, fcntl.LOCK_UN)
1992 except OSError:
1993 fcntl.lockf(f, fcntl.LOCK_UN)
1994
1995 except ImportError:
1996
1997 def _lock_file(f, exclusive, block):
1998 raise LockingUnsupportedError()
1999
2000 def _unlock_file(f):
2001 raise LockingUnsupportedError()
2002
2003
2004 class locked_file:
2005 locked = False
2006
2007 def __init__(self, filename, mode, block=True, encoding=None):
2008 if mode not in {'r', 'rb', 'a', 'ab', 'w', 'wb'}:
2009 raise NotImplementedError(mode)
2010 self.mode, self.block = mode, block
2011
2012 writable = any(f in mode for f in 'wax+')
2013 readable = any(f in mode for f in 'r+')
2014 flags = functools.reduce(operator.ior, (
2015 getattr(os, 'O_CLOEXEC', 0), # UNIX only
2016 getattr(os, 'O_BINARY', 0), # Windows only
2017 getattr(os, 'O_NOINHERIT', 0), # Windows only
2018 os.O_CREAT if writable else 0, # O_TRUNC only after locking
2019 os.O_APPEND if 'a' in mode else 0,
2020 os.O_EXCL if 'x' in mode else 0,
2021 os.O_RDONLY if not writable else os.O_RDWR if readable else os.O_WRONLY,
2022 ))
2023
2024 self.f = os.fdopen(os.open(filename, flags, 0o666), mode, encoding=encoding)
2025
2026 def __enter__(self):
2027 exclusive = 'r' not in self.mode
2028 try:
2029 _lock_file(self.f, exclusive, self.block)
2030 self.locked = True
2031 except OSError:
2032 self.f.close()
2033 raise
2034 if 'w' in self.mode:
2035 self.f.truncate()
2036 return self
2037
2038 def unlock(self):
2039 if not self.locked:
2040 return
2041 try:
2042 _unlock_file(self.f)
2043 finally:
2044 self.locked = False
2045
2046 def __exit__(self, *_):
2047 try:
2048 self.unlock()
2049 finally:
2050 self.f.close()
2051
2052 open = __enter__
2053 close = __exit__
2054
2055 def __getattr__(self, attr):
2056 return getattr(self.f, attr)
2057
2058 def __iter__(self):
2059 return iter(self.f)
2060
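# Typical use of locked_file (comment-only sketch; 'archive.txt' is a
# hypothetical filename):
#
#   # raises in __enter__ if another process already holds the lock,
#   # since block=False requests a non-blocking lock
#   with locked_file('archive.txt', 'a', block=False) as f:
#       f.write('some entry\n')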
2061
2062 def get_filesystem_encoding():
2063 encoding = sys.getfilesystemencoding()
2064 return encoding if encoding is not None else 'utf-8'
2065
2066
2067 def shell_quote(args):
2068 quoted_args = []
2069 encoding = get_filesystem_encoding()
2070 for a in args:
2071 if isinstance(a, bytes):
2072 # We may get a filename encoded with 'encodeFilename'
2073 a = a.decode(encoding)
2074 quoted_args.append(compat_shlex_quote(a))
2075 return ' '.join(quoted_args)
2076
2077
2078 def smuggle_url(url, data):
2079 """ Pass additional data in a URL for internal use. """
2080
2081 url, idata = unsmuggle_url(url, {})
2082 data.update(idata)
2083 sdata = compat_urllib_parse_urlencode(
2084 {'__youtubedl_smuggle': json.dumps(data)})
2085 return url + '#' + sdata
2086
2087
2088 def unsmuggle_url(smug_url, default=None):
2089 if '#__youtubedl_smuggle' not in smug_url:
2090 return smug_url, default
2091 url, _, sdata = smug_url.rpartition('#')
2092 jsond = compat_parse_qs(sdata)['__youtubedl_smuggle'][0]
2093 data = json.loads(jsond)
2094 return url, data
2095
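# Illustrative round trip (comment-only sketch; the 'referer' key is only an
# example of internal data):
#
#   url = smuggle_url('http://example.com/video', {'referer': 'http://example.com'})
#   unsmuggle_url(url)  # ('http://example.com/video', {'referer': 'http://example.com'})
#   unsmuggle_url('http://example.com/plain', {})  # ('http://example.com/plain', {})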
2096
2097 def format_decimal_suffix(num, fmt='%d%s', *, factor=1000):
2098 """ Formats numbers with decimal sufixes like K, M, etc """
2099 num, factor = float_or_none(num), float(factor)
2100 if num is None or num < 0:
2101 return None
2102 POSSIBLE_SUFFIXES = 'kMGTPEZY'
2103 exponent = 0 if num == 0 else min(int(math.log(num, factor)), len(POSSIBLE_SUFFIXES))
2104 suffix = ['', *POSSIBLE_SUFFIXES][exponent]
2105 if factor == 1024:
2106 suffix = {'k': 'Ki', '': ''}.get(suffix, f'{suffix}i')
2107 converted = num / (factor ** exponent)
2108 return fmt % (converted, suffix)
2109
2110
2111 def format_bytes(bytes):
2112 return format_decimal_suffix(bytes, '%.2f%sB', factor=1024) or 'N/A'
2113
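# Illustrative use (comment-only sketch):
#
#   format_decimal_suffix(123456, '%.1f%s')  # '123.5k'
#   format_bytes(1000)                       # '1000.00B'
#   format_bytes(None)                       # 'N/A'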
2114
2115 def lookup_unit_table(unit_table, s):
2116 units_re = '|'.join(re.escape(u) for u in unit_table)
2117 m = re.match(
2118 r'(?P<num>[0-9]+(?:[,.][0-9]*)?)\s*(?P<unit>%s)\b' % units_re, s)
2119 if not m:
2120 return None
2121 num_str = m.group('num').replace(',', '.')
2122 mult = unit_table[m.group('unit')]
2123 return int(float(num_str) * mult)
2124
2125
2126 def parse_filesize(s):
2127 if s is None:
2128 return None
2129
2130 # The lower-case forms are of course incorrect and unofficial,
2131 # but we support those too
2132 _UNIT_TABLE = {
2133 'B': 1,
2134 'b': 1,
2135 'bytes': 1,
2136 'KiB': 1024,
2137 'KB': 1000,
2138 'kB': 1024,
2139 'Kb': 1000,
2140 'kb': 1000,
2141 'kilobytes': 1000,
2142 'kibibytes': 1024,
2143 'MiB': 1024 ** 2,
2144 'MB': 1000 ** 2,
2145 'mB': 1024 ** 2,
2146 'Mb': 1000 ** 2,
2147 'mb': 1000 ** 2,
2148 'megabytes': 1000 ** 2,
2149 'mebibytes': 1024 ** 2,
2150 'GiB': 1024 ** 3,
2151 'GB': 1000 ** 3,
2152 'gB': 1024 ** 3,
2153 'Gb': 1000 ** 3,
2154 'gb': 1000 ** 3,
2155 'gigabytes': 1000 ** 3,
2156 'gibibytes': 1024 ** 3,
2157 'TiB': 1024 ** 4,
2158 'TB': 1000 ** 4,
2159 'tB': 1024 ** 4,
2160 'Tb': 1000 ** 4,
2161 'tb': 1000 ** 4,
2162 'terabytes': 1000 ** 4,
2163 'tebibytes': 1024 ** 4,
2164 'PiB': 1024 ** 5,
2165 'PB': 1000 ** 5,
2166 'pB': 1024 ** 5,
2167 'Pb': 1000 ** 5,
2168 'pb': 1000 ** 5,
2169 'petabytes': 1000 ** 5,
2170 'pebibytes': 1024 ** 5,
2171 'EiB': 1024 ** 6,
2172 'EB': 1000 ** 6,
2173 'eB': 1024 ** 6,
2174 'Eb': 1000 ** 6,
2175 'eb': 1000 ** 6,
2176 'exabytes': 1000 ** 6,
2177 'exbibytes': 1024 ** 6,
2178 'ZiB': 1024 ** 7,
2179 'ZB': 1000 ** 7,
2180 'zB': 1024 ** 7,
2181 'Zb': 1000 ** 7,
2182 'zb': 1000 ** 7,
2183 'zettabytes': 1000 ** 7,
2184 'zebibytes': 1024 ** 7,
2185 'YiB': 1024 ** 8,
2186 'YB': 1000 ** 8,
2187 'yB': 1024 ** 8,
2188 'Yb': 1000 ** 8,
2189 'yb': 1000 ** 8,
2190 'yottabytes': 1000 ** 8,
2191 'yobibytes': 1024 ** 8,
2192 }
2193
2194 return lookup_unit_table(_UNIT_TABLE, s)
2195
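# Illustrative use of parse_filesize (comment-only sketch):
#
#   parse_filesize('1.5GB')   # 1500000000 (decimal unit)
#   parse_filesize('512KiB')  # 524288 (binary unit)
#   parse_filesize('foo')     # None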
2196
2197 def parse_count(s):
2198 if s is None:
2199 return None
2200
2201 s = re.sub(r'^[^\d]+\s', '', s).strip()
2202
2203 if re.match(r'^[\d,.]+$', s):
2204 return str_to_int(s)
2205
2206 _UNIT_TABLE = {
2207 'k': 1000,
2208 'K': 1000,
2209 'm': 1000 ** 2,
2210 'M': 1000 ** 2,
2211 'kk': 1000 ** 2,
2212 'KK': 1000 ** 2,
2213 'b': 1000 ** 3,
2214 'B': 1000 ** 3,
2215 }
2216
2217 ret = lookup_unit_table(_UNIT_TABLE, s)
2218 if ret is not None:
2219 return ret
2220
2221 mobj = re.match(r'([\d,.]+)(?:$|\s)', s)
2222 if mobj:
2223 return str_to_int(mobj.group(1))
2224
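# Illustrative use of parse_count (comment-only sketch):
#
#   parse_count('1.1M')         # 1100000
#   parse_count('1,234 views')  # 1234
#   parse_count('no views')     # None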
2225
2226 def parse_resolution(s, *, lenient=False):
2227 if s is None:
2228 return {}
2229
2230 if lenient:
2231 mobj = re.search(r'(?P<w>\d+)\s*[xX×,]\s*(?P<h>\d+)', s)
2232 else:
2233 mobj = re.search(r'(?<![a-zA-Z0-9])(?P<w>\d+)\s*[xX×,]\s*(?P<h>\d+)(?![a-zA-Z0-9])', s)
2234 if mobj:
2235 return {
2236 'width': int(mobj.group('w')),
2237 'height': int(mobj.group('h')),
2238 }
2239
2240 mobj = re.search(r'(?<![a-zA-Z0-9])(\d+)[pPiI](?![a-zA-Z0-9])', s)
2241 if mobj:
2242 return {'height': int(mobj.group(1))}
2243
2244 mobj = re.search(r'\b([48])[kK]\b', s)
2245 if mobj:
2246 return {'height': int(mobj.group(1)) * 540}
2247
2248 return {}
2249
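# Illustrative use of parse_resolution (comment-only sketch):
#
#   parse_resolution('1920x1080')  # {'width': 1920, 'height': 1080}
#   parse_resolution('720p')       # {'height': 720}
#   parse_resolution('4k')         # {'height': 2160}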
2250
2251 def parse_bitrate(s):
2252 if not isinstance(s, compat_str):
2253 return
2254 mobj = re.search(r'\b(\d+)\s*kbps', s)
2255 if mobj:
2256 return int(mobj.group(1))
2257
2258
2259 def month_by_name(name, lang='en'):
2260 """ Return the number of a month by (locale-independently) English name """
2261
2262 month_names = MONTH_NAMES.get(lang, MONTH_NAMES['en'])
2263
2264 try:
2265 return month_names.index(name) + 1
2266 except ValueError:
2267 return None
2268
2269
2270 def month_by_abbreviation(abbrev):
2271 """ Return the number of a month by (locale-independently) English
2272 abbreviations """
2273
2274 try:
2275 return [s[:3] for s in ENGLISH_MONTH_NAMES].index(abbrev) + 1
2276 except ValueError:
2277 return None
2278
2279
2280 def fix_xml_ampersands(xml_str):
2281 """Replace all the '&' by '&amp;' in XML"""
2282 return re.sub(
2283 r'&(?!amp;|lt;|gt;|apos;|quot;|#x[0-9a-fA-F]{,4};|#[0-9]{,4};)',
2284 '&amp;',
2285 xml_str)
2286
2287
2288 def setproctitle(title):
2289 assert isinstance(title, compat_str)
2290
2291 # ctypes in Jython is not complete
2292 # http://bugs.jython.org/issue2148
2293 if sys.platform.startswith('java'):
2294 return
2295
2296 try:
2297 libc = ctypes.cdll.LoadLibrary('libc.so.6')
2298 except OSError:
2299 return
2300 except TypeError:
2301 # LoadLibrary in Windows Python 2.7.13 only expects
2302 # a bytestring, but since unicode_literals turns
2303 # every string into a unicode string, it fails.
2304 return
2305 title_bytes = title.encode('utf-8')
2306 buf = ctypes.create_string_buffer(len(title_bytes))
2307 buf.value = title_bytes
2308 try:
2309 libc.prctl(15, buf, 0, 0, 0)
2310 except AttributeError:
2311 return # Strange libc, just skip this
2312
2313
2314 def remove_start(s, start):
2315 return s[len(start):] if s is not None and s.startswith(start) else s
2316
2317
2318 def remove_end(s, end):
2319 return s[:-len(end)] if s is not None and s.endswith(end) else s
2320
2321
2322 def remove_quotes(s):
2323 if s is None or len(s) < 2:
2324 return s
2325 for quote in ('"', "'", ):
2326 if s[0] == quote and s[-1] == quote:
2327 return s[1:-1]
2328 return s
2329
2330
2331 def get_domain(url):
2332 domain = re.match(r'(?:https?:\/\/)?(?:www\.)?(?P<domain>[^\n\/]+\.[^\n\/]+)(?:\/(.*))?', url)
2333 return domain.group('domain') if domain else None
2334
2335
2336 def url_basename(url):
2337 path = compat_urlparse.urlparse(url).path
2338 return path.strip('/').split('/')[-1]
2339
2340
2341 def base_url(url):
2342 return re.match(r'https?://[^?#&]+/', url).group()
2343
2344
2345 def urljoin(base, path):
2346 if isinstance(path, bytes):
2347 path = path.decode('utf-8')
2348 if not isinstance(path, compat_str) or not path:
2349 return None
2350 if re.match(r'^(?:[a-zA-Z][a-zA-Z0-9+-.]*:)?//', path):
2351 return path
2352 if isinstance(base, bytes):
2353 base = base.decode('utf-8')
2354 if not isinstance(base, compat_str) or not re.match(
2355 r'^(?:https?:)?//', base):
2356 return None
2357 return compat_urlparse.urljoin(base, path)
2358
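# Illustrative use of urljoin (comment-only sketch):
#
#   urljoin('https://example.com/a/', 'b.mp4')                 # 'https://example.com/a/b.mp4'
#   urljoin('https://example.com', '//cdn.example.com/c.mp4')  # '//cdn.example.com/c.mp4'
#   urljoin(None, 'b.mp4')                                     # None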
2359
2360 class HEADRequest(compat_urllib_request.Request):
2361 def get_method(self):
2362 return 'HEAD'
2363
2364
2365 class PUTRequest(compat_urllib_request.Request):
2366 def get_method(self):
2367 return 'PUT'
2368
2369
2370 def int_or_none(v, scale=1, default=None, get_attr=None, invscale=1):
2371 if get_attr and v is not None:
2372 v = getattr(v, get_attr, None)
2373 try:
2374 return int(v) * invscale // scale
2375 except (ValueError, TypeError, OverflowError):
2376 return default
2377
2378
2379 def str_or_none(v, default=None):
2380 return default if v is None else compat_str(v)
2381
2382
2383 def str_to_int(int_str):
2384 """ A more relaxed version of int_or_none """
2385 if isinstance(int_str, int):
2386 return int_str
2387 elif isinstance(int_str, compat_str):
2388 int_str = re.sub(r'[,\.\+]', '', int_str)
2389 return int_or_none(int_str)
2390
2391
2392 def float_or_none(v, scale=1, invscale=1, default=None):
2393 if v is None:
2394 return default
2395 try:
2396 return float(v) * invscale / scale
2397 except (ValueError, TypeError):
2398 return default
2399
2400
2401 def bool_or_none(v, default=None):
2402 return v if isinstance(v, bool) else default
2403
2404
2405 def strip_or_none(v, default=None):
2406 return v.strip() if isinstance(v, compat_str) else default
2407
2408
2409 def url_or_none(url):
2410 if not url or not isinstance(url, compat_str):
2411 return None
2412 url = url.strip()
2413 return url if re.match(r'^(?:(?:https?|rt(?:m(?:pt?[es]?|fp)|sp[su]?)|mms|ftps?):)?//', url) else None
2414
2415
2416 def request_to_url(req):
2417 if isinstance(req, compat_urllib_request.Request):
2418 return req.get_full_url()
2419 else:
2420 return req
2421
2422
2423 def strftime_or_none(timestamp, date_format, default=None):
2424 datetime_object = None
2425 try:
2426 if isinstance(timestamp, (int, float)): # unix timestamp
2427 datetime_object = datetime.datetime.utcfromtimestamp(timestamp)
2428 elif isinstance(timestamp, compat_str): # assume YYYYMMDD
2429 datetime_object = datetime.datetime.strptime(timestamp, '%Y%m%d')
2430 return datetime_object.strftime(date_format)
2431 except (ValueError, TypeError, AttributeError):
2432 return default
2433
2434
2435 def parse_duration(s):
2436 if not isinstance(s, str):
2437 return None
2438 s = s.strip()
2439 if not s:
2440 return None
2441
2442 days, hours, mins, secs, ms = [None] * 5
2443 m = re.match(r'''(?x)
2444 (?P<before_secs>
2445 (?:(?:(?P<days>[0-9]+):)?(?P<hours>[0-9]+):)?(?P<mins>[0-9]+):)?
2446 (?P<secs>(?(before_secs)[0-9]{1,2}|[0-9]+))
2447 (?P<ms>[.:][0-9]+)?Z?$
2448 ''', s)
2449 if m:
2450 days, hours, mins, secs, ms = m.group('days', 'hours', 'mins', 'secs', 'ms')
2451 else:
2452 m = re.match(
2453 r'''(?ix)(?:P?
2454 (?:
2455 [0-9]+\s*y(?:ears?)?,?\s*
2456 )?
2457 (?:
2458 [0-9]+\s*m(?:onths?)?,?\s*
2459 )?
2460 (?:
2461 [0-9]+\s*w(?:eeks?)?,?\s*
2462 )?
2463 (?:
2464 (?P<days>[0-9]+)\s*d(?:ays?)?,?\s*
2465 )?
2466 T)?
2467 (?:
2468 (?P<hours>[0-9]+)\s*h(?:ours?)?,?\s*
2469 )?
2470 (?:
2471 (?P<mins>[0-9]+)\s*m(?:in(?:ute)?s?)?,?\s*
2472 )?
2473 (?:
2474 (?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\s*s(?:ec(?:ond)?s?)?\s*
2475 )?Z?$''', s)
2476 if m:
2477 days, hours, mins, secs, ms = m.groups()
2478 else:
2479 m = re.match(r'(?i)(?:(?P<hours>[0-9.]+)\s*(?:hours?)|(?P<mins>[0-9.]+)\s*(?:mins?\.?|minutes?)\s*)Z?$', s)
2480 if m:
2481 hours, mins = m.groups()
2482 else:
2483 return None
2484
2485 duration = 0
2486 if secs:
2487 duration += float(secs)
2488 if mins:
2489 duration += float(mins) * 60
2490 if hours:
2491 duration += float(hours) * 60 * 60
2492 if days:
2493 duration += float(days) * 24 * 60 * 60
2494 if ms:
2495 duration += float(ms.replace(':', '.'))
2496 return duration
2497
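# Illustrative use of parse_duration (comment-only sketch):
#
#   parse_duration('1:30')      # 90.0
#   parse_duration('02:03:04')  # 7384.0
#   parse_duration('PT1H30M')   # 5400.0 (ISO 8601 style)
#   parse_duration('3 min')     # 180.0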
2498
2499 def prepend_extension(filename, ext, expected_real_ext=None):
2500 name, real_ext = os.path.splitext(filename)
2501 return (
2502 f'{name}.{ext}{real_ext}'
2503 if not expected_real_ext or real_ext[1:] == expected_real_ext
2504 else f'{filename}.{ext}')
2505
2506
2507 def replace_extension(filename, ext, expected_real_ext=None):
2508 name, real_ext = os.path.splitext(filename)
2509 return '{}.{}'.format(
2510 name if not expected_real_ext or real_ext[1:] == expected_real_ext else filename,
2511 ext)
2512
2513
2514 def check_executable(exe, args=[]):
2515 """ Checks if the given binary is installed somewhere in PATH, and returns its name.
2516 args can be a list of arguments for a short output (like -version) """
2517 try:
2518 Popen([exe] + args, stdout=subprocess.PIPE, stderr=subprocess.PIPE).communicate_or_kill()
2519 except OSError:
2520 return False
2521 return exe
2522
2523
2524 def _get_exe_version_output(exe, args, *, to_screen=None):
2525 if to_screen:
2526 to_screen(f'Checking exe version: {shell_quote([exe] + args)}')
2527 try:
2528 # STDIN should be redirected too. On UNIX-like systems, ffmpeg triggers
2529 # SIGTTOU if yt-dlp is run in the background.
2530 # See https://github.com/ytdl-org/youtube-dl/issues/955#issuecomment-209789656
2531 out, _ = Popen(
2532 [encodeArgument(exe)] + args, stdin=subprocess.PIPE,
2533 stdout=subprocess.PIPE, stderr=subprocess.STDOUT).communicate_or_kill()
2534 except OSError:
2535 return False
2536 if isinstance(out, bytes): # Popen output is bytes when no encoding is specified
2537 out = out.decode('ascii', 'ignore')
2538 return out
2539
2540
2541 def detect_exe_version(output, version_re=None, unrecognized='present'):
2542 assert isinstance(output, compat_str)
2543 if version_re is None:
2544 version_re = r'version\s+([-0-9._a-zA-Z]+)'
2545 m = re.search(version_re, output)
2546 if m:
2547 return m.group(1)
2548 else:
2549 return unrecognized
2550
2551
2552 def get_exe_version(exe, args=['--version'],
2553 version_re=None, unrecognized='present'):
2554 """ Returns the version of the specified executable,
2555 or False if the executable is not present """
2556 out = _get_exe_version_output(exe, args)
2557 return detect_exe_version(out, version_re, unrecognized) if out else False
2558
2559
2560 class LazyList(collections.abc.Sequence):
2561 ''' Lazy immutable list from an iterable
2562 Note that slices of a LazyList are lists and not LazyList'''
2563
2564 class IndexError(IndexError):
2565 pass
2566
2567 def __init__(self, iterable, *, reverse=False, _cache=None):
2568 self.__iterable = iter(iterable)
2569 self.__cache = [] if _cache is None else _cache
2570 self.__reversed = reverse
2571
2572 def __iter__(self):
2573 if self.__reversed:
2574 # We need to consume the entire iterable to iterate in reverse
2575 yield from self.exhaust()
2576 return
2577 yield from self.__cache
2578 for item in self.__iterable:
2579 self.__cache.append(item)
2580 yield item
2581
2582 def __exhaust(self):
2583 self.__cache.extend(self.__iterable)
2584 # Discard the emptied iterable to make it pickle-able
2585 self.__iterable = []
2586 return self.__cache
2587
2588 def exhaust(self):
2589 ''' Evaluate the entire iterable '''
2590 return self.__exhaust()[::-1 if self.__reversed else 1]
2591
2592 @staticmethod
2593 def __reverse_index(x):
2594 return None if x is None else -(x + 1)
2595
2596 def __getitem__(self, idx):
2597 if isinstance(idx, slice):
2598 if self.__reversed:
2599 idx = slice(self.__reverse_index(idx.start), self.__reverse_index(idx.stop), -(idx.step or 1))
2600 start, stop, step = idx.start, idx.stop, idx.step or 1
2601 elif isinstance(idx, int):
2602 if self.__reversed:
2603 idx = self.__reverse_index(idx)
2604 start, stop, step = idx, idx, 0
2605 else:
2606 raise TypeError('indices must be integers or slices')
2607 if ((start or 0) < 0 or (stop or 0) < 0
2608 or (start is None and step < 0)
2609 or (stop is None and step > 0)):
2610 # We need to consume the entire iterable to be able to slice from the end
2611 # Obviously, never use this with infinite iterables
2612 self.__exhaust()
2613 try:
2614 return self.__cache[idx]
2615 except IndexError as e:
2616 raise self.IndexError(e) from e
2617 n = max(start or 0, stop or 0) - len(self.__cache) + 1
2618 if n > 0:
2619 self.__cache.extend(itertools.islice(self.__iterable, n))
2620 try:
2621 return self.__cache[idx]
2622 except IndexError as e:
2623 raise self.IndexError(e) from e
2624
2625 def __bool__(self):
2626 try:
2627 self[-1] if self.__reversed else self[0]
2628 except self.IndexError:
2629 return False
2630 return True
2631
2632 def __len__(self):
2633 self.__exhaust()
2634 return len(self.__cache)
2635
2636 def __reversed__(self):
2637 return type(self)(self.__iterable, reverse=not self.__reversed, _cache=self.__cache)
2638
2639 def __copy__(self):
2640 return type(self)(self.__iterable, reverse=self.__reversed, _cache=self.__cache)
2641
2642 def __repr__(self):
2643 # repr and str should mimic a list. So we exhaust the iterable
2644 return repr(self.exhaust())
2645
2646 def __str__(self):
2647 return repr(self.exhaust())
2648
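# Illustrative use of LazyList (comment-only sketch); items are pulled from
# the iterable only as they are needed:
#
#   l = LazyList(itertools.count())  # safe as long as it is never exhausted
#   l[5]   # 5 - consumes only the first six items
#   l[:3]  # [0, 1, 2] - slices are plain lists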
2649
2650 class PagedList:
2651
2652 class IndexError(IndexError):
2653 pass
2654
2655 def __len__(self):
2656 # This is only useful for tests
2657 return len(self.getslice())
2658
2659 def __init__(self, pagefunc, pagesize, use_cache=True):
2660 self._pagefunc = pagefunc
2661 self._pagesize = pagesize
2662 self._pagecount = float('inf')
2663 self._use_cache = use_cache
2664 self._cache = {}
2665
2666 def getpage(self, pagenum):
2667 page_results = self._cache.get(pagenum)
2668 if page_results is None:
2669 page_results = [] if pagenum > self._pagecount else list(self._pagefunc(pagenum))
2670 if self._use_cache:
2671 self._cache[pagenum] = page_results
2672 return page_results
2673
2674 def getslice(self, start=0, end=None):
2675 return list(self._getslice(start, end))
2676
2677 def _getslice(self, start, end):
2678 raise NotImplementedError('This method must be implemented by subclasses')
2679
2680 def __getitem__(self, idx):
2681 assert self._use_cache, 'Indexing PagedList requires cache'
2682 if not isinstance(idx, int) or idx < 0:
2683 raise TypeError('indices must be non-negative integers')
2684 entries = self.getslice(idx, idx + 1)
2685 if not entries:
2686 raise self.IndexError()
2687 return entries[0]
2688
2689
2690 class OnDemandPagedList(PagedList):
2691 """Download pages until a page with less than maximum results"""
2692
2693 def _getslice(self, start, end):
2694 for pagenum in itertools.count(start // self._pagesize):
2695 firstid = pagenum * self._pagesize
2696 nextfirstid = pagenum * self._pagesize + self._pagesize
2697 if start >= nextfirstid:
2698 continue
2699
2700 startv = (
2701 start % self._pagesize
2702 if firstid <= start < nextfirstid
2703 else 0)
2704 endv = (
2705 ((end - 1) % self._pagesize) + 1
2706 if (end is not None and firstid <= end <= nextfirstid)
2707 else None)
2708
2709 try:
2710 page_results = self.getpage(pagenum)
2711 except Exception:
2712 self._pagecount = pagenum - 1
2713 raise
2714 if startv != 0 or endv is not None:
2715 page_results = page_results[startv:endv]
2716 yield from page_results
2717
2718 # A little optimization - if the current page is not "full", i.e. does
2719 # not contain page_size videos, then we can assume that this page
2720 # is the last one - there are no more ids on further pages -
2721 # i.e. no need to query again.
2722 if len(page_results) + startv < self._pagesize:
2723 break
2724
2725 # If we got the whole page, but the next page is not interesting,
2726 # break out early as well
2727 if end == nextfirstid:
2728 break
2729
2730
2731 class InAdvancePagedList(PagedList):
2732 """PagedList with total number of pages known in advance"""
2733
2734 def __init__(self, pagefunc, pagecount, pagesize):
2735 PagedList.__init__(self, pagefunc, pagesize, True)
2736 self._pagecount = pagecount
2737
2738 def _getslice(self, start, end):
2739 start_page = start // self._pagesize
2740 end_page = self._pagecount if end is None else min(self._pagecount, end // self._pagesize + 1)
2741 skip_elems = start - start_page * self._pagesize
2742 only_more = None if end is None else end - start
2743 for pagenum in range(start_page, end_page):
2744 page_results = self.getpage(pagenum)
2745 if skip_elems:
2746 page_results = page_results[skip_elems:]
2747 skip_elems = None
2748 if only_more is not None:
2749 if len(page_results) < only_more:
2750 only_more -= len(page_results)
2751 else:
2752 yield from page_results[:only_more]
2753 break
2754 yield from page_results
2755
2756
2757 def uppercase_escape(s):
2758 unicode_escape = codecs.getdecoder('unicode_escape')
2759 return re.sub(
2760 r'\\U[0-9a-fA-F]{8}',
2761 lambda m: unicode_escape(m.group(0))[0],
2762 s)
2763
2764
2765 def lowercase_escape(s):
2766 unicode_escape = codecs.getdecoder('unicode_escape')
2767 return re.sub(
2768 r'\\u[0-9a-fA-F]{4}',
2769 lambda m: unicode_escape(m.group(0))[0],
2770 s)
2771
2772
2773 def escape_rfc3986(s):
2774 """Escape non-ASCII characters as suggested by RFC 3986"""
2775 return urllib.parse.quote(s, b"%/;:@&=+$,!~*'()?#[]")
2776
2777
2778 def escape_url(url):
2779 """Escape URL as suggested by RFC 3986"""
2780 url_parsed = compat_urllib_parse_urlparse(url)
2781 return url_parsed._replace(
2782 netloc=url_parsed.netloc.encode('idna').decode('ascii'),
2783 path=escape_rfc3986(url_parsed.path),
2784 params=escape_rfc3986(url_parsed.params),
2785 query=escape_rfc3986(url_parsed.query),
2786 fragment=escape_rfc3986(url_parsed.fragment)
2787 ).geturl()
2788
2789
2790 def parse_qs(url):
2791 return compat_parse_qs(compat_urllib_parse_urlparse(url).query)
2792
2793
2794 def read_batch_urls(batch_fd):
2795 def fixup(url):
2796 if not isinstance(url, compat_str):
2797 url = url.decode('utf-8', 'replace')
2798 BOM_UTF8 = ('\xef\xbb\xbf', '\ufeff')
2799 for bom in BOM_UTF8:
2800 if url.startswith(bom):
2801 url = url[len(bom):]
2802 url = url.lstrip()
2803 if not url or url.startswith(('#', ';', ']')):
2804 return False
2805 # "#" cannot be stripped out since it is part of the URI
2806 # However, it can be safely stripped out when it follows a whitespace
2807 return re.split(r'\s#', url, 1)[0].rstrip()
2808
2809 with contextlib.closing(batch_fd) as fd:
2810 return [url for url in map(fixup, fd) if url]
2811
2812
2813 def urlencode_postdata(*args, **kargs):
2814 return compat_urllib_parse_urlencode(*args, **kargs).encode('ascii')
2815
2816
2817 def update_url_query(url, query):
2818 if not query:
2819 return url
2820 parsed_url = compat_urlparse.urlparse(url)
2821 qs = compat_parse_qs(parsed_url.query)
2822 qs.update(query)
2823 return compat_urlparse.urlunparse(parsed_url._replace(
2824 query=compat_urllib_parse_urlencode(qs, True)))
2825
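# Illustrative use of update_url_query (comment-only sketch; the resulting
# parameter order follows dict insertion order):
#
#   update_url_query('http://example.com/path', {'quality': ['HD'], 'format': ['mp4']})
#   # 'http://example.com/path?quality=HD&format=mp4'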
2826
2827 def update_Request(req, url=None, data=None, headers={}, query={}):
2828 req_headers = req.headers.copy()
2829 req_headers.update(headers)
2830 req_data = data or req.data
2831 req_url = update_url_query(url or req.get_full_url(), query)
2832 req_get_method = req.get_method()
2833 if req_get_method == 'HEAD':
2834 req_type = HEADRequest
2835 elif req_get_method == 'PUT':
2836 req_type = PUTRequest
2837 else:
2838 req_type = compat_urllib_request.Request
2839 new_req = req_type(
2840 req_url, data=req_data, headers=req_headers,
2841 origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
2842 if hasattr(req, 'timeout'):
2843 new_req.timeout = req.timeout
2844 return new_req
2845
2846
2847 def _multipart_encode_impl(data, boundary):
2848 content_type = 'multipart/form-data; boundary=%s' % boundary
2849
2850 out = b''
2851 for k, v in data.items():
2852 out += b'--' + boundary.encode('ascii') + b'\r\n'
2853 if isinstance(k, compat_str):
2854 k = k.encode('utf-8')
2855 if isinstance(v, compat_str):
2856 v = v.encode('utf-8')
2857 # RFC 2047 requires non-ASCII field names to be encoded, while RFC 7578
2858 # suggests sending UTF-8 directly. Firefox sends UTF-8, too
2859 content = b'Content-Disposition: form-data; name="' + k + b'"\r\n\r\n' + v + b'\r\n'
2860 if boundary.encode('ascii') in content:
2861 raise ValueError('Boundary overlaps with data')
2862 out += content
2863
2864 out += b'--' + boundary.encode('ascii') + b'--\r\n'
2865
2866 return out, content_type
2867
2868
2869 def multipart_encode(data, boundary=None):
2870 '''
2871 Encode a dict to RFC 7578-compliant form-data
2872
2873 data:
2874 A dict where keys and values can be either Unicode or bytes-like
2875 objects.
2876 boundary:
2877 If specified, it must be a Unicode object and is used as the boundary.
2878 Otherwise a random boundary is generated.
2879
2880 Reference: https://tools.ietf.org/html/rfc7578
2881 '''
2882 has_specified_boundary = boundary is not None
2883
2884 while True:
2885 if boundary is None:
2886 boundary = '---------------' + str(random.randrange(0x0fffffff, 0xffffffff))
2887
2888 try:
2889 out, content_type = _multipart_encode_impl(data, boundary)
2890 break
2891 except ValueError:
2892 if has_specified_boundary:
2893 raise
2894 boundary = None
2895
2896 return out, content_type
2897
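# Illustrative use of multipart_encode (comment-only sketch; expected output
# derived from the implementation above):
#
#   multipart_encode({b'field': b'value'}, boundary='AAAAAA')[0]
#   # b'--AAAAAA\r\nContent-Disposition: form-data; name="field"\r\n\r\nvalue\r\n--AAAAAA--\r\n'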
2898
2899 def dict_get(d, key_or_keys, default=None, skip_false_values=True):
2900 for val in map(d.get, variadic(key_or_keys)):
2901 if val is not None and (val or not skip_false_values):
2902 return val
2903 return default
2904
2905
2906 def try_call(*funcs, expected_type=None, args=[], kwargs={}):
2907 for f in funcs:
2908 try:
2909 val = f(*args, **kwargs)
2910 except (AttributeError, KeyError, TypeError, IndexError, ZeroDivisionError):
2911 pass
2912 else:
2913 if expected_type is None or isinstance(val, expected_type):
2914 return val
2915
2916
2917 def try_get(src, getter, expected_type=None):
2918 return try_call(*variadic(getter), args=(src,), expected_type=expected_type)
2919
2920
2921 def filter_dict(dct, cndn=lambda _, v: v is not None):
2922 return {k: v for k, v in dct.items() if cndn(k, v)}
2923
2924
2925 def merge_dicts(*dicts):
2926 merged = {}
2927 for a_dict in dicts:
2928 for k, v in a_dict.items():
2929 if (v is not None and k not in merged
2930 or isinstance(v, str) and merged[k] == ''):
2931 merged[k] = v
2932 return merged
2933
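# Illustrative use of merge_dicts (comment-only sketch); earlier dicts win,
# but an empty string may be upgraded by a later non-empty one:
#
#   merge_dicts({'a': 1, 'b': ''}, {'a': 2, 'b': 'x', 'c': 3})
#   # {'a': 1, 'b': 'x', 'c': 3}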
2934
2935 def encode_compat_str(string, encoding=preferredencoding(), errors='strict'):
2936 return string if isinstance(string, compat_str) else compat_str(string, encoding, errors)
2937
2938
2939 US_RATINGS = {
2940 'G': 0,
2941 'PG': 10,
2942 'PG-13': 13,
2943 'R': 16,
2944 'NC': 18,
2945 }
2946
2947
2948 TV_PARENTAL_GUIDELINES = {
2949 'TV-Y': 0,
2950 'TV-Y7': 7,
2951 'TV-G': 0,
2952 'TV-PG': 0,
2953 'TV-14': 14,
2954 'TV-MA': 17,
2955 }
2956
2957
2958 def parse_age_limit(s):
2959 if type(s) == int:
2960 return s if 0 <= s <= 21 else None
2961 if not isinstance(s, str):
2962 return None
2963 m = re.match(r'^(?P<age>\d{1,2})\+?$', s)
2964 if m:
2965 return int(m.group('age'))
2966 s = s.upper()
2967 if s in US_RATINGS:
2968 return US_RATINGS[s]
2969 m = re.match(r'^TV[_-]?(%s)$' % '|'.join(k[3:] for k in TV_PARENTAL_GUIDELINES), s)
2970 if m:
2971 return TV_PARENTAL_GUIDELINES['TV-' + m.group(1)]
2972 return None
2973
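# Illustrative use of parse_age_limit (comment-only sketch):
#
#   parse_age_limit('PG-13')  # 13
#   parse_age_limit('TV-MA')  # 17
#   parse_age_limit('18+')    # 18
#   parse_age_limit(99)       # None (outside the 0-21 range)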
2974
2975 def strip_jsonp(code):
2976 return re.sub(
2977 r'''(?sx)^
2978 (?:window\.)?(?P<func_name>[a-zA-Z0-9_.$]*)
2979 (?:\s*&&\s*(?P=func_name))?
2980 \s*\(\s*(?P<callback_data>.*)\);?
2981 \s*?(?://[^\n]*)*$''',
2982 r'\g<callback_data>', code)
2983
2984
2985 def js_to_json(code, vars={}):
2986 # vars is a dict of var, val pairs to substitute
2987 COMMENT_RE = r'/\*(?:(?!\*/).)*?\*/|//[^\n]*\n'
2988 SKIP_RE = fr'\s*(?:{COMMENT_RE})?\s*'
2989 INTEGER_TABLE = (
2990 (fr'(?s)^(0[xX][0-9a-fA-F]+){SKIP_RE}:?$', 16),
2991 (fr'(?s)^(0+[0-7]+){SKIP_RE}:?$', 8),
2992 )
2993
2994 def fix_kv(m):
2995 v = m.group(0)
2996 if v in ('true', 'false', 'null'):
2997 return v
2998 elif v in ('undefined', 'void 0'):
2999 return 'null'
3000 elif v.startswith(('/*', '//', '!')) or v == ',':
3001 return ""
3002
3003 if v[0] in ("'", '"'):
3004 v = re.sub(r'(?s)\\.|"', lambda m: {
3005 '"': '\\"',
3006 "\\'": "'",
3007 '\\\n': '',
3008 '\\x': '\\u00',
3009 }.get(m.group(0), m.group(0)), v[1:-1])
3010 else:
3011 for regex, base in INTEGER_TABLE:
3012 im = re.match(regex, v)
3013 if im:
3014 i = int(im.group(1), base)
3015 return '"%d":' % i if v.endswith(':') else '%d' % i
3016
3017 if v in vars:
3018 return vars[v]
3019
3020 return '"%s"' % v
3021
3022 code = re.sub(r'new Date\((".+")\)', r'\g<1>', code)
3023
3024 return re.sub(r'''(?sx)
3025 "(?:[^"\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^"\\]*"|
3026 '(?:[^'\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^'\\]*'|
3027 {comment}|,(?={skip}[\]}}])|
3028 void\s0|(?:(?<![0-9])[eE]|[a-df-zA-DF-Z_$])[.a-zA-Z_$0-9]*|
3029 \b(?:0[xX][0-9a-fA-F]+|0+[0-7]+)(?:{skip}:)?|
3030 [0-9]+(?={skip}:)|
3031 !+
3032 '''.format(comment=COMMENT_RE, skip=SKIP_RE), fix_kv, code)
3033
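# Illustrative use of js_to_json (comment-only sketch); keys are quoted,
# undefined becomes null and the trailing comma is dropped, so json.loads()
# can parse the result:
#
#   js_to_json("{'key': true, value: undefined,}")
#   # '{"key": true, "value": null}'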
3034
3035 def qualities(quality_ids):
3036 """ Get a numeric quality value out of a list of possible values """
3037 def q(qid):
3038 try:
3039 return quality_ids.index(qid)
3040 except ValueError:
3041 return -1
3042 return q
3043
3044
3045 POSTPROCESS_WHEN = {'pre_process', 'after_filter', 'before_dl', 'after_move', 'post_process', 'after_video', 'playlist'}
3046
3047
3048 DEFAULT_OUTTMPL = {
3049 'default': '%(title)s [%(id)s].%(ext)s',
3050 'chapter': '%(title)s - %(section_number)03d %(section_title)s [%(id)s].%(ext)s',
3051 }
3052 OUTTMPL_TYPES = {
3053 'chapter': None,
3054 'subtitle': None,
3055 'thumbnail': None,
3056 'description': 'description',
3057 'annotation': 'annotations.xml',
3058 'infojson': 'info.json',
3059 'link': None,
3060 'pl_video': None,
3061 'pl_thumbnail': None,
3062 'pl_description': 'description',
3063 'pl_infojson': 'info.json',
3064 }
3065
3066 # As of [1], the format syntax is:
3067 # %[mapping_key][conversion_flags][minimum_width][.precision][length_modifier]type
3068 # 1. https://docs.python.org/2/library/stdtypes.html#string-formatting
3069 STR_FORMAT_RE_TMPL = r'''(?x)
3070 (?<!%)(?P<prefix>(?:%%)*)
3071 %
3072 (?P<has_key>\((?P<key>{0})\))?
3073 (?P<format>
3074 (?P<conversion>[#0\-+ ]+)?
3075 (?P<min_width>\d+)?
3076 (?P<precision>\.\d+)?
3077 (?P<len_mod>[hlL])? # unused in python
3078 {1} # conversion type
3079 )
3080 '''
3081
3082
3083 STR_FORMAT_TYPES = 'diouxXeEfFgGcrs'
3084
3085
3086 def limit_length(s, length):
3087 """ Add ellipses to overly long strings """
3088 if s is None:
3089 return None
3090 ELLIPSES = '...'
3091 if len(s) > length:
3092 return s[:length - len(ELLIPSES)] + ELLIPSES
3093 return s
3094
3095
3096 def version_tuple(v):
3097 return tuple(int(e) for e in re.split(r'[-.]', v))
3098
3099
3100 def is_outdated_version(version, limit, assume_new=True):
3101 if not version:
3102 return not assume_new
3103 try:
3104 return version_tuple(version) < version_tuple(limit)
3105 except ValueError:
3106 return not assume_new
3107
3108
3109 def ytdl_is_updateable():
3110 """ Returns if yt-dlp can be updated with -U """
3111
3112 from .update import is_non_updateable
3113
3114 return not is_non_updateable()
3115
3116
3117 def args_to_str(args):
3118 # Get a short string representation for a subprocess command
3119 return ' '.join(compat_shlex_quote(a) for a in args)
3120
3121
3122 def error_to_compat_str(err):
3123 return str(err)
3124
3125
3126 def error_to_str(err):
3127 return f'{type(err).__name__}: {err}'
3128
3129
3130 def mimetype2ext(mt):
3131 if mt is None:
3132 return None
3133
3134 mt, _, params = mt.partition(';')
3135 mt = mt.strip()
3136
3137 FULL_MAP = {
3138 'audio/mp4': 'm4a',
3139 # Per RFC 3003, audio/mpeg can be .mp1, .mp2 or .mp3. We use .mp3 here
3140 # since it's the most popular one
3141 'audio/mpeg': 'mp3',
3142 'audio/x-wav': 'wav',
3143 'audio/wav': 'wav',
3144 'audio/wave': 'wav',
3145 }
3146
3147 ext = FULL_MAP.get(mt)
3148 if ext is not None:
3149 return ext
3150
3151 SUBTYPE_MAP = {
3152 '3gpp': '3gp',
3153 'smptett+xml': 'tt',
3154 'ttaf+xml': 'dfxp',
3155 'ttml+xml': 'ttml',
3156 'x-flv': 'flv',
3157 'x-mp4-fragmented': 'mp4',
3158 'x-ms-sami': 'sami',
3159 'x-ms-wmv': 'wmv',
3160 'mpegurl': 'm3u8',
3161 'x-mpegurl': 'm3u8',
3162 'vnd.apple.mpegurl': 'm3u8',
3163 'dash+xml': 'mpd',
3164 'f4m+xml': 'f4m',
3165 'hds+xml': 'f4m',
3166 'vnd.ms-sstr+xml': 'ism',
3167 'quicktime': 'mov',
3168 'mp2t': 'ts',
3169 'x-wav': 'wav',
3170 'filmstrip+json': 'fs',
3171 'svg+xml': 'svg',
3172 }
3173
3174 _, _, subtype = mt.rpartition('/')
3175 ext = SUBTYPE_MAP.get(subtype.lower())
3176 if ext is not None:
3177 return ext
3178
3179 SUFFIX_MAP = {
3180 'json': 'json',
3181 'xml': 'xml',
3182 'zip': 'zip',
3183 'gzip': 'gz',
3184 }
3185
3186 _, _, suffix = subtype.partition('+')
3187 ext = SUFFIX_MAP.get(suffix)
3188 if ext is not None:
3189 return ext
3190
3191 return subtype.replace('+', '.')
3192
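# Illustrative use of mimetype2ext (comment-only sketch):
#
#   mimetype2ext('audio/mp4')                # 'm4a'
#   mimetype2ext('application/x-mpegURL')    # 'm3u8'
#   mimetype2ext('text/vtt; charset=UTF-8')  # 'vtt' (parameters are ignored)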
3193
3194 def ext2mimetype(ext_or_url):
3195 if not ext_or_url:
3196 return None
3197 if '.' not in ext_or_url:
3198 ext_or_url = f'file.{ext_or_url}'
3199 return mimetypes.guess_type(ext_or_url)[0]
3200
3201
3202 def parse_codecs(codecs_str):
3203 # http://tools.ietf.org/html/rfc6381
3204 if not codecs_str:
3205 return {}
3206 split_codecs = list(filter(None, map(
3207 str.strip, codecs_str.strip().strip(',').split(','))))
3208 vcodec, acodec, tcodec, hdr = None, None, None, None
3209 for full_codec in split_codecs:
3210 parts = full_codec.split('.')
3211 codec = parts[0].replace('0', '')
3212 if codec in ('avc1', 'avc2', 'avc3', 'avc4', 'vp9', 'vp8', 'hev1', 'hev2',
3213 'h263', 'h264', 'mp4v', 'hvc1', 'av1', 'theora', 'dvh1', 'dvhe'):
3214 if not vcodec:
3215 vcodec = '.'.join(parts[:4]) if codec in ('vp9', 'av1', 'hvc1') else full_codec
3216 if codec in ('dvh1', 'dvhe'):
3217 hdr = 'DV'
3218 elif codec == 'av1' and len(parts) > 3 and parts[3] == '10':
3219 hdr = 'HDR10'
3220 elif full_codec.replace('0', '').startswith('vp9.2'):
3221 hdr = 'HDR10'
3222 elif codec in ('flac', 'mp4a', 'opus', 'vorbis', 'mp3', 'aac', 'ac-3', 'ec-3', 'eac3', 'dtsc', 'dtse', 'dtsh', 'dtsl'):
3223 if not acodec:
3224 acodec = full_codec
3225 elif codec in ('stpp', 'wvtt',):
3226 if not tcodec:
3227 tcodec = full_codec
3228 else:
3229 write_string('WARNING: Unknown codec %s\n' % full_codec, sys.stderr)
3230 if vcodec or acodec or tcodec:
3231 return {
3232 'vcodec': vcodec or 'none',
3233 'acodec': acodec or 'none',
3234 'dynamic_range': hdr,
3235 **({'tcodec': tcodec} if tcodec is not None else {}),
3236 }
3237 elif len(split_codecs) == 2:
3238 return {
3239 'vcodec': split_codecs[0],
3240 'acodec': split_codecs[1],
3241 }
3242 return {}
3243
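# Illustrative use of parse_codecs (comment-only sketch):
#
#   parse_codecs('avc1.77.30, mp4a.40.2')
#   # {'vcodec': 'avc1.77.30', 'acodec': 'mp4a.40.2', 'dynamic_range': None}
#   parse_codecs('dvh1.05.01')
#   # {'vcodec': 'dvh1.05.01', 'acodec': 'none', 'dynamic_range': 'DV'}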
3244
3245 def urlhandle_detect_ext(url_handle):
3246 getheader = url_handle.headers.get
3247
3248 cd = getheader('Content-Disposition')
3249 if cd:
3250 m = re.match(r'attachment;\s*filename="(?P<filename>[^"]+)"', cd)
3251 if m:
3252 e = determine_ext(m.group('filename'), default_ext=None)
3253 if e:
3254 return e
3255
3256 return mimetype2ext(getheader('Content-Type'))
3257
3258
3259 def encode_data_uri(data, mime_type):
3260 return 'data:%s;base64,%s' % (mime_type, base64.b64encode(data).decode('ascii'))
3261
3262
3263 def age_restricted(content_limit, age_limit):
3264 """ Returns True iff the content should be blocked """
3265
3266 if age_limit is None: # No limit set
3267 return False
3268 if content_limit is None:
3269 return False # Content available for everyone
3270 return age_limit < content_limit
3271
3272
3273 def is_html(first_bytes):
3274 """ Detect whether a file contains HTML by examining its first bytes. """
3275
3276 BOMS = [
3277 (b'\xef\xbb\xbf', 'utf-8'),
3278 (b'\x00\x00\xfe\xff', 'utf-32-be'),
3279 (b'\xff\xfe\x00\x00', 'utf-32-le'),
3280 (b'\xff\xfe', 'utf-16-le'),
3281 (b'\xfe\xff', 'utf-16-be'),
3282 ]
3283 for bom, enc in BOMS:
3284 if first_bytes.startswith(bom):
3285 s = first_bytes[len(bom):].decode(enc, 'replace')
3286 break
3287 else:
3288 s = first_bytes.decode('utf-8', 'replace')
3289
3290 return re.match(r'^\s*<', s)
3291
3292
3293 def determine_protocol(info_dict):
3294 protocol = info_dict.get('protocol')
3295 if protocol is not None:
3296 return protocol
3297
3298 url = sanitize_url(info_dict['url'])
3299 if url.startswith('rtmp'):
3300 return 'rtmp'
3301 elif url.startswith('mms'):
3302 return 'mms'
3303 elif url.startswith('rtsp'):
3304 return 'rtsp'
3305
3306 ext = determine_ext(url)
3307 if ext == 'm3u8':
3308 return 'm3u8'
3309 elif ext == 'f4m':
3310 return 'f4m'
3311
3312 return compat_urllib_parse_urlparse(url).scheme
3313
3314
3315 def render_table(header_row, data, delim=False, extra_gap=0, hide_empty=False):
3316 """ Render a list of rows, each as a list of values.
3317 Text after a \t will be right aligned """
3318 def width(string):
3319 return len(remove_terminal_sequences(string).replace('\t', ''))
3320
3321 def get_max_lens(table):
3322 return [max(width(str(v)) for v in col) for col in zip(*table)]
3323
3324 def filter_using_list(row, filterArray):
3325 return [col for take, col in itertools.zip_longest(filterArray, row, fillvalue=True) if take]
3326
3327 max_lens = get_max_lens(data) if hide_empty else []
3328 header_row = filter_using_list(header_row, max_lens)
3329 data = [filter_using_list(row, max_lens) for row in data]
3330
3331 table = [header_row] + data
3332 max_lens = get_max_lens(table)
3333 extra_gap += 1
3334 if delim:
3335 table = [header_row, [delim * (ml + extra_gap) for ml in max_lens]] + data
3336 table[1][-1] = table[1][-1][:-extra_gap * len(delim)] # Remove extra_gap from end of delimiter
3337 for row in table:
3338 for pos, text in enumerate(map(str, row)):
3339 if '\t' in text:
3340 row[pos] = text.replace('\t', ' ' * (max_lens[pos] - width(text))) + ' ' * extra_gap
3341 else:
3342 row[pos] = text + ' ' * (max_lens[pos] - width(text) + extra_gap)
3343 ret = '\n'.join(''.join(row).rstrip() for row in table)
3344 return ret
3345
3346
3347 def _match_one(filter_part, dct, incomplete):
3348 # TODO: Generalize code with YoutubeDL._build_format_filter
3349 STRING_OPERATORS = {
3350 '*=': operator.contains,
3351 '^=': lambda attr, value: attr.startswith(value),
3352 '$=': lambda attr, value: attr.endswith(value),
3353 '~=': lambda attr, value: re.search(value, attr),
3354 }
3355 COMPARISON_OPERATORS = {
3356 **STRING_OPERATORS,
3357 '<=': operator.le, # "<=" must be defined above "<"
3358 '<': operator.lt,
3359 '>=': operator.ge,
3360 '>': operator.gt,
3361 '=': operator.eq,
3362 }
3363
3364 if isinstance(incomplete, bool):
3365 is_incomplete = lambda _: incomplete
3366 else:
3367 is_incomplete = lambda k: k in incomplete
3368
3369 operator_rex = re.compile(r'''(?x)\s*
3370 (?P<key>[a-z_]+)
3371 \s*(?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
3372 (?:
3373 (?P<quote>["\'])(?P<quotedstrval>.+?)(?P=quote)|
3374 (?P<strval>.+?)
3375 )
3376 \s*$
3377 ''' % '|'.join(map(re.escape, COMPARISON_OPERATORS.keys())))
3378 m = operator_rex.search(filter_part)
3379 if m:
3380 m = m.groupdict()
3381 unnegated_op = COMPARISON_OPERATORS[m['op']]
3382 if m['negation']:
3383 op = lambda attr, value: not unnegated_op(attr, value)
3384 else:
3385 op = unnegated_op
3386 comparison_value = m['quotedstrval'] or m['strval']
3387 if m['quote']:
3388 comparison_value = comparison_value.replace(r'\%s' % m['quote'], m['quote'])
3389 actual_value = dct.get(m['key'])
3390 numeric_comparison = None
3391 if isinstance(actual_value, (int, float)):
3392 # If the original field is a string and the matching comparison
3393 # value is a number, we should respect the origin of the original
3394 # field and process the comparison value as a string (see
3395 # https://github.com/ytdl-org/youtube-dl/issues/11082)
3396 try:
3397 numeric_comparison = int(comparison_value)
3398 except ValueError:
3399 numeric_comparison = parse_filesize(comparison_value)
3400 if numeric_comparison is None:
3401 numeric_comparison = parse_filesize(f'{comparison_value}B')
3402 if numeric_comparison is None:
3403 numeric_comparison = parse_duration(comparison_value)
3404 if numeric_comparison is not None and m['op'] in STRING_OPERATORS:
3405 raise ValueError('Operator %s only supports string values!' % m['op'])
3406 if actual_value is None:
3407 return is_incomplete(m['key']) or m['none_inclusive']
3408 return op(actual_value, comparison_value if numeric_comparison is None else numeric_comparison)
3409
3410 UNARY_OPERATORS = {
3411 '': lambda v: (v is True) if isinstance(v, bool) else (v is not None),
3412 '!': lambda v: (v is False) if isinstance(v, bool) else (v is None),
3413 }
3414 operator_rex = re.compile(r'''(?x)\s*
3415 (?P<op>%s)\s*(?P<key>[a-z_]+)
3416 \s*$
3417 ''' % '|'.join(map(re.escape, UNARY_OPERATORS.keys())))
3418 m = operator_rex.search(filter_part)
3419 if m:
3420 op = UNARY_OPERATORS[m.group('op')]
3421 actual_value = dct.get(m.group('key'))
3422 if is_incomplete(m.group('key')) and actual_value is None:
3423 return True
3424 return op(actual_value)
3425
3426 raise ValueError('Invalid filter part %r' % filter_part)
3427
3428
3429 def match_str(filter_str, dct, incomplete=False):
3430 """ Filter a dictionary with a simple string syntax.
3431 @returns Whether the filter passes
3432 @param incomplete Set of keys that are expected to be missing from dct.
3433 Can be True/False to indicate all/none of the keys may be missing.
3434 All conditions on incomplete keys pass if the key is missing.
3435 """
3436 return all(
3437 _match_one(filter_part.replace(r'\&', '&'), dct, incomplete)
3438 for filter_part in re.split(r'(?<!\\)&', filter_str))
3439
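# Illustrative use of match_str (comment-only sketch):
#
#   match_str('like_count > 100 & duration < 600',
#             {'like_count': 190, 'duration': 30})  # True
#   match_str('is_live', {'is_live': False})        # False
#   match_str('!is_live', {})                       # True (missing/None counts as not live)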
3440
3441 def match_filter_func(filters):
3442 if not filters:
3443 return None
3444 filters = variadic(filters)
3445
3446 def _match_func(info_dict, *args, **kwargs):
3447 if any(match_str(f, info_dict, *args, **kwargs) for f in filters):
3448 return None
3449 else:
3450 video_title = info_dict.get('title') or info_dict.get('id') or 'video'
3451 filter_str = ') | ('.join(map(str.strip, filters))
3452 return f'{video_title} does not pass filter ({filter_str}), skipping ...'
3453 return _match_func
3454
3455
3456 def parse_dfxp_time_expr(time_expr):
3457 if not time_expr:
3458 return
3459
3460 mobj = re.match(r'^(?P<time_offset>\d+(?:\.\d+)?)s?$', time_expr)
3461 if mobj:
3462 return float(mobj.group('time_offset'))
3463
3464 mobj = re.match(r'^(\d+):(\d\d):(\d\d(?:(?:\.|:)\d+)?)$', time_expr)
3465 if mobj:
3466 return 3600 * int(mobj.group(1)) + 60 * int(mobj.group(2)) + float(mobj.group(3).replace(':', '.'))
3467
3468
3469 def srt_subtitles_timecode(seconds):
3470 return '%02d:%02d:%02d,%03d' % timetuple_from_msec(seconds * 1000)
3471
3472
3473 def ass_subtitles_timecode(seconds):
3474 time = timetuple_from_msec(seconds * 1000)
3475 return '%01d:%02d:%02d.%02d' % (*time[:-1], time.milliseconds / 10)
3476
3477
3478 def dfxp2srt(dfxp_data):
3479 '''
3480 @param dfxp_data A bytes-like object containing DFXP data
3481 @returns A unicode object containing converted SRT data
3482 '''
3483 LEGACY_NAMESPACES = (
3484 (b'http://www.w3.org/ns/ttml', [
3485 b'http://www.w3.org/2004/11/ttaf1',
3486 b'http://www.w3.org/2006/04/ttaf1',
3487 b'http://www.w3.org/2006/10/ttaf1',
3488 ]),
3489 (b'http://www.w3.org/ns/ttml#styling', [
3490 b'http://www.w3.org/ns/ttml#style',
3491 ]),
3492 )
3493
3494 SUPPORTED_STYLING = [
3495 'color',
3496 'fontFamily',
3497 'fontSize',
3498 'fontStyle',
3499 'fontWeight',
3500 'textDecoration'
3501 ]
3502
3503 _x = functools.partial(xpath_with_ns, ns_map={
3504 'xml': 'http://www.w3.org/XML/1998/namespace',
3505 'ttml': 'http://www.w3.org/ns/ttml',
3506 'tts': 'http://www.w3.org/ns/ttml#styling',
3507 })
3508
3509 styles = {}
3510 default_style = {}
3511
3512 class TTMLPElementParser:
3513 _out = ''
3514 _unclosed_elements = []
3515 _applied_styles = []
3516
3517 def start(self, tag, attrib):
3518 if tag in (_x('ttml:br'), 'br'):
3519 self._out += '\n'
3520 else:
3521 unclosed_elements = []
3522 style = {}
3523 element_style_id = attrib.get('style')
3524 if default_style:
3525 style.update(default_style)
3526 if element_style_id:
3527 style.update(styles.get(element_style_id, {}))
3528 for prop in SUPPORTED_STYLING:
3529 prop_val = attrib.get(_x('tts:' + prop))
3530 if prop_val:
3531 style[prop] = prop_val
3532 if style:
3533 font = ''
3534 for k, v in sorted(style.items()):
3535 if self._applied_styles and self._applied_styles[-1].get(k) == v:
3536 continue
3537 if k == 'color':
3538 font += ' color="%s"' % v
3539 elif k == 'fontSize':
3540 font += ' size="%s"' % v
3541 elif k == 'fontFamily':
3542 font += ' face="%s"' % v
3543 elif k == 'fontWeight' and v == 'bold':
3544 self._out += '<b>'
3545 unclosed_elements.append('b')
3546 elif k == 'fontStyle' and v == 'italic':
3547 self._out += '<i>'
3548 unclosed_elements.append('i')
3549 elif k == 'textDecoration' and v == 'underline':
3550 self._out += '<u>'
3551 unclosed_elements.append('u')
3552 if font:
3553 self._out += '<font' + font + '>'
3554 unclosed_elements.append('font')
3555 applied_style = {}
3556 if self._applied_styles:
3557 applied_style.update(self._applied_styles[-1])
3558 applied_style.update(style)
3559 self._applied_styles.append(applied_style)
3560 self._unclosed_elements.append(unclosed_elements)
3561
3562 def end(self, tag):
3563 if tag not in (_x('ttml:br'), 'br'):
3564 unclosed_elements = self._unclosed_elements.pop()
3565 for element in reversed(unclosed_elements):
3566 self._out += '</%s>' % element
3567 if unclosed_elements and self._applied_styles:
3568 self._applied_styles.pop()
3569
3570 def data(self, data):
3571 self._out += data
3572
3573 def close(self):
3574 return self._out.strip()
3575
3576 def parse_node(node):
3577 target = TTMLPElementParser()
3578 parser = xml.etree.ElementTree.XMLParser(target=target)
3579 parser.feed(xml.etree.ElementTree.tostring(node))
3580 return parser.close()
3581
3582 for k, v in LEGACY_NAMESPACES:
3583 for ns in v:
3584 dfxp_data = dfxp_data.replace(ns, k)
3585
3586 dfxp = compat_etree_fromstring(dfxp_data)
3587 out = []
3588 paras = dfxp.findall(_x('.//ttml:p')) or dfxp.findall('.//p')
3589
3590 if not paras:
3591 raise ValueError('Invalid dfxp/TTML subtitle')
3592
3593 repeat = False
3594 while True:
3595 for style in dfxp.findall(_x('.//ttml:style')):
3596 style_id = style.get('id') or style.get(_x('xml:id'))
3597 if not style_id:
3598 continue
3599 parent_style_id = style.get('style')
3600 if parent_style_id:
3601 if parent_style_id not in styles:
3602 repeat = True
3603 continue
3604 styles[style_id] = styles[parent_style_id].copy()
3605 for prop in SUPPORTED_STYLING:
3606 prop_val = style.get(_x('tts:' + prop))
3607 if prop_val:
3608 styles.setdefault(style_id, {})[prop] = prop_val
3609 if repeat:
3610 repeat = False
3611 else:
3612 break
3613
3614 for p in ('body', 'div'):
3615 ele = xpath_element(dfxp, [_x('.//ttml:' + p), './/' + p])
3616 if ele is None:
3617 continue
3618 style = styles.get(ele.get('style'))
3619 if not style:
3620 continue
3621 default_style.update(style)
3622
3623 for para, index in zip(paras, itertools.count(1)):
3624 begin_time = parse_dfxp_time_expr(para.attrib.get('begin'))
3625 end_time = parse_dfxp_time_expr(para.attrib.get('end'))
3626 dur = parse_dfxp_time_expr(para.attrib.get('dur'))
3627 if begin_time is None:
3628 continue
3629 if not end_time:
3630 if not dur:
3631 continue
3632 end_time = begin_time + dur
3633 out.append('%d\n%s --> %s\n%s\n\n' % (
3634 index,
3635 srt_subtitles_timecode(begin_time),
3636 srt_subtitles_timecode(end_time),
3637 parse_node(para)))
3638
3639 return ''.join(out)
3640
3641
3642 def cli_option(params, command_option, param):
3643 param = params.get(param)
3644 if param is not None:
3645 param = compat_str(param)
3646 return [command_option, param] if param is not None else []
3647
3648
3649 def cli_bool_option(params, command_option, param, true_value='true', false_value='false', separator=None):
3650 param = params.get(param)
3651 if param is None:
3652 return []
3653 assert isinstance(param, bool)
3654 if separator:
3655 return [command_option + separator + (true_value if param else false_value)]
3656 return [command_option, true_value if param else false_value]
3657
3658
3659 def cli_valueless_option(params, command_option, param, expected_value=True):
3660 param = params.get(param)
3661 return [command_option] if param == expected_value else []
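# Illustrative usage of the three CLI helpers above (the parameter names are
# hypothetical examples, not fixed option names):
#   >>> cli_option({'proxy': 'http://127.0.0.1:3128'}, '--proxy', 'proxy')
#   ['--proxy', 'http://127.0.0.1:3128']
#   >>> cli_bool_option({'nocheckcertificate': True}, '--no-check-certificate', 'nocheckcertificate')
#   ['--no-check-certificate', 'true']
#   >>> cli_valueless_option({'quiet': True}, '--quiet', 'quiet')
#   ['--quiet']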
3662
3663
3664 def cli_configuration_args(argdict, keys, default=[], use_compat=True):
3665 if isinstance(argdict, (list, tuple)): # for backward compatibility
3666 if use_compat:
3667 return argdict
3668 else:
3669 argdict = None
3670 if argdict is None:
3671 return default
3672 assert isinstance(argdict, dict)
3673
3674 assert isinstance(keys, (list, tuple))
3675 for key_list in keys:
3676 arg_list = list(filter(
3677 lambda x: x is not None,
3678 [argdict.get(key.lower()) for key in variadic(key_list)]))
3679 if arg_list:
3680 return [arg for args in arg_list for arg in args]
3681 return default
3682
3683
3684 def _configuration_args(main_key, argdict, exe, keys=None, default=[], use_compat=True):
3685 main_key, exe = main_key.lower(), exe.lower()
3686 root_key = exe if main_key == exe else f'{main_key}+{exe}'
3687 keys = [f'{root_key}{k}' for k in (keys or [''])]
3688 if root_key in keys:
3689 if main_key != exe:
3690 keys.append((main_key, exe))
3691 keys.append('default')
3692 else:
3693 use_compat = False
3694 return cli_configuration_args(argdict, keys, default, use_compat)
3695
3696
3697 class ISO639Utils:
3698 # See http://www.loc.gov/standards/iso639-2/ISO-639-2_utf-8.txt
3699 _lang_map = {
3700 'aa': 'aar',
3701 'ab': 'abk',
3702 'ae': 'ave',
3703 'af': 'afr',
3704 'ak': 'aka',
3705 'am': 'amh',
3706 'an': 'arg',
3707 'ar': 'ara',
3708 'as': 'asm',
3709 'av': 'ava',
3710 'ay': 'aym',
3711 'az': 'aze',
3712 'ba': 'bak',
3713 'be': 'bel',
3714 'bg': 'bul',
3715 'bh': 'bih',
3716 'bi': 'bis',
3717 'bm': 'bam',
3718 'bn': 'ben',
3719 'bo': 'bod',
3720 'br': 'bre',
3721 'bs': 'bos',
3722 'ca': 'cat',
3723 'ce': 'che',
3724 'ch': 'cha',
3725 'co': 'cos',
3726 'cr': 'cre',
3727 'cs': 'ces',
3728 'cu': 'chu',
3729 'cv': 'chv',
3730 'cy': 'cym',
3731 'da': 'dan',
3732 'de': 'deu',
3733 'dv': 'div',
3734 'dz': 'dzo',
3735 'ee': 'ewe',
3736 'el': 'ell',
3737 'en': 'eng',
3738 'eo': 'epo',
3739 'es': 'spa',
3740 'et': 'est',
3741 'eu': 'eus',
3742 'fa': 'fas',
3743 'ff': 'ful',
3744 'fi': 'fin',
3745 'fj': 'fij',
3746 'fo': 'fao',
3747 'fr': 'fra',
3748 'fy': 'fry',
3749 'ga': 'gle',
3750 'gd': 'gla',
3751 'gl': 'glg',
3752 'gn': 'grn',
3753 'gu': 'guj',
3754 'gv': 'glv',
3755 'ha': 'hau',
3756 'he': 'heb',
3757 'iw': 'heb', # Replaced by he in 1989 revision
3758 'hi': 'hin',
3759 'ho': 'hmo',
3760 'hr': 'hrv',
3761 'ht': 'hat',
3762 'hu': 'hun',
3763 'hy': 'hye',
3764 'hz': 'her',
3765 'ia': 'ina',
3766 'id': 'ind',
3767 'in': 'ind', # Replaced by id in 1989 revision
3768 'ie': 'ile',
3769 'ig': 'ibo',
3770 'ii': 'iii',
3771 'ik': 'ipk',
3772 'io': 'ido',
3773 'is': 'isl',
3774 'it': 'ita',
3775 'iu': 'iku',
3776 'ja': 'jpn',
3777 'jv': 'jav',
3778 'ka': 'kat',
3779 'kg': 'kon',
3780 'ki': 'kik',
3781 'kj': 'kua',
3782 'kk': 'kaz',
3783 'kl': 'kal',
3784 'km': 'khm',
3785 'kn': 'kan',
3786 'ko': 'kor',
3787 'kr': 'kau',
3788 'ks': 'kas',
3789 'ku': 'kur',
3790 'kv': 'kom',
3791 'kw': 'cor',
3792 'ky': 'kir',
3793 'la': 'lat',
3794 'lb': 'ltz',
3795 'lg': 'lug',
3796 'li': 'lim',
3797 'ln': 'lin',
3798 'lo': 'lao',
3799 'lt': 'lit',
3800 'lu': 'lub',
3801 'lv': 'lav',
3802 'mg': 'mlg',
3803 'mh': 'mah',
3804 'mi': 'mri',
3805 'mk': 'mkd',
3806 'ml': 'mal',
3807 'mn': 'mon',
3808 'mr': 'mar',
3809 'ms': 'msa',
3810 'mt': 'mlt',
3811 'my': 'mya',
3812 'na': 'nau',
3813 'nb': 'nob',
3814 'nd': 'nde',
3815 'ne': 'nep',
3816 'ng': 'ndo',
3817 'nl': 'nld',
3818 'nn': 'nno',
3819 'no': 'nor',
3820 'nr': 'nbl',
3821 'nv': 'nav',
3822 'ny': 'nya',
3823 'oc': 'oci',
3824 'oj': 'oji',
3825 'om': 'orm',
3826 'or': 'ori',
3827 'os': 'oss',
3828 'pa': 'pan',
3829 'pi': 'pli',
3830 'pl': 'pol',
3831 'ps': 'pus',
3832 'pt': 'por',
3833 'qu': 'que',
3834 'rm': 'roh',
3835 'rn': 'run',
3836 'ro': 'ron',
3837 'ru': 'rus',
3838 'rw': 'kin',
3839 'sa': 'san',
3840 'sc': 'srd',
3841 'sd': 'snd',
3842 'se': 'sme',
3843 'sg': 'sag',
3844 'si': 'sin',
3845 'sk': 'slk',
3846 'sl': 'slv',
3847 'sm': 'smo',
3848 'sn': 'sna',
3849 'so': 'som',
3850 'sq': 'sqi',
3851 'sr': 'srp',
3852 'ss': 'ssw',
3853 'st': 'sot',
3854 'su': 'sun',
3855 'sv': 'swe',
3856 'sw': 'swa',
3857 'ta': 'tam',
3858 'te': 'tel',
3859 'tg': 'tgk',
3860 'th': 'tha',
3861 'ti': 'tir',
3862 'tk': 'tuk',
3863 'tl': 'tgl',
3864 'tn': 'tsn',
3865 'to': 'ton',
3866 'tr': 'tur',
3867 'ts': 'tso',
3868 'tt': 'tat',
3869 'tw': 'twi',
3870 'ty': 'tah',
3871 'ug': 'uig',
3872 'uk': 'ukr',
3873 'ur': 'urd',
3874 'uz': 'uzb',
3875 've': 'ven',
3876 'vi': 'vie',
3877 'vo': 'vol',
3878 'wa': 'wln',
3879 'wo': 'wol',
3880 'xh': 'xho',
3881 'yi': 'yid',
3882 'ji': 'yid', # Replaced by yi in 1989 revision
3883 'yo': 'yor',
3884 'za': 'zha',
3885 'zh': 'zho',
3886 'zu': 'zul',
3887 }
3888
3889 @classmethod
3890 def short2long(cls, code):
3891 """Convert language code from ISO 639-1 to ISO 639-2/T"""
3892 return cls._lang_map.get(code[:2])
3893
3894 @classmethod
3895 def long2short(cls, code):
3896 """Convert language code from ISO 639-2/T to ISO 639-1"""
3897 for short_name, long_name in cls._lang_map.items():
3898 if long_name == code:
3899 return short_name
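# For example, using entries from the table above:
#   >>> ISO639Utils.short2long('en')
#   'eng'
#   >>> ISO639Utils.long2short('deu')
#   'de'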
3900
3901
3902 class ISO3166Utils:
3903 # From http://data.okfn.org/data/core/country-list
3904 _country_map = {
3905 'AF': 'Afghanistan',
3906 'AX': 'Åland Islands',
3907 'AL': 'Albania',
3908 'DZ': 'Algeria',
3909 'AS': 'American Samoa',
3910 'AD': 'Andorra',
3911 'AO': 'Angola',
3912 'AI': 'Anguilla',
3913 'AQ': 'Antarctica',
3914 'AG': 'Antigua and Barbuda',
3915 'AR': 'Argentina',
3916 'AM': 'Armenia',
3917 'AW': 'Aruba',
3918 'AU': 'Australia',
3919 'AT': 'Austria',
3920 'AZ': 'Azerbaijan',
3921 'BS': 'Bahamas',
3922 'BH': 'Bahrain',
3923 'BD': 'Bangladesh',
3924 'BB': 'Barbados',
3925 'BY': 'Belarus',
3926 'BE': 'Belgium',
3927 'BZ': 'Belize',
3928 'BJ': 'Benin',
3929 'BM': 'Bermuda',
3930 'BT': 'Bhutan',
3931 'BO': 'Bolivia, Plurinational State of',
3932 'BQ': 'Bonaire, Sint Eustatius and Saba',
3933 'BA': 'Bosnia and Herzegovina',
3934 'BW': 'Botswana',
3935 'BV': 'Bouvet Island',
3936 'BR': 'Brazil',
3937 'IO': 'British Indian Ocean Territory',
3938 'BN': 'Brunei Darussalam',
3939 'BG': 'Bulgaria',
3940 'BF': 'Burkina Faso',
3941 'BI': 'Burundi',
3942 'KH': 'Cambodia',
3943 'CM': 'Cameroon',
3944 'CA': 'Canada',
3945 'CV': 'Cape Verde',
3946 'KY': 'Cayman Islands',
3947 'CF': 'Central African Republic',
3948 'TD': 'Chad',
3949 'CL': 'Chile',
3950 'CN': 'China',
3951 'CX': 'Christmas Island',
3952 'CC': 'Cocos (Keeling) Islands',
3953 'CO': 'Colombia',
3954 'KM': 'Comoros',
3955 'CG': 'Congo',
3956 'CD': 'Congo, the Democratic Republic of the',
3957 'CK': 'Cook Islands',
3958 'CR': 'Costa Rica',
3959 'CI': 'Côte d\'Ivoire',
3960 'HR': 'Croatia',
3961 'CU': 'Cuba',
3962 'CW': 'Curaçao',
3963 'CY': 'Cyprus',
3964 'CZ': 'Czech Republic',
3965 'DK': 'Denmark',
3966 'DJ': 'Djibouti',
3967 'DM': 'Dominica',
3968 'DO': 'Dominican Republic',
3969 'EC': 'Ecuador',
3970 'EG': 'Egypt',
3971 'SV': 'El Salvador',
3972 'GQ': 'Equatorial Guinea',
3973 'ER': 'Eritrea',
3974 'EE': 'Estonia',
3975 'ET': 'Ethiopia',
3976 'FK': 'Falkland Islands (Malvinas)',
3977 'FO': 'Faroe Islands',
3978 'FJ': 'Fiji',
3979 'FI': 'Finland',
3980 'FR': 'France',
3981 'GF': 'French Guiana',
3982 'PF': 'French Polynesia',
3983 'TF': 'French Southern Territories',
3984 'GA': 'Gabon',
3985 'GM': 'Gambia',
3986 'GE': 'Georgia',
3987 'DE': 'Germany',
3988 'GH': 'Ghana',
3989 'GI': 'Gibraltar',
3990 'GR': 'Greece',
3991 'GL': 'Greenland',
3992 'GD': 'Grenada',
3993 'GP': 'Guadeloupe',
3994 'GU': 'Guam',
3995 'GT': 'Guatemala',
3996 'GG': 'Guernsey',
3997 'GN': 'Guinea',
3998 'GW': 'Guinea-Bissau',
3999 'GY': 'Guyana',
4000 'HT': 'Haiti',
4001 'HM': 'Heard Island and McDonald Islands',
4002 'VA': 'Holy See (Vatican City State)',
4003 'HN': 'Honduras',
4004 'HK': 'Hong Kong',
4005 'HU': 'Hungary',
4006 'IS': 'Iceland',
4007 'IN': 'India',
4008 'ID': 'Indonesia',
4009 'IR': 'Iran, Islamic Republic of',
4010 'IQ': 'Iraq',
4011 'IE': 'Ireland',
4012 'IM': 'Isle of Man',
4013 'IL': 'Israel',
4014 'IT': 'Italy',
4015 'JM': 'Jamaica',
4016 'JP': 'Japan',
4017 'JE': 'Jersey',
4018 'JO': 'Jordan',
4019 'KZ': 'Kazakhstan',
4020 'KE': 'Kenya',
4021 'KI': 'Kiribati',
4022 'KP': 'Korea, Democratic People\'s Republic of',
4023 'KR': 'Korea, Republic of',
4024 'KW': 'Kuwait',
4025 'KG': 'Kyrgyzstan',
4026 'LA': 'Lao People\'s Democratic Republic',
4027 'LV': 'Latvia',
4028 'LB': 'Lebanon',
4029 'LS': 'Lesotho',
4030 'LR': 'Liberia',
4031 'LY': 'Libya',
4032 'LI': 'Liechtenstein',
4033 'LT': 'Lithuania',
4034 'LU': 'Luxembourg',
4035 'MO': 'Macao',
4036 'MK': 'Macedonia, the Former Yugoslav Republic of',
4037 'MG': 'Madagascar',
4038 'MW': 'Malawi',
4039 'MY': 'Malaysia',
4040 'MV': 'Maldives',
4041 'ML': 'Mali',
4042 'MT': 'Malta',
4043 'MH': 'Marshall Islands',
4044 'MQ': 'Martinique',
4045 'MR': 'Mauritania',
4046 'MU': 'Mauritius',
4047 'YT': 'Mayotte',
4048 'MX': 'Mexico',
4049 'FM': 'Micronesia, Federated States of',
4050 'MD': 'Moldova, Republic of',
4051 'MC': 'Monaco',
4052 'MN': 'Mongolia',
4053 'ME': 'Montenegro',
4054 'MS': 'Montserrat',
4055 'MA': 'Morocco',
4056 'MZ': 'Mozambique',
4057 'MM': 'Myanmar',
4058 'NA': 'Namibia',
4059 'NR': 'Nauru',
4060 'NP': 'Nepal',
4061 'NL': 'Netherlands',
4062 'NC': 'New Caledonia',
4063 'NZ': 'New Zealand',
4064 'NI': 'Nicaragua',
4065 'NE': 'Niger',
4066 'NG': 'Nigeria',
4067 'NU': 'Niue',
4068 'NF': 'Norfolk Island',
4069 'MP': 'Northern Mariana Islands',
4070 'NO': 'Norway',
4071 'OM': 'Oman',
4072 'PK': 'Pakistan',
4073 'PW': 'Palau',
4074 'PS': 'Palestine, State of',
4075 'PA': 'Panama',
4076 'PG': 'Papua New Guinea',
4077 'PY': 'Paraguay',
4078 'PE': 'Peru',
4079 'PH': 'Philippines',
4080 'PN': 'Pitcairn',
4081 'PL': 'Poland',
4082 'PT': 'Portugal',
4083 'PR': 'Puerto Rico',
4084 'QA': 'Qatar',
4085 'RE': 'Réunion',
4086 'RO': 'Romania',
4087 'RU': 'Russian Federation',
4088 'RW': 'Rwanda',
4089 'BL': 'Saint Barthélemy',
4090 'SH': 'Saint Helena, Ascension and Tristan da Cunha',
4091 'KN': 'Saint Kitts and Nevis',
4092 'LC': 'Saint Lucia',
4093 'MF': 'Saint Martin (French part)',
4094 'PM': 'Saint Pierre and Miquelon',
4095 'VC': 'Saint Vincent and the Grenadines',
4096 'WS': 'Samoa',
4097 'SM': 'San Marino',
4098 'ST': 'Sao Tome and Principe',
4099 'SA': 'Saudi Arabia',
4100 'SN': 'Senegal',
4101 'RS': 'Serbia',
4102 'SC': 'Seychelles',
4103 'SL': 'Sierra Leone',
4104 'SG': 'Singapore',
4105 'SX': 'Sint Maarten (Dutch part)',
4106 'SK': 'Slovakia',
4107 'SI': 'Slovenia',
4108 'SB': 'Solomon Islands',
4109 'SO': 'Somalia',
4110 'ZA': 'South Africa',
4111 'GS': 'South Georgia and the South Sandwich Islands',
4112 'SS': 'South Sudan',
4113 'ES': 'Spain',
4114 'LK': 'Sri Lanka',
4115 'SD': 'Sudan',
4116 'SR': 'Suriname',
4117 'SJ': 'Svalbard and Jan Mayen',
4118 'SZ': 'Swaziland',
4119 'SE': 'Sweden',
4120 'CH': 'Switzerland',
4121 'SY': 'Syrian Arab Republic',
4122 'TW': 'Taiwan, Province of China',
4123 'TJ': 'Tajikistan',
4124 'TZ': 'Tanzania, United Republic of',
4125 'TH': 'Thailand',
4126 'TL': 'Timor-Leste',
4127 'TG': 'Togo',
4128 'TK': 'Tokelau',
4129 'TO': 'Tonga',
4130 'TT': 'Trinidad and Tobago',
4131 'TN': 'Tunisia',
4132 'TR': 'Turkey',
4133 'TM': 'Turkmenistan',
4134 'TC': 'Turks and Caicos Islands',
4135 'TV': 'Tuvalu',
4136 'UG': 'Uganda',
4137 'UA': 'Ukraine',
4138 'AE': 'United Arab Emirates',
4139 'GB': 'United Kingdom',
4140 'US': 'United States',
4141 'UM': 'United States Minor Outlying Islands',
4142 'UY': 'Uruguay',
4143 'UZ': 'Uzbekistan',
4144 'VU': 'Vanuatu',
4145 'VE': 'Venezuela, Bolivarian Republic of',
4146 'VN': 'Viet Nam',
4147 'VG': 'Virgin Islands, British',
4148 'VI': 'Virgin Islands, U.S.',
4149 'WF': 'Wallis and Futuna',
4150 'EH': 'Western Sahara',
4151 'YE': 'Yemen',
4152 'ZM': 'Zambia',
4153 'ZW': 'Zimbabwe',
4154 }
4155
4156 @classmethod
4157 def short2full(cls, code):
4158 """Convert an ISO 3166-2 country code to the corresponding full name"""
4159 return cls._country_map.get(code.upper())
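# For example:
#   >>> ISO3166Utils.short2full('NZ')
#   'New Zealand'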
4160
4161
4162 class GeoUtils:
4163 # Major IPv4 address blocks per country
4164 _country_ip_map = {
4165 'AD': '46.172.224.0/19',
4166 'AE': '94.200.0.0/13',
4167 'AF': '149.54.0.0/17',
4168 'AG': '209.59.64.0/18',
4169 'AI': '204.14.248.0/21',
4170 'AL': '46.99.0.0/16',
4171 'AM': '46.70.0.0/15',
4172 'AO': '105.168.0.0/13',
4173 'AP': '182.50.184.0/21',
4174 'AQ': '23.154.160.0/24',
4175 'AR': '181.0.0.0/12',
4176 'AS': '202.70.112.0/20',
4177 'AT': '77.116.0.0/14',
4178 'AU': '1.128.0.0/11',
4179 'AW': '181.41.0.0/18',
4180 'AX': '185.217.4.0/22',
4181 'AZ': '5.197.0.0/16',
4182 'BA': '31.176.128.0/17',
4183 'BB': '65.48.128.0/17',
4184 'BD': '114.130.0.0/16',
4185 'BE': '57.0.0.0/8',
4186 'BF': '102.178.0.0/15',
4187 'BG': '95.42.0.0/15',
4188 'BH': '37.131.0.0/17',
4189 'BI': '154.117.192.0/18',
4190 'BJ': '137.255.0.0/16',
4191 'BL': '185.212.72.0/23',
4192 'BM': '196.12.64.0/18',
4193 'BN': '156.31.0.0/16',
4194 'BO': '161.56.0.0/16',
4195 'BQ': '161.0.80.0/20',
4196 'BR': '191.128.0.0/12',
4197 'BS': '24.51.64.0/18',
4198 'BT': '119.2.96.0/19',
4199 'BW': '168.167.0.0/16',
4200 'BY': '178.120.0.0/13',
4201 'BZ': '179.42.192.0/18',
4202 'CA': '99.224.0.0/11',
4203 'CD': '41.243.0.0/16',
4204 'CF': '197.242.176.0/21',
4205 'CG': '160.113.0.0/16',
4206 'CH': '85.0.0.0/13',
4207 'CI': '102.136.0.0/14',
4208 'CK': '202.65.32.0/19',
4209 'CL': '152.172.0.0/14',
4210 'CM': '102.244.0.0/14',
4211 'CN': '36.128.0.0/10',
4212 'CO': '181.240.0.0/12',
4213 'CR': '201.192.0.0/12',
4214 'CU': '152.206.0.0/15',
4215 'CV': '165.90.96.0/19',
4216 'CW': '190.88.128.0/17',
4217 'CY': '31.153.0.0/16',
4218 'CZ': '88.100.0.0/14',
4219 'DE': '53.0.0.0/8',
4220 'DJ': '197.241.0.0/17',
4221 'DK': '87.48.0.0/12',
4222 'DM': '192.243.48.0/20',
4223 'DO': '152.166.0.0/15',
4224 'DZ': '41.96.0.0/12',
4225 'EC': '186.68.0.0/15',
4226 'EE': '90.190.0.0/15',
4227 'EG': '156.160.0.0/11',
4228 'ER': '196.200.96.0/20',
4229 'ES': '88.0.0.0/11',
4230 'ET': '196.188.0.0/14',
4231 'EU': '2.16.0.0/13',
4232 'FI': '91.152.0.0/13',
4233 'FJ': '144.120.0.0/16',
4234 'FK': '80.73.208.0/21',
4235 'FM': '119.252.112.0/20',
4236 'FO': '88.85.32.0/19',
4237 'FR': '90.0.0.0/9',
4238 'GA': '41.158.0.0/15',
4239 'GB': '25.0.0.0/8',
4240 'GD': '74.122.88.0/21',
4241 'GE': '31.146.0.0/16',
4242 'GF': '161.22.64.0/18',
4243 'GG': '62.68.160.0/19',
4244 'GH': '154.160.0.0/12',
4245 'GI': '95.164.0.0/16',
4246 'GL': '88.83.0.0/19',
4247 'GM': '160.182.0.0/15',
4248 'GN': '197.149.192.0/18',
4249 'GP': '104.250.0.0/19',
4250 'GQ': '105.235.224.0/20',
4251 'GR': '94.64.0.0/13',
4252 'GT': '168.234.0.0/16',
4253 'GU': '168.123.0.0/16',
4254 'GW': '197.214.80.0/20',
4255 'GY': '181.41.64.0/18',
4256 'HK': '113.252.0.0/14',
4257 'HN': '181.210.0.0/16',
4258 'HR': '93.136.0.0/13',
4259 'HT': '148.102.128.0/17',
4260 'HU': '84.0.0.0/14',
4261 'ID': '39.192.0.0/10',
4262 'IE': '87.32.0.0/12',
4263 'IL': '79.176.0.0/13',
4264 'IM': '5.62.80.0/20',
4265 'IN': '117.192.0.0/10',
4266 'IO': '203.83.48.0/21',
4267 'IQ': '37.236.0.0/14',
4268 'IR': '2.176.0.0/12',
4269 'IS': '82.221.0.0/16',
4270 'IT': '79.0.0.0/10',
4271 'JE': '87.244.64.0/18',
4272 'JM': '72.27.0.0/17',
4273 'JO': '176.29.0.0/16',
4274 'JP': '133.0.0.0/8',
4275 'KE': '105.48.0.0/12',
4276 'KG': '158.181.128.0/17',
4277 'KH': '36.37.128.0/17',
4278 'KI': '103.25.140.0/22',
4279 'KM': '197.255.224.0/20',
4280 'KN': '198.167.192.0/19',
4281 'KP': '175.45.176.0/22',
4282 'KR': '175.192.0.0/10',
4283 'KW': '37.36.0.0/14',
4284 'KY': '64.96.0.0/15',
4285 'KZ': '2.72.0.0/13',
4286 'LA': '115.84.64.0/18',
4287 'LB': '178.135.0.0/16',
4288 'LC': '24.92.144.0/20',
4289 'LI': '82.117.0.0/19',
4290 'LK': '112.134.0.0/15',
4291 'LR': '102.183.0.0/16',
4292 'LS': '129.232.0.0/17',
4293 'LT': '78.56.0.0/13',
4294 'LU': '188.42.0.0/16',
4295 'LV': '46.109.0.0/16',
4296 'LY': '41.252.0.0/14',
4297 'MA': '105.128.0.0/11',
4298 'MC': '88.209.64.0/18',
4299 'MD': '37.246.0.0/16',
4300 'ME': '178.175.0.0/17',
4301 'MF': '74.112.232.0/21',
4302 'MG': '154.126.0.0/17',
4303 'MH': '117.103.88.0/21',
4304 'MK': '77.28.0.0/15',
4305 'ML': '154.118.128.0/18',
4306 'MM': '37.111.0.0/17',
4307 'MN': '49.0.128.0/17',
4308 'MO': '60.246.0.0/16',
4309 'MP': '202.88.64.0/20',
4310 'MQ': '109.203.224.0/19',
4311 'MR': '41.188.64.0/18',
4312 'MS': '208.90.112.0/22',
4313 'MT': '46.11.0.0/16',
4314 'MU': '105.16.0.0/12',
4315 'MV': '27.114.128.0/18',
4316 'MW': '102.70.0.0/15',
4317 'MX': '187.192.0.0/11',
4318 'MY': '175.136.0.0/13',
4319 'MZ': '197.218.0.0/15',
4320 'NA': '41.182.0.0/16',
4321 'NC': '101.101.0.0/18',
4322 'NE': '197.214.0.0/18',
4323 'NF': '203.17.240.0/22',
4324 'NG': '105.112.0.0/12',
4325 'NI': '186.76.0.0/15',
4326 'NL': '145.96.0.0/11',
4327 'NO': '84.208.0.0/13',
4328 'NP': '36.252.0.0/15',
4329 'NR': '203.98.224.0/19',
4330 'NU': '49.156.48.0/22',
4331 'NZ': '49.224.0.0/14',
4332 'OM': '5.36.0.0/15',
4333 'PA': '186.72.0.0/15',
4334 'PE': '186.160.0.0/14',
4335 'PF': '123.50.64.0/18',
4336 'PG': '124.240.192.0/19',
4337 'PH': '49.144.0.0/13',
4338 'PK': '39.32.0.0/11',
4339 'PL': '83.0.0.0/11',
4340 'PM': '70.36.0.0/20',
4341 'PR': '66.50.0.0/16',
4342 'PS': '188.161.0.0/16',
4343 'PT': '85.240.0.0/13',
4344 'PW': '202.124.224.0/20',
4345 'PY': '181.120.0.0/14',
4346 'QA': '37.210.0.0/15',
4347 'RE': '102.35.0.0/16',
4348 'RO': '79.112.0.0/13',
4349 'RS': '93.86.0.0/15',
4350 'RU': '5.136.0.0/13',
4351 'RW': '41.186.0.0/16',
4352 'SA': '188.48.0.0/13',
4353 'SB': '202.1.160.0/19',
4354 'SC': '154.192.0.0/11',
4355 'SD': '102.120.0.0/13',
4356 'SE': '78.64.0.0/12',
4357 'SG': '8.128.0.0/10',
4358 'SI': '188.196.0.0/14',
4359 'SK': '78.98.0.0/15',
4360 'SL': '102.143.0.0/17',
4361 'SM': '89.186.32.0/19',
4362 'SN': '41.82.0.0/15',
4363 'SO': '154.115.192.0/18',
4364 'SR': '186.179.128.0/17',
4365 'SS': '105.235.208.0/21',
4366 'ST': '197.159.160.0/19',
4367 'SV': '168.243.0.0/16',
4368 'SX': '190.102.0.0/20',
4369 'SY': '5.0.0.0/16',
4370 'SZ': '41.84.224.0/19',
4371 'TC': '65.255.48.0/20',
4372 'TD': '154.68.128.0/19',
4373 'TG': '196.168.0.0/14',
4374 'TH': '171.96.0.0/13',
4375 'TJ': '85.9.128.0/18',
4376 'TK': '27.96.24.0/21',
4377 'TL': '180.189.160.0/20',
4378 'TM': '95.85.96.0/19',
4379 'TN': '197.0.0.0/11',
4380 'TO': '175.176.144.0/21',
4381 'TR': '78.160.0.0/11',
4382 'TT': '186.44.0.0/15',
4383 'TV': '202.2.96.0/19',
4384 'TW': '120.96.0.0/11',
4385 'TZ': '156.156.0.0/14',
4386 'UA': '37.52.0.0/14',
4387 'UG': '102.80.0.0/13',
4388 'US': '6.0.0.0/8',
4389 'UY': '167.56.0.0/13',
4390 'UZ': '84.54.64.0/18',
4391 'VA': '212.77.0.0/19',
4392 'VC': '207.191.240.0/21',
4393 'VE': '186.88.0.0/13',
4394 'VG': '66.81.192.0/20',
4395 'VI': '146.226.0.0/16',
4396 'VN': '14.160.0.0/11',
4397 'VU': '202.80.32.0/20',
4398 'WF': '117.20.32.0/21',
4399 'WS': '202.4.32.0/19',
4400 'YE': '134.35.0.0/16',
4401 'YT': '41.242.116.0/22',
4402 'ZA': '41.0.0.0/11',
4403 'ZM': '102.144.0.0/13',
4404 'ZW': '102.177.192.0/18',
4405 }
4406
4407 @classmethod
4408 def random_ipv4(cls, code_or_block):
4409 if len(code_or_block) == 2:
4410 block = cls._country_ip_map.get(code_or_block.upper())
4411 if not block:
4412 return None
4413 else:
4414 block = code_or_block
4415 addr, preflen = block.split('/')
4416 addr_min = compat_struct_unpack('!L', socket.inet_aton(addr))[0]
4417 addr_max = addr_min | (0xffffffff >> int(preflen))
4418 return compat_str(socket.inet_ntoa(
4419 compat_struct_pack('!L', random.randint(addr_min, addr_max))))
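# A usage sketch: the argument may be a two-letter code from the table above
# or an explicit CIDR block; the result is a random address within that block:
#   >>> GeoUtils.random_ipv4('NZ')              # e.g. '49.225.3.17' (inside 49.224.0.0/14)
#   >>> GeoUtils.random_ipv4('203.0.113.0/24')  # e.g. '203.0.113.42'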
4420
4421
4422 class PerRequestProxyHandler(compat_urllib_request.ProxyHandler):
4423 def __init__(self, proxies=None):
4424 # Set default handlers
4425 for type in ('http', 'https'):
4426 setattr(self, '%s_open' % type,
4427 lambda r, proxy='__noproxy__', type=type, meth=self.proxy_open:
4428 meth(r, proxy, type))
4429 compat_urllib_request.ProxyHandler.__init__(self, proxies)
4430
4431 def proxy_open(self, req, proxy, type):
4432 req_proxy = req.headers.get('Ytdl-request-proxy')
4433 if req_proxy is not None:
4434 proxy = req_proxy
4435 del req.headers['Ytdl-request-proxy']
4436
4437 if proxy == '__noproxy__':
4438 return None # No Proxy
4439 if compat_urlparse.urlparse(proxy).scheme.lower() in ('socks', 'socks4', 'socks4a', 'socks5'):
4440 req.add_header('Ytdl-socks-proxy', proxy)
4441 # yt-dlp's http/https handlers take care of wrapping the socket with SOCKS
4442 return None
4443 return compat_urllib_request.ProxyHandler.proxy_open(
4444 self, req, proxy, type)
4445
4446
4447 # Both long_to_bytes and bytes_to_long are adapted from PyCrypto, which is
4448 # released into Public Domain
4449 # https://github.com/dlitz/pycrypto/blob/master/lib/Crypto/Util/number.py#L387
4450
4451 def long_to_bytes(n, blocksize=0):
4452 """long_to_bytes(n:long, blocksize:int) : string
4453 Convert a long integer to a byte string.
4454
4455 If optional blocksize is given and greater than zero, pad the front of the
4456 byte string with binary zeros so that the length is a multiple of
4457 blocksize.
4458 """
4459 # after much testing, this algorithm was deemed to be the fastest
4460 s = b''
4461 n = int(n)
4462 while n > 0:
4463 s = compat_struct_pack('>I', n & 0xffffffff) + s
4464 n = n >> 32
4465 # strip off leading zeros
4466 for i in range(len(s)):
4467 if s[i] != b'\000'[0]:
4468 break
4469 else:
4470 # only happens when n == 0
4471 s = b'\000'
4472 i = 0
4473 s = s[i:]
4474 # add back some pad bytes. this could be done more efficiently w.r.t. the
4475 # de-padding being done above, but sigh...
4476 if blocksize > 0 and len(s) % blocksize:
4477 s = (blocksize - len(s) % blocksize) * b'\000' + s
4478 return s
4479
4480
4481 def bytes_to_long(s):
4482 """bytes_to_long(string) : long
4483 Convert a byte string to a long integer.
4484
4485 This is (essentially) the inverse of long_to_bytes().
4486 """
4487 acc = 0
4488 length = len(s)
4489 if length % 4:
4490 extra = (4 - length % 4)
4491 s = b'\000' * extra + s
4492 length = length + extra
4493 for i in range(0, length, 4):
4494 acc = (acc << 32) + compat_struct_unpack('>I', s[i:i + 4])[0]
4495 return acc
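# Round-trip sketch for the two helpers above:
#   >>> long_to_bytes(4660)
#   b'\x124'
#   >>> long_to_bytes(4660, blocksize=4)
#   b'\x00\x00\x124'
#   >>> bytes_to_long(b'\x124')
#   4660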
4496
4497
4498 def ohdave_rsa_encrypt(data, exponent, modulus):
4499 '''
4500 Implement OHDave's RSA algorithm. See http://www.ohdave.com/rsa/
4501
4502 Input:
4503 data: data to encrypt, bytes-like object
4504 exponent, modulus: parameter e and N of RSA algorithm, both integer
4505 Output: hex string of encrypted data
4506
4507 Limitation: supports one block encryption only
4508 '''
4509
4510 payload = int(binascii.hexlify(data[::-1]), 16)
4511 encrypted = pow(payload, exponent, modulus)
4512 return '%x' % encrypted
4513
4514
4515 def pkcs1pad(data, length):
4516 """
4517 Padding input data with PKCS#1 scheme
4518
4519 @param {int[]} data input data
4520 @param {int} length target length
4521 @returns {int[]} padded data
4522 """
4523 if len(data) > length - 11:
4524 raise ValueError('Input data too long for PKCS#1 padding')
4525
4526 pseudo_random = [random.randint(1, 255) for _ in range(length - len(data) - 3)]  # padding octets must be non-zero (RFC 8017 §7.2.1)
4527 return [0, 2] + pseudo_random + [0] + data
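# For example, padding a one-byte message into a 16-octet block yields
# [0, 2, <12 random non-zero octets>, 0, 42]:
#   >>> len(pkcs1pad([42], 16))
#   16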
4528
4529
4530 def encode_base_n(num, n, table=None):
4531 FULL_TABLE = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
4532 if not table:
4533 table = FULL_TABLE[:n]
4534
4535 if n > len(table):
4536 raise ValueError('base %d exceeds table length %d' % (n, len(table)))
4537
4538 if num == 0:
4539 return table[0]
4540
4541 ret = ''
4542 while num:
4543 ret = table[num % n] + ret
4544 num = num // n
4545 return ret
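# For example:
#   >>> encode_base_n(255, 16)
#   'ff'
#   >>> encode_base_n(52, 36)
#   '1g'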
4546
4547
4548 def decode_packed_codes(code):
4549 mobj = re.search(PACKED_CODES_RE, code)
4550 obfuscated_code, base, count, symbols = mobj.groups()
4551 base = int(base)
4552 count = int(count)
4553 symbols = symbols.split('|')
4554 symbol_table = {}
4555
4556 while count:
4557 count -= 1
4558 base_n_count = encode_base_n(count, base)
4559 symbol_table[base_n_count] = symbols[count] or base_n_count
4560
4561 return re.sub(
4562 r'\b(\w+)\b', lambda mobj: symbol_table[mobj.group(0)],
4563 obfuscated_code)
4564
4565
4566 def caesar(s, alphabet, shift):
4567 if shift == 0:
4568 return s
4569 l = len(alphabet)
4570 return ''.join(
4571 alphabet[(alphabet.index(c) + shift) % l] if c in alphabet else c
4572 for c in s)
4573
4574
4575 def rot47(s):
4576 return caesar(s, r'''!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~''', 47)
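# For example:
#   >>> caesar('hello', 'abcdefghijklmnopqrstuvwxyz', 1)
#   'ifmmp'
#   >>> rot47(rot47('secret'))  # applying rot47 twice restores the input
#   'secret'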
4577
4578
4579 def parse_m3u8_attributes(attrib):
4580 info = {}
4581 for (key, val) in re.findall(r'(?P<key>[A-Z0-9-]+)=(?P<val>"[^"]+"|[^",]+)(?:,|$)', attrib):
4582 if val.startswith('"'):
4583 val = val[1:-1]
4584 info[key] = val
4585 return info
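# For example, parsing an EXT-X-STREAM-INF attribute list:
#   >>> parse_m3u8_attributes('BANDWIDTH=1280000,CODECS="mp4a.40.2,avc1.4d401f"')
#   {'BANDWIDTH': '1280000', 'CODECS': 'mp4a.40.2,avc1.4d401f'}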
4586
4587
4588 def urshift(val, n):
4589 return val >> n if val >= 0 else (val + 0x100000000) >> n
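# urshift emulates JavaScript's zero-fill right shift (>>>) for 32-bit values:
#   >>> urshift(-1, 8)
#   16777215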
4590
4591
4592 # Based on png2str() written by @gdkchan and improved by @yokrysty
4593 # Originally posted at https://github.com/ytdl-org/youtube-dl/issues/9706
4594 def decode_png(png_data):
4595 # Reference: https://www.w3.org/TR/PNG/
4596 header = png_data[8:]
4597
4598 if png_data[:8] != b'\x89PNG\x0d\x0a\x1a\x0a' or header[4:8] != b'IHDR':
4599 raise OSError('Not a valid PNG file.')
4600
4601 int_map = {1: '>B', 2: '>H', 4: '>I'}
4602 unpack_integer = lambda x: compat_struct_unpack(int_map[len(x)], x)[0]
4603
4604 chunks = []
4605
4606 while header:
4607 length = unpack_integer(header[:4])
4608 header = header[4:]
4609
4610 chunk_type = header[:4]
4611 header = header[4:]
4612
4613 chunk_data = header[:length]
4614 header = header[length:]
4615
4616 header = header[4:] # Skip CRC
4617
4618 chunks.append({
4619 'type': chunk_type,
4620 'length': length,
4621 'data': chunk_data
4622 })
4623
4624 ihdr = chunks[0]['data']
4625
4626 width = unpack_integer(ihdr[:4])
4627 height = unpack_integer(ihdr[4:8])
4628
4629 idat = b''
4630
4631 for chunk in chunks:
4632 if chunk['type'] == b'IDAT':
4633 idat += chunk['data']
4634
4635 if not idat:
4636 raise OSError('Unable to read PNG data.')
4637
4638 decompressed_data = bytearray(zlib.decompress(idat))
4639
4640 stride = width * 3
4641 pixels = []
4642
4643 def _get_pixel(idx):
4644 x = idx % stride
4645 y = idx // stride
4646 return pixels[y][x]
4647
4648 for y in range(height):
4649 basePos = y * (1 + stride)
4650 filter_type = decompressed_data[basePos]
4651
4652 current_row = []
4653
4654 pixels.append(current_row)
4655
4656 for x in range(stride):
4657 color = decompressed_data[1 + basePos + x]
4658 basex = y * stride + x
4659 left = 0
4660 up = 0
4661
4662 if x > 2:
4663 left = _get_pixel(basex - 3)
4664 if y > 0:
4665 up = _get_pixel(basex - stride)
4666
4667 if filter_type == 1: # Sub
4668 color = (color + left) & 0xff
4669 elif filter_type == 2: # Up
4670 color = (color + up) & 0xff
4671 elif filter_type == 3: # Average
4672 color = (color + ((left + up) >> 1)) & 0xff
4673 elif filter_type == 4: # Paeth
4674 a = left
4675 b = up
4676 c = 0
4677
4678 if x > 2 and y > 0:
4679 c = _get_pixel(basex - stride - 3)
4680
4681 p = a + b - c
4682
4683 pa = abs(p - a)
4684 pb = abs(p - b)
4685 pc = abs(p - c)
4686
4687 if pa <= pb and pa <= pc:
4688 color = (color + a) & 0xff
4689 elif pb <= pc:
4690 color = (color + b) & 0xff
4691 else:
4692 color = (color + c) & 0xff
4693
4694 current_row.append(color)
4695
4696 return width, height, pixels
4697
4698
4699 def write_xattr(path, key, value):
4700 # This mess below finds the best xattr tool for the job
4701 try:
4702 # try the pyxattr module...
4703 import xattr
4704
4705 if hasattr(xattr, 'set'): # pyxattr
4706 # Unicode arguments are not supported in python-pyxattr until
4707 # version 0.5.0
4708 # See https://github.com/ytdl-org/youtube-dl/issues/5498
4709 pyxattr_required_version = '0.5.0'
4710 if version_tuple(xattr.__version__) < version_tuple(pyxattr_required_version):
4711 # TODO: fallback to CLI tools
4712 raise XAttrUnavailableError(
4713 'python-pyxattr is detected but is too old. '
4714 'yt-dlp requires %s or above while your version is %s. '
4715 'Falling back to other xattr implementations' % (
4716 pyxattr_required_version, xattr.__version__))
4717
4718 setxattr = xattr.set
4719 else: # xattr
4720 setxattr = xattr.setxattr
4721
4722 try:
4723 setxattr(path, key, value)
4724 except OSError as e:
4725 raise XAttrMetadataError(e.errno, e.strerror)
4726
4727 except ImportError:
4728 if compat_os_name == 'nt':
4729 # Write xattrs to NTFS Alternate Data Streams:
4730 # http://en.wikipedia.org/wiki/NTFS#Alternate_data_streams_.28ADS.29
4731 assert ':' not in key
4732 assert os.path.exists(path)
4733
4734 ads_fn = path + ':' + key
4735 try:
4736 with open(ads_fn, 'wb') as f:
4737 f.write(value)
4738 except OSError as e:
4739 raise XAttrMetadataError(e.errno, e.strerror)
4740 else:
4741 user_has_setfattr = check_executable('setfattr', ['--version'])
4742 user_has_xattr = check_executable('xattr', ['-h'])
4743
4744 if user_has_setfattr or user_has_xattr:
4745
4746 value = value.decode('utf-8')
4747 if user_has_setfattr:
4748 executable = 'setfattr'
4749 opts = ['-n', key, '-v', value]
4750 elif user_has_xattr:
4751 executable = 'xattr'
4752 opts = ['-w', key, value]
4753
4754 cmd = ([encodeFilename(executable, True)]
4755 + [encodeArgument(o) for o in opts]
4756 + [encodeFilename(path, True)])
4757
4758 try:
4759 p = Popen(
4760 cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE)
4761 except OSError as e:
4762 raise XAttrMetadataError(e.errno, e.strerror)
4763 stdout, stderr = p.communicate_or_kill()
4764 stderr = stderr.decode('utf-8', 'replace')
4765 if p.returncode != 0:
4766 raise XAttrMetadataError(p.returncode, stderr)
4767
4768 else:
4769 # On Unix, but couldn't find pyxattr, setfattr, or xattr.
4770 if sys.platform.startswith('linux'):
4771 raise XAttrUnavailableError(
4772 "Couldn't find a tool to set the xattrs. "
4773 "Install either the python 'pyxattr' or 'xattr' "
4774 "modules, or the GNU 'attr' package "
4775 "(which contains the 'setfattr' tool).")
4776 else:
4777 raise XAttrUnavailableError(
4778 "Couldn't find a tool to set the xattrs. "
4779 "Install either the python 'xattr' module, "
4780 "or the 'xattr' binary.")
4781
4782
4783 def random_birthday(year_field, month_field, day_field):
4784 start_date = datetime.date(1950, 1, 1)
4785 end_date = datetime.date(1995, 12, 31)
4786 offset = random.randint(0, (end_date - start_date).days)
4787 random_date = start_date + datetime.timedelta(offset)
4788 return {
4789 year_field: str(random_date.year),
4790 month_field: str(random_date.month),
4791 day_field: str(random_date.day),
4792 }
4793
4794
4795 # Templates for internet shortcut files, which are plain text files.
4796 DOT_URL_LINK_TEMPLATE = '''\
4797 [InternetShortcut]
4798 URL=%(url)s
4799 '''
4800
4801 DOT_WEBLOC_LINK_TEMPLATE = '''\
4802 <?xml version="1.0" encoding="UTF-8"?>
4803 <!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
4804 <plist version="1.0">
4805 <dict>
4806 \t<key>URL</key>
4807 \t<string>%(url)s</string>
4808 </dict>
4809 </plist>
4810 '''
4811
4812 DOT_DESKTOP_LINK_TEMPLATE = '''\
4813 [Desktop Entry]
4814 Encoding=UTF-8
4815 Name=%(filename)s
4816 Type=Link
4817 URL=%(url)s
4818 Icon=text-html
4819 '''
4820
4821 LINK_TEMPLATES = {
4822 'url': DOT_URL_LINK_TEMPLATE,
4823 'desktop': DOT_DESKTOP_LINK_TEMPLATE,
4824 'webloc': DOT_WEBLOC_LINK_TEMPLATE,
4825 }
4826
4827
4828 def iri_to_uri(iri):
4829 """
4830 Converts an IRI (Internationalized Resource Identifier, allowing Unicode characters) to a URI (Uniform Resource Identifier, ASCII-only).
4831
4832 The function doesn't add an additional layer of escaping; e.g., it doesn't escape `%3C` as `%253C`. Instead, it percent-escapes characters using an underlying UTF-8 encoding, except those already escaped, leaving the rest of the URI intact.
4833 """
4834
4835 iri_parts = compat_urllib_parse_urlparse(iri)
4836
4837 if '[' in iri_parts.netloc:
4838 raise ValueError('IPv6 URIs are not yet supported.')
4839 # Querying `.netloc`, when there's only one bracket, also raises a ValueError.
4840
4841 # The `safe` argument values, that the following code uses, contain the characters that should not be percent-encoded. Everything else but letters, digits and '_.-' will be percent-encoded with an underlying UTF-8 encoding. Everything already percent-encoded will be left as is.
4842
4843 net_location = ''
4844 if iri_parts.username:
4845 net_location += urllib.parse.quote(iri_parts.username, safe=r"!$%&'()*+,~")
4846 if iri_parts.password is not None:
4847 net_location += ':' + urllib.parse.quote(iri_parts.password, safe=r"!$%&'()*+,~")
4848 net_location += '@'
4849
4850 net_location += iri_parts.hostname.encode('idna').decode('utf-8') # Punycode for Unicode hostnames.
4851 # The 'idna' encoding produces ASCII text.
4852 if iri_parts.port is not None and iri_parts.port != 80:
4853 net_location += ':' + str(iri_parts.port)
4854
4855 return urllib.parse.urlunparse(
4856 (iri_parts.scheme,
4857 net_location,
4858
4859 urllib.parse.quote_plus(iri_parts.path, safe=r"!$%&'()*+,/:;=@|~"),
4860
4861 # Unsure about the `safe` argument, since this is a legacy way of handling parameters.
4862 urllib.parse.quote_plus(iri_parts.params, safe=r"!$%&'()*+,/:;=@|~"),
4863
4864 # Not totally sure about the `safe` argument, since the source does not explicitly mention the query URI component.
4865 urllib.parse.quote_plus(iri_parts.query, safe=r"!$%&'()*+,/:;=?@{|}~"),
4866
4867 urllib.parse.quote_plus(iri_parts.fragment, safe=r"!#$%&'()*+,/:;=?@{|}~")))
4868
4869 # Source for `safe` arguments: https://url.spec.whatwg.org/#percent-encoded-bytes.
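# For example (hostnames are punycoded via the 'idna' codec; in this
# hypothetical URL only the path needs escaping):
#   >>> iri_to_uri('https://example.com/día')
#   'https://example.com/d%C3%ADa'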
4870
4871
4872 def to_high_limit_path(path):
4873 if sys.platform in ['win32', 'cygwin']:
4874 # Work around MAX_PATH limitation on Windows. The maximum allowed length for the individual path segments may still be quite limited.
4875 return '\\\\?\\' + os.path.abspath(path)
4876
4877 return path
4878
4879
4880 def format_field(obj, field=None, template='%s', ignore=(None, ''), default='', func=None):
4881 val = traverse_obj(obj, *variadic(field))
4882 if val in ignore:
4883 return default
4884 return template % (func(val) if func else val)
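# Illustrative usage (the field names are hypothetical):
#   >>> format_field({'width': 1920}, 'width', '%dpx')
#   '1920px'
#   >>> format_field({}, 'width', '%dpx', default='unknown')
#   'unknown'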
4885
4886
4887 def clean_podcast_url(url):
4888 return re.sub(r'''(?x)
4889 (?:
4890 (?:
4891 chtbl\.com/track|
4892 media\.blubrry\.com| # https://create.blubrry.com/resources/podcast-media-download-statistics/getting-started/
4893 play\.podtrac\.com
4894 )/[^/]+|
4895 (?:dts|www)\.podtrac\.com/(?:pts/)?redirect\.[0-9a-z]{3,4}| # http://analytics.podtrac.com/how-to-measure
4896 flex\.acast\.com|
4897 pd(?:
4898 cn\.co| # https://podcorn.com/analytics-prefix/
4899 st\.fm # https://podsights.com/docs/
4900 )/e
4901 )/''', '', url)
4902
4903
4904 _HEX_TABLE = '0123456789abcdef'
4905
4906
4907 def random_uuidv4():
4908 return re.sub(r'[xy]', lambda x: _HEX_TABLE[random.randint(0, 15)], 'xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx')
4909
4910
4911 def make_dir(path, to_screen=None):
4912 try:
4913 dn = os.path.dirname(path)
4914 if dn and not os.path.exists(dn):
4915 os.makedirs(dn)
4916 return True
4917 except OSError as err:
4918 if callable(to_screen):
4919 to_screen('unable to create directory ' + error_to_compat_str(err))
4920 return False
4921
4922
4923 def get_executable_path():
4924 from zipimport import zipimporter
4925 if hasattr(sys, 'frozen'): # Running from PyInstaller
4926 path = os.path.dirname(sys.executable)
4927 elif isinstance(__loader__, zipimporter): # Running from ZIP
4928 path = os.path.join(os.path.dirname(__file__), '../..')
4929 else:
4930 path = os.path.join(os.path.dirname(__file__), '..')
4931 return os.path.abspath(path)
4932
4933
4934 def load_plugins(name, suffix, namespace):
4935 classes = {}
4936 try:
4937 plugins_spec = importlib.util.spec_from_file_location(
4938 name, os.path.join(get_executable_path(), 'ytdlp_plugins', name, '__init__.py'))
4939 plugins = importlib.util.module_from_spec(plugins_spec)
4940 sys.modules[plugins_spec.name] = plugins
4941 plugins_spec.loader.exec_module(plugins)
4942 for name in dir(plugins):
4943 if name in namespace:
4944 continue
4945 if not name.endswith(suffix):
4946 continue
4947 klass = getattr(plugins, name)
4948 classes[name] = namespace[name] = klass
4949 except FileNotFoundError:
4950 pass
4951 return classes
4952
4953
4954 def traverse_obj(
4955 obj, *path_list, default=None, expected_type=None, get_all=True,
4956 casesense=True, is_user_input=False, traverse_string=False):
4957 ''' Traverse nested list/dict/tuple
4958 @param path_list A list of paths which are checked one by one.
4959 Each path is a list of keys where each key is a string,
4960 a function, a tuple of strings/None or "...".
4961 When a function is given, it takes the key and value as arguments
4962 and returns whether the key matches or not. When a tuple is given,
4963 all the keys given in the tuple are traversed.
4964 "..." traverses all the keys in the object, and
4965 "None" returns the object without traversal.
4966 @param default Default value to return
4967 @param expected_type Only accept final value of this type (Can also be any callable)
4968 @param get_all Return all the values obtained from a path or only the first one
4969 @param casesense Whether to consider dictionary keys as case sensitive
4970 @param is_user_input Whether the keys are generated from user input. If True,
4971 strings are converted to int/slice if necessary
4972 @param traverse_string Whether to traverse inside strings. If True, any
4973 non-compatible object will also be converted into a string
4974 # TODO: Write tests
4975 '''
4976 if not casesense:
4977 _lower = lambda k: (k.lower() if isinstance(k, str) else k)
4978 path_list = (map(_lower, variadic(path)) for path in path_list)
4979
4980 def _traverse_obj(obj, path, _current_depth=0):
4981 nonlocal depth
4982 path = tuple(variadic(path))
4983 for i, key in enumerate(path):
4984 if None in (key, obj):
4985 return obj
4986 if isinstance(key, (list, tuple)):
4987 obj = [_traverse_obj(obj, sub_key, _current_depth) for sub_key in key]
4988 key = ...
4989 if key is ...:
4990 obj = (obj.values() if isinstance(obj, dict)
4991 else obj if isinstance(obj, (list, tuple, LazyList))
4992 else str(obj) if traverse_string else [])
4993 _current_depth += 1
4994 depth = max(depth, _current_depth)
4995 return [_traverse_obj(inner_obj, path[i + 1:], _current_depth) for inner_obj in obj]
4996 elif callable(key):
4997 if isinstance(obj, (list, tuple, LazyList)):
4998 obj = enumerate(obj)
4999 elif isinstance(obj, dict):
5000 obj = obj.items()
5001 else:
5002 if not traverse_string:
5003 return None
5004 obj = str(obj)
5005 _current_depth += 1
5006 depth = max(depth, _current_depth)
5007 return [_traverse_obj(v, path[i + 1:], _current_depth) for k, v in obj if try_call(key, args=(k, v))]
5008 elif isinstance(obj, dict) and not (is_user_input and key == ':'):
5009 obj = (obj.get(key) if casesense or (key in obj)
5010 else next((v for k, v in obj.items() if _lower(k) == key), None))
5011 else:
5012 if is_user_input:
5013 key = (int_or_none(key) if ':' not in key
5014 else slice(*map(int_or_none, key.split(':'))))
5015 if key == slice(None):
5016 return _traverse_obj(obj, (..., *path[i + 1:]), _current_depth)
5017 if not isinstance(key, (int, slice)):
5018 return None
5019 if not isinstance(obj, (list, tuple, LazyList)):
5020 if not traverse_string:
5021 return None
5022 obj = str(obj)
5023 try:
5024 obj = obj[key]
5025 except IndexError:
5026 return None
5027 return obj
5028
5029 if isinstance(expected_type, type):
5030 type_test = lambda val: val if isinstance(val, expected_type) else None
5031 elif expected_type is not None:
5032 type_test = expected_type
5033 else:
5034 type_test = lambda val: val
5035
5036 for path in path_list:
5037 depth = 0
5038 val = _traverse_obj(obj, path)
5039 if val is not None:
5040 if depth:
5041 for _ in range(depth - 1):
5042 val = itertools.chain.from_iterable(v for v in val if v is not None)
5043 val = [v for v in map(type_test, val) if v is not None]
5044 if val:
5045 return val if get_all else val[0]
5046 else:
5047 val = type_test(val)
5048 if val is not None:
5049 return val
5050 return default
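# A small sketch of the path semantics described in the docstring:
#   >>> d = {'a': [{'b': 1}, {'b': 2}]}
#   >>> traverse_obj(d, ('a', 0, 'b'))
#   1
#   >>> traverse_obj(d, ('a', ..., 'b'))
#   [1, 2]
#   >>> traverse_obj(d, ('a', ..., 'b'), get_all=False)
#   1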
5051
5052
5053 def traverse_dict(dictn, keys, casesense=True):
5054 write_string('DeprecationWarning: yt_dlp.utils.traverse_dict is deprecated '
5055 'and may be removed in a future version. Use yt_dlp.utils.traverse_obj instead')
5056 return traverse_obj(dictn, keys, casesense=casesense, is_user_input=True, traverse_string=True)
5057
5058
5059 def get_first(obj, keys, **kwargs):
5060 return traverse_obj(obj, (..., *variadic(keys)), **kwargs, get_all=False)
5061
5062
5063 def variadic(x, allowed_types=(str, bytes, dict)):
5064 return x if isinstance(x, collections.abc.Iterable) and not isinstance(x, allowed_types) else (x,)
5065
5066
5067 def decode_base(value, digits):
5068 # Convert the given base-x string to a scalar integer
5069 table = {char: index for index, char in enumerate(digits)}
5070 result = 0
5071 base = len(digits)
5072 for char in value:
5073 result *= base
5074 result += table[char]
5075 return result
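# decode_base is the inverse of encode_base_n for a matching digit table:
#   >>> decode_base('ff', '0123456789abcdef')
#   255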
5076
5077
5078 def time_seconds(**kwargs):
5079 t = datetime.datetime.now(datetime.timezone(datetime.timedelta(**kwargs)))
5080 return t.timestamp()
5081
5082
5083 # Create a JSON Web Signature (JWS) with the HS256 algorithm;
5084 # the resulting format is JWS Compact Serialization,
5085 # implemented following JWT https://www.rfc-editor.org/rfc/rfc7519.html
5086 # and JWS https://www.rfc-editor.org/rfc/rfc7515.html
5087 def jwt_encode_hs256(payload_data, key, headers={}):
5088 header_data = {
5089 'alg': 'HS256',
5090 'typ': 'JWT',
5091 }
5092 if headers:
5093 header_data.update(headers)
5094 header_b64 = base64.b64encode(json.dumps(header_data).encode('utf-8'))
5095 payload_b64 = base64.b64encode(json.dumps(payload_data).encode('utf-8'))
5096 h = hmac.new(key.encode('utf-8'), header_b64 + b'.' + payload_b64, hashlib.sha256)
5097 signature_b64 = base64.b64encode(h.digest())
5098 token = header_b64 + b'.' + payload_b64 + b'.' + signature_b64
5099 return token
5100
5101
5102 # can be extended in future to verify the signature and parse header and return the algorithm used if it's not HS256
5103 def jwt_decode_hs256(jwt):
5104 header_b64, payload_b64, signature_b64 = jwt.split('.')
5105 payload_data = json.loads(base64.urlsafe_b64decode(payload_b64))
5106 return payload_data
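# Round-trip sketch. Note that jwt_encode_hs256 emits standard padded base64
# rather than the unpadded base64url required by RFC 7515, so the decoder above
# only handles its output cleanly when no '+' or '/' occurs in the encoding:
#   >>> token = jwt_encode_hs256({'sub': 'user'}, 'secret')
#   >>> jwt_decode_hs256(token.decode('utf-8'))
#   {'sub': 'user'}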
5107
5108
5109 def supports_terminal_sequences(stream):
5110 if compat_os_name == 'nt':
5111 from .compat import WINDOWS_VT_MODE # Must be imported locally
5112 if not WINDOWS_VT_MODE or get_windows_version() < (10, 0, 10586):
5113 return False
5114 elif not os.getenv('TERM'):
5115 return False
5116 try:
5117 return stream.isatty()
5118 except BaseException:
5119 return False
5120
5121
5122 _terminal_sequences_re = re.compile('\033\\[[^m]+m')
5123
5124
5125 def remove_terminal_sequences(string):
5126 return _terminal_sequences_re.sub('', string)
5127
5128
5129 def number_of_digits(number):
5130 return len('%d' % number)
5131
5132
5133 def join_nonempty(*values, delim='-', from_dict=None):
5134 if from_dict is not None:
5135 values = map(from_dict.get, values)
5136 return delim.join(map(str, filter(None, values)))
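# For example:
#   >>> join_nonempty('720p', None, 'dash', '')
#   '720p-dash'
#   >>> join_nonempty('id', 'title', delim=' ', from_dict={'id': '42', 'title': 'x'})
#   '42 x'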
5137
5138
5139 def scale_thumbnails_to_max_format_width(formats, thumbnails, url_width_re):
5140 """
5141 Find the largest format dimensions in terms of video width and, for each thumbnail:
5142 * Modify the URL: Match the width with the provided regex and replace it with the largest format width
5143 * Update dimensions
5144
5145 This function is useful with video services that scale the provided thumbnails on demand
5146 """
5147 _keys = ('width', 'height')
5148 max_dimensions = max(
5149 (tuple(format.get(k) or 0 for k in _keys) for format in formats),
5150 default=(0, 0))
5151 if not max_dimensions[0]:
5152 return thumbnails
5153 return [
5154 merge_dicts(
5155 {'url': re.sub(url_width_re, str(max_dimensions[0]), thumbnail['url'])},
5156 dict(zip(_keys, max_dimensions)), thumbnail)
5157 for thumbnail in thumbnails
5158 ]
5159
5160
5161 def parse_http_range(range):
5162 """ Parse value of "Range" or "Content-Range" HTTP header into tuple. """
5163 if not range:
5164 return None, None, None
5165 crg = re.search(r'bytes[ =](\d+)-(\d+)?(?:/(\d+))?', range)
5166 if not crg:
5167 return None, None, None
5168 return int(crg.group(1)), int_or_none(crg.group(2)), int_or_none(crg.group(3))
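# For example:
#   >>> parse_http_range('bytes 0-499/1234')
#   (0, 499, 1234)
#   >>> parse_http_range('bytes=500-')
#   (500, None, None)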
5169
5170
5171 class Config:
5172 own_args = None
5173 filename = None
5174 __initialized = False
5175
5176 def __init__(self, parser, label=None):
5177 self._parser, self.label = parser, label
5178 self._loaded_paths, self.configs = set(), []
5179
5180 def init(self, args=None, filename=None):
5181 assert not self.__initialized
5182 directory = ''
5183 if filename:
5184 location = os.path.realpath(filename)
5185 directory = os.path.dirname(location)
5186 if location in self._loaded_paths:
5187 return False
5188 self._loaded_paths.add(location)
5189
5190 self.__initialized = True
5191 self.own_args, self.filename = args, filename
5192 for location in self._parser.parse_args(args)[0].config_locations or []:
5193 location = os.path.join(directory, expand_path(location))
5194 if os.path.isdir(location):
5195 location = os.path.join(location, 'yt-dlp.conf')
5196 if not os.path.exists(location):
5197 self._parser.error(f'config location {location} does not exist')
5198 self.append_config(self.read_file(location), location)
5199 return True
5200
5201 def __str__(self):
5202 label = join_nonempty(
5203 self.label, 'config', f'"{self.filename}"' if self.filename else '',
5204 delim=' ')
5205 return join_nonempty(
5206 self.own_args is not None and f'{label[0].upper()}{label[1:]}: {self.hide_login_info(self.own_args)}',
5207 *(f'\n{c}'.replace('\n', '\n| ')[1:] for c in self.configs),
5208 delim='\n')
5209
5210 @staticmethod
5211 def read_file(filename, default=[]):
5212 try:
5213 optionf = open(filename)
5214 except OSError:
5215 return default # silently skip if file is not present
5216 try:
5217 # FIXME: https://github.com/ytdl-org/youtube-dl/commit/dfe5fa49aed02cf36ba9f743b11b0903554b5e56
5218 contents = optionf.read()
5219 res = shlex.split(contents, comments=True)
5220 finally:
5221 optionf.close()
5222 return res
5223
5224 @staticmethod
5225 def hide_login_info(opts):
5226 PRIVATE_OPTS = {'-p', '--password', '-u', '--username', '--video-password', '--ap-password', '--ap-username'}
5227 eqre = re.compile('^(?P<key>' + ('|'.join(re.escape(po) for po in PRIVATE_OPTS)) + ')=.+$')
5228
5229 def _scrub_eq(o):
5230 m = eqre.match(o)
5231 if m:
5232 return m.group('key') + '=PRIVATE'
5233 else:
5234 return o
5235
5236 opts = list(map(_scrub_eq, opts))
5237 for idx, opt in enumerate(opts):
5238 if opt in PRIVATE_OPTS and idx + 1 < len(opts):
5239 opts[idx + 1] = 'PRIVATE'
5240 return opts
5241
5242 def append_config(self, *args, label=None):
5243 config = type(self)(self._parser, label)
5244 config._loaded_paths = self._loaded_paths
5245 if config.init(*args):
5246 self.configs.append(config)
5247
5248 @property
5249 def all_args(self):
5250 for config in reversed(self.configs):
5251 yield from config.all_args
5252 yield from self.own_args or []
5253
5254 def parse_args(self):
5255 return self._parser.parse_args(list(self.all_args))
5256
5257
5258 class WebSocketsWrapper:
5259 """Wraps the websockets module for use in non-async scopes"""
5260
5261 def __init__(self, url, headers=None, connect=True):
5262 self.loop = asyncio.events.new_event_loop()
5263 self.conn = compat_websockets.connect(
5264 url, extra_headers=headers, ping_interval=None,
5265 close_timeout=float('inf'), loop=self.loop, ping_timeout=float('inf'))
5266 if connect:
5267 self.__enter__()
5268 atexit.register(self.__exit__, None, None, None)
5269
5270 def __enter__(self):
5271 if not getattr(self, 'pool', None):  # pool is only set once the connection has been entered
5272 self.pool = self.run_with_loop(self.conn.__aenter__(), self.loop)
5273 return self
5274
5275 def send(self, *args):
5276 self.run_with_loop(self.pool.send(*args), self.loop)
5277
5278 def recv(self, *args):
5279 return self.run_with_loop(self.pool.recv(*args), self.loop)
5280
5281 def __exit__(self, type, value, traceback):
5282 try:
5283 return self.run_with_loop(self.conn.__aexit__(type, value, traceback), self.loop)
5284 finally:
5285 self._cancel_all_tasks(self.loop)
5286 self.loop.close()
5287
5288 # taken from https://github.com/python/cpython/blob/3.9/Lib/asyncio/runners.py with modifications
5289 # for contributors: If a new library that uses asyncio needs to be run in non-async scopes, move these functions out of this class
5290 @staticmethod
5291 def run_with_loop(main, loop):
5292 if not asyncio.coroutines.iscoroutine(main):
5293 raise ValueError(f'a coroutine was expected, got {main!r}')
5294
5295 try:
5296 return loop.run_until_complete(main)
5297 finally:
5298 loop.run_until_complete(loop.shutdown_asyncgens())
5299 if hasattr(loop, 'shutdown_default_executor'):
5300 loop.run_until_complete(loop.shutdown_default_executor())
5301
5302 @staticmethod
5303 def _cancel_all_tasks(loop):
5304 to_cancel = asyncio.tasks.all_tasks(loop)
5305
5306 if not to_cancel:
5307 return
5308
5309 for task in to_cancel:
5310 task.cancel()
5311
5312 loop.run_until_complete(
5313 asyncio.tasks.gather(*to_cancel, loop=loop, return_exceptions=True))
5314
5315 for task in to_cancel:
5316 if task.cancelled():
5317 continue
5318 if task.exception() is not None:
5319 loop.call_exception_handler({
5320 'message': 'unhandled exception during asyncio.run() shutdown',
5321 'exception': task.exception(),
5322 'task': task,
5323 })
5324
5325
5326 has_websockets = bool(compat_websockets)
5327
5328
5329 def merge_headers(*dicts):
5330 """Merge dicts of http headers case insensitively, prioritizing the latter ones"""
5331 return {k.title(): v for k, v in itertools.chain.from_iterable(map(dict.items, dicts))}
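# For example (later dicts win, and key casing is normalized via str.title):
#   >>> merge_headers({'user-agent': 'UA1', 'Accept': '*/*'}, {'User-Agent': 'UA2'})
#   {'User-Agent': 'UA2', 'Accept': '*/*'}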
5332
5333
5334 class classproperty:
5335 def __init__(self, f):
5336 self.f = f
5337
5338 def __get__(self, _, cls):
5339 return self.f(cls)