#!/usr/bin/env python3
# coding: utf-8

from __future__ import unicode_literals

import base64
import binascii
import calendar
import codecs
import collections
import contextlib
import ctypes
import datetime
import email.utils
import email.header
import errno
import functools
import gzip
import hashlib
import hmac
import importlib.util
import io
import itertools
import json
import locale
import math
import operator
import os
import platform
import random
import re
import socket
import ssl
import subprocess
import sys
import tempfile
import time
import traceback
import xml.etree.ElementTree
import zlib
import mimetypes

from .compat import (
    compat_HTMLParseError,
    compat_HTMLParser,
    compat_HTTPError,
    compat_basestring,
    compat_chr,
    compat_cookiejar,
    compat_ctypes_WINFUNCTYPE,
    compat_etree_fromstring,
    compat_expanduser,
    compat_html_entities,
    compat_html_entities_html5,
    compat_http_client,
    compat_integer_types,
    compat_numeric_types,
    compat_kwargs,
    compat_os_name,
    compat_parse_qs,
    compat_shlex_split,
    compat_shlex_quote,
    compat_str,
    compat_struct_pack,
    compat_struct_unpack,
    compat_urllib_error,
    compat_urllib_parse,
    compat_urllib_parse_urlencode,
    compat_urllib_parse_urlparse,
    compat_urllib_parse_urlunparse,
    compat_urllib_parse_quote,
    compat_urllib_parse_quote_plus,
    compat_urllib_parse_unquote_plus,
    compat_urllib_request,
    compat_urlparse,
    compat_xpath,
)

from .socks import (
    ProxyType,
    sockssocket,
)


def register_socks_protocols():
    # "Register" SOCKS protocols
    # In Python < 2.6.5, urlsplit() suffers from bug https://bugs.python.org/issue7904
    # URLs with protocols not in urlparse.uses_netloc are not handled correctly
    for scheme in ('socks', 'socks4', 'socks4a', 'socks5'):
        if scheme not in compat_urlparse.uses_netloc:
            compat_urlparse.uses_netloc.append(scheme)


# This is not clearly defined otherwise
compiled_regex_type = type(re.compile(''))


def random_user_agent():
    _USER_AGENT_TPL = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/%s Safari/537.36'
    _CHROME_VERSIONS = (
        '90.0.4430.212',
        '90.0.4430.24',
        '90.0.4430.70',
        '90.0.4430.72',
        '90.0.4430.85',
        '90.0.4430.93',
        '91.0.4472.101',
        '91.0.4472.106',
        '91.0.4472.114',
        '91.0.4472.124',
        '91.0.4472.164',
        '91.0.4472.19',
        '91.0.4472.77',
        '92.0.4515.107',
        '92.0.4515.115',
        '92.0.4515.131',
        '92.0.4515.159',
        '92.0.4515.43',
        '93.0.4556.0',
        '93.0.4577.15',
        '93.0.4577.63',
        '93.0.4577.82',
        '94.0.4606.41',
        '94.0.4606.54',
        '94.0.4606.61',
        '94.0.4606.71',
        '94.0.4606.81',
        '94.0.4606.85',
        '95.0.4638.17',
        '95.0.4638.50',
        '95.0.4638.54',
        '95.0.4638.69',
        '95.0.4638.74',
        '96.0.4664.18',
        '96.0.4664.45',
        '96.0.4664.55',
        '96.0.4664.93',
        '97.0.4692.20',
    )
    return _USER_AGENT_TPL % random.choice(_CHROME_VERSIONS)


std_headers = {
    'User-Agent': random_user_agent(),
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'Accept-Encoding': 'gzip, deflate',
    'Accept-Language': 'en-us,en;q=0.5',
}


USER_AGENTS = {
    'Safari': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27',
}


NO_DEFAULT = object()

ENGLISH_MONTH_NAMES = [
    'January', 'February', 'March', 'April', 'May', 'June',
    'July', 'August', 'September', 'October', 'November', 'December']

MONTH_NAMES = {
    'en': ENGLISH_MONTH_NAMES,
    'fr': [
        'janvier', 'février', 'mars', 'avril', 'mai', 'juin',
        'juillet', 'août', 'septembre', 'octobre', 'novembre', 'décembre'],
}

KNOWN_EXTENSIONS = (
    'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'aac',
    'flv', 'f4v', 'f4a', 'f4b',
    'webm', 'ogg', 'ogv', 'oga', 'ogx', 'spx', 'opus',
    'mkv', 'mka', 'mk3d',
    'avi', 'divx',
    'mov',
    'asf', 'wmv', 'wma',
    '3gp', '3g2',
    'mp3',
    'flac',
    'ape',
    'wav',
    'f4f', 'f4m', 'm3u8', 'smil')

# needed for sanitizing filenames in restricted mode
ACCENT_CHARS = dict(zip('ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖŐØŒÙÚÛÜŰÝÞßàáâãäåæçèéêëìíîïðñòóôõöőøœùúûüűýþÿ',
                        itertools.chain('AAAAAA', ['AE'], 'CEEEEIIIIDNOOOOOOO', ['OE'], 'UUUUUY', ['TH', 'ss'],
                                        'aaaaaa', ['ae'], 'ceeeeiiiionooooooo', ['oe'], 'uuuuuy', ['th'], 'y')))

DATE_FORMATS = (
    '%d %B %Y',
    '%d %b %Y',
    '%B %d %Y',
    '%B %dst %Y',
    '%B %dnd %Y',
    '%B %drd %Y',
    '%B %dth %Y',
    '%b %d %Y',
    '%b %dst %Y',
    '%b %dnd %Y',
    '%b %drd %Y',
    '%b %dth %Y',
    '%b %dst %Y %I:%M',
    '%b %dnd %Y %I:%M',
    '%b %drd %Y %I:%M',
    '%b %dth %Y %I:%M',
    '%Y %m %d',
    '%Y-%m-%d',
    '%Y.%m.%d.',
    '%Y/%m/%d',
    '%Y/%m/%d %H:%M',
    '%Y/%m/%d %H:%M:%S',
    '%Y%m%d%H%M',
    '%Y%m%d%H%M%S',
    '%Y%m%d',
    '%Y-%m-%d %H:%M',
    '%Y-%m-%d %H:%M:%S',
    '%Y-%m-%d %H:%M:%S.%f',
    '%Y-%m-%d %H:%M:%S:%f',
    '%d.%m.%Y %H:%M',
    '%d.%m.%Y %H.%M',
    '%Y-%m-%dT%H:%M:%SZ',
    '%Y-%m-%dT%H:%M:%S.%fZ',
    '%Y-%m-%dT%H:%M:%S.%f0Z',
    '%Y-%m-%dT%H:%M:%S',
    '%Y-%m-%dT%H:%M:%S.%f',
    '%Y-%m-%dT%H:%M',
    '%b %d %Y at %H:%M',
    '%b %d %Y at %H:%M:%S',
    '%B %d %Y at %H:%M',
    '%B %d %Y at %H:%M:%S',
    '%H:%M %d-%b-%Y',
)

DATE_FORMATS_DAY_FIRST = list(DATE_FORMATS)
DATE_FORMATS_DAY_FIRST.extend([
    '%d-%m-%Y',
    '%d.%m.%Y',
    '%d.%m.%y',
    '%d/%m/%Y',
    '%d/%m/%y',
    '%d/%m/%Y %H:%M:%S',
])

DATE_FORMATS_MONTH_FIRST = list(DATE_FORMATS)
DATE_FORMATS_MONTH_FIRST.extend([
    '%m-%d-%Y',
    '%m.%d.%Y',
    '%m/%d/%Y',
    '%m/%d/%y',
    '%m/%d/%Y %H:%M:%S',
])

PACKED_CODES_RE = r"}\('(.+)',(\d+),(\d+),'([^']+)'\.split\('\|'\)"
JSON_LD_RE = r'(?is)<script[^>]+type=(["\']?)application/ld\+json\1[^>]*>(?P<json_ld>.+?)</script>'


def preferredencoding():
    """Get preferred encoding.

    Returns the best encoding scheme for the system, based on
    locale.getpreferredencoding() and some further tweaks.
    """
    try:
        pref = locale.getpreferredencoding()
        'TEST'.encode(pref)
    except Exception:
        pref = 'UTF-8'

    return pref


def write_json_file(obj, fn):
    """ Encode obj as JSON and write it to fn, atomically if possible """

    fn = encodeFilename(fn)
    if sys.version_info < (3, 0) and sys.platform != 'win32':
        encoding = get_filesystem_encoding()
        # os.path.basename returns a bytes object, but NamedTemporaryFile
        # will fail if the filename contains non ascii characters unless we
        # use a unicode object
        path_basename = lambda f: os.path.basename(fn).decode(encoding)
        # the same for os.path.dirname
        path_dirname = lambda f: os.path.dirname(fn).decode(encoding)
    else:
        path_basename = os.path.basename
        path_dirname = os.path.dirname

    args = {
        'suffix': '.tmp',
        'prefix': path_basename(fn) + '.',
        'dir': path_dirname(fn),
        'delete': False,
    }

    # In Python 2.x, json.dump expects a bytestream.
    # In Python 3.x, it writes to a character stream
    if sys.version_info < (3, 0):
        args['mode'] = 'wb'
    else:
        args.update({
            'mode': 'w',
            'encoding': 'utf-8',
        })

    tf = tempfile.NamedTemporaryFile(**compat_kwargs(args))

    try:
        with tf:
            json.dump(obj, tf, ensure_ascii=False)
        if sys.platform == 'win32':
            # Need to remove existing file on Windows, else os.rename raises
            # WindowsError or FileExistsError.
            try:
                os.unlink(fn)
            except OSError:
                pass
        try:
            mask = os.umask(0)
            os.umask(mask)
            os.chmod(tf.name, 0o666 & ~mask)
        except OSError:
            pass
        os.rename(tf.name, fn)
    except Exception:
        try:
            os.remove(tf.name)
        except OSError:
            pass
        raise


if sys.version_info >= (2, 7):
    def find_xpath_attr(node, xpath, key, val=None):
        """ Find the xpath xpath[@key=val] """
        assert re.match(r'^[a-zA-Z_-]+$', key)
        expr = xpath + ('[@%s]' % key if val is None else "[@%s='%s']" % (key, val))
        return node.find(expr)
else:
    def find_xpath_attr(node, xpath, key, val=None):
        for f in node.findall(compat_xpath(xpath)):
            if key not in f.attrib:
                continue
            if val is None or f.attrib.get(key) == val:
                return f
        return None

# On python2.6 the xml.etree.ElementTree.Element methods don't support
# the namespace parameter


def xpath_with_ns(path, ns_map):
    components = [c.split(':') for c in path.split('/')]
    replaced = []
    for c in components:
        if len(c) == 1:
            replaced.append(c[0])
        else:
            ns, tag = c
            replaced.append('{%s}%s' % (ns_map[ns], tag))
    return '/'.join(replaced)

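# Illustrative doctest-style sketch for xpath_with_ns (the namespace URI is a
# made-up placeholder, not something the code above requires):
#   >>> xpath_with_ns('media:song/media:author', {'media': 'http://example.com/'})
#   '{http://example.com/}song/{http://example.com/}author'

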
def xpath_element(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
    def _find_xpath(xpath):
        return node.find(compat_xpath(xpath))

    if isinstance(xpath, (str, compat_str)):
        n = _find_xpath(xpath)
    else:
        for xp in xpath:
            n = _find_xpath(xp)
            if n is not None:
                break

    if n is None:
        if default is not NO_DEFAULT:
            return default
        elif fatal:
            name = xpath if name is None else name
            raise ExtractorError('Could not find XML element %s' % name)
        else:
            return None
    return n


def xpath_text(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
    n = xpath_element(node, xpath, name, fatal=fatal, default=default)
    if n is None or n == default:
        return n
    if n.text is None:
        if default is not NO_DEFAULT:
            return default
        elif fatal:
            name = xpath if name is None else name
            raise ExtractorError('Could not find XML element\'s text %s' % name)
        else:
            return None
    return n.text


def xpath_attr(node, xpath, key, name=None, fatal=False, default=NO_DEFAULT):
    n = find_xpath_attr(node, xpath, key)
    if n is None:
        if default is not NO_DEFAULT:
            return default
        elif fatal:
            name = '%s[@%s]' % (xpath, key) if name is None else name
            raise ExtractorError('Could not find XML attribute %s' % name)
        else:
            return None
    return n.attrib[key]


def get_element_by_id(id, html):
    """Return the content of the tag with the specified ID in the passed HTML document"""
    return get_element_by_attribute('id', id, html)


def get_element_by_class(class_name, html):
    """Return the content of the first tag with the specified class in the passed HTML document"""
    retval = get_elements_by_class(class_name, html)
    return retval[0] if retval else None


def get_element_by_attribute(attribute, value, html, escape_value=True):
    retval = get_elements_by_attribute(attribute, value, html, escape_value)
    return retval[0] if retval else None


def get_elements_by_class(class_name, html):
    """Return the content of all tags with the specified class in the passed HTML document as a list"""
    return get_elements_by_attribute(
        'class', r'[^\'"]*\b%s\b[^\'"]*' % re.escape(class_name),
        html, escape_value=False)

def get_elements_by_attribute(attribute, value, html, escape_value=True):
    """Return the content of all tags with the specified attribute in the passed HTML document, as a list"""

    value = re.escape(value) if escape_value else value

    retlist = []
    for m in re.finditer(r'''(?xs)
        <([a-zA-Z0-9:._-]+)
         (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'|))*?
         \s+%s=['"]?%s['"]?
         (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'|))*?
        \s*>
        (?P<content>.*?)
        </\1>
    ''' % (re.escape(attribute), value), html):
        res = m.group('content')

        if res.startswith('"') or res.startswith("'"):
            res = res[1:-1]

        retlist.append(unescapeHTML(res))

    return retlist

class HTMLAttributeParser(compat_HTMLParser):
    """Trivial HTML parser to gather the attributes for a single element"""

    def __init__(self):
        self.attrs = {}
        compat_HTMLParser.__init__(self)

    def handle_starttag(self, tag, attrs):
        self.attrs = dict(attrs)


class HTMLListAttrsParser(compat_HTMLParser):
    """HTML parser to gather the attributes for the elements of a list"""

    def __init__(self):
        compat_HTMLParser.__init__(self)
        self.items = []
        self._level = 0

    def handle_starttag(self, tag, attrs):
        if tag == 'li' and self._level == 0:
            self.items.append(dict(attrs))
        self._level += 1

    def handle_endtag(self, tag):
        self._level -= 1

def extract_attributes(html_element):
    """Given a string for an HTML element such as
    <el
         a="foo" B="bar" c="&98;az" d=boz
         empty= noval entity="&amp;"
         sq='"' dq="'"
    >
    Decode and return a dictionary of attributes.
    {
        'a': 'foo', 'b': 'bar', 'c': 'baz', 'd': 'boz',
        'empty': '', 'noval': None, 'entity': '&',
        'sq': '"', 'dq': '\''
    }.
    NB HTMLParser is stricter in Python 2.6 & 3.2 than in later versions,
    but the cases in the unit test will work for all of 2.6, 2.7, 3.2-3.5.
    """
    parser = HTMLAttributeParser()
    try:
        parser.feed(html_element)
        parser.close()
    # Older Python may throw HTMLParseError in case of malformed HTML
    except compat_HTMLParseError:
        pass
    return parser.attrs

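# Illustrative sketch for extract_attributes (note that HTMLParser
# lower-cases attribute names, so 'B' above becomes 'b'):
#   >>> extract_attributes('<e x="y">')
#   {'x': 'y'}
#   >>> extract_attributes('<e x=y>')
#   {'x': 'y'}

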
def parse_list(webpage):
    """Given a string containing a series of HTML <li> elements,
    return a list of dictionaries with their attributes"""
    parser = HTMLListAttrsParser()
    parser.feed(webpage)
    parser.close()
    return parser.items

def clean_html(html):
    """Clean an HTML snippet into a readable string"""

    if html is None:  # Convenience for sanitizing descriptions etc.
        return html

    # Newline vs <br />
    html = html.replace('\n', ' ')
    html = re.sub(r'(?u)\s*<\s*br\s*/?\s*>\s*', '\n', html)
    html = re.sub(r'(?u)<\s*/\s*p\s*>\s*<\s*p[^>]*>', '\n', html)
    # Strip html tags
    html = re.sub('<.*?>', '', html)
    # Replace html entities
    html = unescapeHTML(html)
    return html.strip()

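# Illustrative sketch for clean_html: <br> turns into a newline and the
# remaining tags are stripped:
#   >>> clean_html('<p>a<br/>b</p>')
#   'a\nb'

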
def sanitize_open(filename, open_mode):
    """Try to open the given filename, and slightly tweak it if this fails.

    Attempts to open the given filename. If this fails, it tries to change
    the filename slightly, step by step, until it's either able to open it
    or it fails and raises a final exception, like the standard open()
    function.

    It returns the tuple (stream, definitive_file_name).
    """
    try:
        if filename == '-':
            if sys.platform == 'win32':
                import msvcrt
                msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
            return (sys.stdout.buffer if hasattr(sys.stdout, 'buffer') else sys.stdout, filename)
        stream = open(encodeFilename(filename), open_mode)
        return (stream, filename)
    except (IOError, OSError) as err:
        if err.errno in (errno.EACCES,):
            raise

        # In case of error, try to remove win32 forbidden chars
        alt_filename = sanitize_path(filename)
        if alt_filename == filename:
            raise
        else:
            # An exception here should be caught in the caller
            stream = open(encodeFilename(alt_filename), open_mode)
            return (stream, alt_filename)


def timeconvert(timestr):
    """Convert RFC 2822 defined time string into system timestamp"""
    timestamp = None
    timetuple = email.utils.parsedate_tz(timestr)
    if timetuple is not None:
        timestamp = email.utils.mktime_tz(timetuple)
    return timestamp


def sanitize_filename(s, restricted=False, is_id=False):
    """Sanitizes a string so it could be used as part of a filename.
    If restricted is set, use a stricter subset of allowed characters.
    Set is_id if this is not an arbitrary string, but an ID that should be kept
    if possible.
    """
    def replace_insane(char):
        if restricted and char in ACCENT_CHARS:
            return ACCENT_CHARS[char]
        elif not restricted and char == '\n':
            return ' '
        elif char == '?' or ord(char) < 32 or ord(char) == 127:
            return ''
        elif char == '"':
            return '' if restricted else '\''
        elif char == ':':
            return '_-' if restricted else ' -'
        elif char in '\\/|*<>':
            return '_'
        if restricted and (char in '!&\'()[]{}$;`^,#' or char.isspace()):
            return '_'
        if restricted and ord(char) > 127:
            return '_'
        return char

    if s == '':
        return ''
    # Handle timestamps
    s = re.sub(r'[0-9]+(?::[0-9]+)+', lambda m: m.group(0).replace(':', '_'), s)
    result = ''.join(map(replace_insane, s))
    if not is_id:
        while '__' in result:
            result = result.replace('__', '_')
        result = result.strip('_')
        # Common case of "Foreign band name - English song title"
        if restricted and result.startswith('-_'):
            result = result[2:]
        if result.startswith('-'):
            result = '_' + result[len('-'):]
        result = result.lstrip('.')
        if not result:
            result = '_'
    return result

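# Illustrative sketches for sanitize_filename; outputs follow the
# replace_insane rules above:
#   >>> sanitize_filename('New World record at 0:12:34')
#   'New World record at 0_12_34'
#   >>> sanitize_filename('yes! no?', restricted=True)
#   'yes_no'

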
def sanitize_path(s, force=False):
    """Sanitizes and normalizes path on Windows"""
    if sys.platform == 'win32':
        force = False
        drive_or_unc, _ = os.path.splitdrive(s)
        if sys.version_info < (2, 7) and not drive_or_unc:
            drive_or_unc, _ = os.path.splitunc(s)
    elif force:
        drive_or_unc = ''
    else:
        return s

    norm_path = os.path.normpath(remove_start(s, drive_or_unc)).split(os.path.sep)
    if drive_or_unc:
        norm_path.pop(0)
    sanitized_path = [
        path_part if path_part in ['.', '..'] else re.sub(r'(?:[/<>:"\|\\?\*]|[\s.]$)', '#', path_part)
        for path_part in norm_path]
    if drive_or_unc:
        sanitized_path.insert(0, drive_or_unc + os.path.sep)
    elif force and s[0] == os.path.sep:
        sanitized_path.insert(0, os.path.sep)
    return os.path.join(*sanitized_path)


def sanitize_url(url):
    # Prepend protocol-less URLs with `http:` scheme in order to mitigate
    # the number of unwanted failures due to missing protocol
    if url.startswith('//'):
        return 'http:%s' % url
    # Fix some common typos seen so far
    COMMON_TYPOS = (
        # https://github.com/ytdl-org/youtube-dl/issues/15649
        (r'^httpss://', r'https://'),
        # https://bx1.be/lives/direct-tv/
        (r'^rmtp([es]?)://', r'rtmp\1://'),
    )
    for mistake, fixup in COMMON_TYPOS:
        if re.match(mistake, url):
            return re.sub(mistake, fixup, url)
    return url


def extract_basic_auth(url):
    parts = compat_urlparse.urlsplit(url)
    if parts.username is None:
        return url, None
    url = compat_urlparse.urlunsplit(parts._replace(netloc=(
        parts.hostname if parts.port is None
        else '%s:%d' % (parts.hostname, parts.port))))
    auth_payload = base64.b64encode(
        ('%s:%s' % (parts.username, parts.password or '')).encode('utf-8'))
    return url, 'Basic ' + auth_payload.decode('utf-8')

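# Illustrative sketch for extract_basic_auth (the credentials and host are
# placeholders): the userinfo is stripped from the URL and returned as a
# ready-to-use Authorization header value:
#   >>> extract_basic_auth('http://user:pass@example.com/x')
#   ('http://example.com/x', 'Basic dXNlcjpwYXNz')

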
def sanitized_Request(url, *args, **kwargs):
    url, auth_header = extract_basic_auth(escape_url(sanitize_url(url)))
    if auth_header is not None:
        headers = args[1] if len(args) >= 2 else kwargs.setdefault('headers', {})
        headers['Authorization'] = auth_header
    return compat_urllib_request.Request(url, *args, **kwargs)


def expand_path(s):
    """Expand shell variables and ~"""
    return os.path.expandvars(compat_expanduser(s))


def orderedSet(iterable):
    """ Remove all duplicates from the input iterable """
    res = []
    for el in iterable:
        if el not in res:
            res.append(el)
    return res

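# Illustrative sketch for orderedSet: duplicates are dropped while the
# first-seen order is preserved:
#   >>> orderedSet([1, 2, 1, 3, 2])
#   [1, 2, 3]

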
def _htmlentity_transform(entity_with_semicolon):
    """Transforms an HTML entity to a character."""
    entity = entity_with_semicolon[:-1]

    # Known non-numeric HTML entity
    if entity in compat_html_entities.name2codepoint:
        return compat_chr(compat_html_entities.name2codepoint[entity])

    # TODO: HTML5 allows entities without a semicolon. For example,
    # '&Eacuteric' should be decoded as 'Éric'.
    if entity_with_semicolon in compat_html_entities_html5:
        return compat_html_entities_html5[entity_with_semicolon]

    mobj = re.match(r'#(x[0-9a-fA-F]+|[0-9]+)', entity)
    if mobj is not None:
        numstr = mobj.group(1)
        if numstr.startswith('x'):
            base = 16
            numstr = '0%s' % numstr
        else:
            base = 10
        # See https://github.com/ytdl-org/youtube-dl/issues/7518
        try:
            return compat_chr(int(numstr, base))
        except ValueError:
            pass

    # Unknown entity in name, return its literal representation
    return '&%s;' % entity


def unescapeHTML(s):
    if s is None:
        return None
    assert type(s) == compat_str

    return re.sub(
        r'&([^&;]+;)', lambda m: _htmlentity_transform(m.group(1)), s)


def escapeHTML(text):
    return (
        text
        .replace('&', '&amp;')
        .replace('<', '&lt;')
        .replace('>', '&gt;')
        .replace('"', '&quot;')
        .replace("'", '&#39;')
    )

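# Illustrative sketch for escapeHTML ('&' is replaced first, so already
# escaped input would be double-escaped):
#   >>> escapeHTML('"a" < \'b\' & c')
#   '&quot;a&quot; &lt; &#39;b&#39; &amp; c'

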
def process_communicate_or_kill(p, *args, **kwargs):
    try:
        return p.communicate(*args, **kwargs)
    except BaseException:  # Including KeyboardInterrupt
        p.kill()
        p.wait()
        raise


class Popen(subprocess.Popen):
    if sys.platform == 'win32':
        _startupinfo = subprocess.STARTUPINFO()
        _startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW
    else:
        _startupinfo = None

    def __init__(self, *args, **kwargs):
        super(Popen, self).__init__(*args, **kwargs, startupinfo=self._startupinfo)

    def communicate_or_kill(self, *args, **kwargs):
        return process_communicate_or_kill(self, *args, **kwargs)


def get_subprocess_encoding():
    if sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
        # For subprocess calls, encode with locale encoding
        # Refer to http://stackoverflow.com/a/9951851/35070
        encoding = preferredencoding()
    else:
        encoding = sys.getfilesystemencoding()
    if encoding is None:
        encoding = 'utf-8'
    return encoding


def encodeFilename(s, for_subprocess=False):
    """
    @param s The name of the file
    """

    assert type(s) == compat_str

    # Python 3 has a Unicode API
    if sys.version_info >= (3, 0):
        return s

    # Pass '' directly to use Unicode APIs on Windows 2000 and up
    # (Detecting Windows NT 4 is tricky because 'major >= 4' would
    # match Windows 9x series as well. Besides, NT 4 is obsolete.)
    if not for_subprocess and sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
        return s

    # Jython assumes filenames are Unicode strings though reported as Python 2.x compatible
    if sys.platform.startswith('java'):
        return s

    return s.encode(get_subprocess_encoding(), 'ignore')


def decodeFilename(b, for_subprocess=False):

    if sys.version_info >= (3, 0):
        return b

    if not isinstance(b, bytes):
        return b

    return b.decode(get_subprocess_encoding(), 'ignore')


def encodeArgument(s):
    if not isinstance(s, compat_str):
        # Legacy code that uses byte strings
        # Uncomment the following line after fixing all post processors
        # assert False, 'Internal error: %r should be of type %r, is %r' % (s, compat_str, type(s))
        s = s.decode('ascii')
    return encodeFilename(s, True)


def decodeArgument(b):
    return decodeFilename(b, True)


def decodeOption(optval):
    if optval is None:
        return optval
    if isinstance(optval, bytes):
        optval = optval.decode(preferredencoding())

    assert isinstance(optval, compat_str)
    return optval


_timetuple = collections.namedtuple('Time', ('hours', 'minutes', 'seconds', 'milliseconds'))


def timetuple_from_msec(msec):
    secs, msec = divmod(msec, 1000)
    mins, secs = divmod(secs, 60)
    hrs, mins = divmod(mins, 60)
    return _timetuple(hrs, mins, secs, msec)


def formatSeconds(secs, delim=':', msec=False):
    time = timetuple_from_msec(secs * 1000)
    if time.hours:
        ret = '%d%s%02d%s%02d' % (time.hours, delim, time.minutes, delim, time.seconds)
    elif time.minutes:
        ret = '%d%s%02d' % (time.minutes, delim, time.seconds)
    else:
        ret = '%d' % time.seconds
    return '%s.%03d' % (ret, time.milliseconds) if msec else ret

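# Illustrative sketches for timetuple_from_msec and formatSeconds:
#   >>> timetuple_from_msec(345244)
#   Time(hours=0, minutes=5, seconds=45, milliseconds=244)
#   >>> formatSeconds(3723)
#   '1:02:03'

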
def _ssl_load_windows_store_certs(ssl_context, storename):
    # Code adapted from _load_windows_store_certs in https://github.com/python/cpython/blob/main/Lib/ssl.py
    try:
        certs = [cert for cert, encoding, trust in ssl.enum_certificates(storename)
                 if encoding == 'x509_asn' and (
                     trust is True or ssl.Purpose.SERVER_AUTH.oid in trust)]
    except PermissionError:
        return
    for cert in certs:
        try:
            ssl_context.load_verify_locations(cadata=cert)
        except ssl.SSLError:
            pass


def make_HTTPS_handler(params, **kwargs):
    opts_check_certificate = not params.get('nocheckcertificate')
    context = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT)
    context.check_hostname = opts_check_certificate
    context.verify_mode = ssl.CERT_REQUIRED if opts_check_certificate else ssl.CERT_NONE
    if opts_check_certificate:
        try:
            context.load_default_certs()
            # Work around the issue in load_default_certs when there are bad certificates. See:
            # https://github.com/yt-dlp/yt-dlp/issues/1060,
            # https://bugs.python.org/issue35665, https://bugs.python.org/issue45312
        except ssl.SSLError:
            # enum_certificates is not present in mingw python. See https://github.com/yt-dlp/yt-dlp/issues/1151
            if sys.platform == 'win32' and hasattr(ssl, 'enum_certificates'):
                # Create a new context to discard any certificates that were already loaded
                context = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT)
                context.check_hostname, context.verify_mode = True, ssl.CERT_REQUIRED
                for storename in ('CA', 'ROOT'):
                    _ssl_load_windows_store_certs(context, storename)
            context.set_default_verify_paths()
    return YoutubeDLHTTPSHandler(params, context=context, **kwargs)


def bug_reports_message(before=';'):
    if ytdl_is_updateable():
        update_cmd = 'type yt-dlp -U to update'
    else:
        update_cmd = 'see https://github.com/yt-dlp/yt-dlp on how to update'
    msg = 'please report this issue on https://github.com/yt-dlp/yt-dlp .'
    msg += ' Make sure you are using the latest version; %s.' % update_cmd
    msg += ' Be sure to call yt-dlp with the --verbose flag and include its complete output.'

    before = before.rstrip()
    if not before or before.endswith(('.', '!', '?')):
        msg = msg[0].title() + msg[1:]

    return (before + ' ' if before else '') + msg


class YoutubeDLError(Exception):
    """Base exception for YoutubeDL errors."""
    msg = None

    def __init__(self, msg=None):
        if msg is not None:
            self.msg = msg
        elif self.msg is None:
            self.msg = type(self).__name__
        super().__init__(self.msg)


network_exceptions = [compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error]
if hasattr(ssl, 'CertificateError'):
    network_exceptions.append(ssl.CertificateError)
network_exceptions = tuple(network_exceptions)


class ExtractorError(YoutubeDLError):
    """Error during info extraction."""

    def __init__(self, msg, tb=None, expected=False, cause=None, video_id=None, ie=None):
        """ tb, if given, is the original traceback (so that it can be printed out).
        If expected is set, this is a normal error message and most likely not a bug in yt-dlp.
        """
        if sys.exc_info()[0] in network_exceptions:
            expected = True

        self.msg = str(msg)
        self.traceback = tb
        self.expected = expected
        self.cause = cause
        self.video_id = video_id
        self.ie = ie
        self.exc_info = sys.exc_info()  # preserve original exception

        super(ExtractorError, self).__init__(''.join((
            format_field(ie, template='[%s] '),
            format_field(video_id, template='%s: '),
            self.msg,
            format_field(cause, template=' (caused by %r)'),
            '' if expected else bug_reports_message())))

    def format_traceback(self):
        if self.traceback is None:
            return None
        return ''.join(traceback.format_tb(self.traceback))


class UnsupportedError(ExtractorError):
    def __init__(self, url):
        super(UnsupportedError, self).__init__(
            'Unsupported URL: %s' % url, expected=True)
        self.url = url


class RegexNotFoundError(ExtractorError):
    """Error when a regex didn't match"""
    pass


class GeoRestrictedError(ExtractorError):
    """Geographic restriction Error exception.

    This exception may be thrown when a video is not available from your
    geographic location due to geographic restrictions imposed by a website.
    """

    def __init__(self, msg, countries=None, **kwargs):
        kwargs['expected'] = True
        super(GeoRestrictedError, self).__init__(msg, **kwargs)
        self.countries = countries


class DownloadError(YoutubeDLError):
    """Download Error exception.

    This exception may be thrown by FileDownloader objects if they are not
    configured to continue on errors. They will contain the appropriate
    error message.
    """

    def __init__(self, msg, exc_info=None):
        """ exc_info, if given, is the original exception that caused the trouble (as returned by sys.exc_info()). """
        super(DownloadError, self).__init__(msg)
        self.exc_info = exc_info


class EntryNotInPlaylist(YoutubeDLError):
    """Entry not in playlist exception.

    This exception will be thrown by YoutubeDL when a requested entry
    is not found in the playlist info_dict
    """
    msg = 'Entry not found in info'


class SameFileError(YoutubeDLError):
    """Same File exception.

    This exception will be thrown by FileDownloader objects if they detect
    multiple files would have to be downloaded to the same file on disk.
    """
    msg = 'Fixed output name but more than one file to download'

    def __init__(self, filename=None):
        if filename is not None:
            self.msg += f': {filename}'
        super().__init__(self.msg)


class PostProcessingError(YoutubeDLError):
    """Post Processing exception.

    This exception may be raised by PostProcessor's .run() method to
    indicate an error in the postprocessing task.
    """


class DownloadCancelled(YoutubeDLError):
    """ Exception raised when the download queue should be interrupted """
    msg = 'The download was cancelled'


class ExistingVideoReached(DownloadCancelled):
    """ --break-on-existing triggered """
    msg = 'Encountered a video that is already in the archive, stopping due to --break-on-existing'


class RejectedVideoReached(DownloadCancelled):
    """ --break-on-reject triggered """
    msg = 'Encountered a video that did not match filter, stopping due to --break-on-reject'


class MaxDownloadsReached(DownloadCancelled):
    """ --max-downloads limit has been reached. """
    msg = 'Maximum number of downloads reached, stopping due to --max-downloads'


class ReExtractInfo(YoutubeDLError):
    """ Video info needs to be re-extracted. """

    def __init__(self, msg, expected=False):
        super().__init__(msg)
        self.expected = expected


class ThrottledDownload(ReExtractInfo):
    """ Download speed below --throttled-rate. """
    msg = 'The download speed is below throttle limit'

    def __init__(self):
        super().__init__(self.msg, expected=False)


class UnavailableVideoError(YoutubeDLError):
    """Unavailable Format exception.

    This exception will be thrown when a video is requested
    in a format that is not available for that video.
    """
    msg = 'Unable to download video'

    def __init__(self, err=None):
        if err is not None:
            self.msg += f': {err}'
        super().__init__(self.msg)


class ContentTooShortError(YoutubeDLError):
    """Content Too Short exception.

    This exception may be raised by FileDownloader objects when a file they
    download is too small for what the server announced first, indicating
    the connection was probably interrupted.
    """

    def __init__(self, downloaded, expected):
        super(ContentTooShortError, self).__init__(
            'Downloaded {0} bytes, expected {1} bytes'.format(downloaded, expected)
        )
        # Both in bytes
        self.downloaded = downloaded
        self.expected = expected


class XAttrMetadataError(YoutubeDLError):
    def __init__(self, code=None, msg='Unknown error'):
        super(XAttrMetadataError, self).__init__(msg)
        self.code = code
        self.msg = msg

        # Parsing code and msg
        if (self.code in (errno.ENOSPC, errno.EDQUOT)
                or 'No space left' in self.msg or 'Disk quota exceeded' in self.msg):
            self.reason = 'NO_SPACE'
        elif self.code == errno.E2BIG or 'Argument list too long' in self.msg:
            self.reason = 'VALUE_TOO_LONG'
        else:
            self.reason = 'NOT_SUPPORTED'


class XAttrUnavailableError(YoutubeDLError):
    pass


def _create_http_connection(ydl_handler, http_class, is_https, *args, **kwargs):
    # Working around python 2 bug (see http://bugs.python.org/issue17849) by limiting
    # expected HTTP responses to meet HTTP/1.0 or later (see also
    # https://github.com/ytdl-org/youtube-dl/issues/6727)
    if sys.version_info < (3, 0):
        kwargs['strict'] = True
    hc = http_class(*args, **compat_kwargs(kwargs))
    source_address = ydl_handler._params.get('source_address')

    if source_address is not None:
        # This is to workaround _create_connection() from socket where it will try all
        # address data from getaddrinfo() including IPv6. This filters the result from
        # getaddrinfo() based on the source_address value.
        # This is based on the cpython socket.create_connection() function.
        # https://github.com/python/cpython/blob/master/Lib/socket.py#L691
        def _create_connection(address, timeout=socket._GLOBAL_DEFAULT_TIMEOUT, source_address=None):
            host, port = address
            err = None
            addrs = socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM)
            af = socket.AF_INET if '.' in source_address[0] else socket.AF_INET6
            ip_addrs = [addr for addr in addrs if addr[0] == af]
            if addrs and not ip_addrs:
                ip_version = 'v4' if af == socket.AF_INET else 'v6'
                raise socket.error(
                    "No remote IP%s addresses available for connect, can't use '%s' as source address"
                    % (ip_version, source_address[0]))
            for res in ip_addrs:
                af, socktype, proto, canonname, sa = res
                sock = None
                try:
                    sock = socket.socket(af, socktype, proto)
                    if timeout is not socket._GLOBAL_DEFAULT_TIMEOUT:
                        sock.settimeout(timeout)
                    sock.bind(source_address)
                    sock.connect(sa)
                    err = None  # Explicitly break reference cycle
                    return sock
                except socket.error as _:
                    err = _
                    if sock is not None:
                        sock.close()
            if err is not None:
                raise err
            else:
                raise socket.error('getaddrinfo returns an empty list')
        if hasattr(hc, '_create_connection'):
            hc._create_connection = _create_connection
        sa = (source_address, 0)
        if hasattr(hc, 'source_address'):  # Python 2.7+
            hc.source_address = sa
        else:  # Python 2.6
            def _hc_connect(self, *args, **kwargs):
                sock = _create_connection(
                    (self.host, self.port), self.timeout, sa)
                if is_https:
                    self.sock = ssl.wrap_socket(
                        sock, self.key_file, self.cert_file,
                        ssl_version=ssl.PROTOCOL_TLSv1)
                else:
                    self.sock = sock
            hc.connect = functools.partial(_hc_connect, hc)

    return hc


def handle_youtubedl_headers(headers):
    filtered_headers = headers

    if 'Youtubedl-no-compression' in filtered_headers:
        filtered_headers = dict((k, v) for k, v in filtered_headers.items() if k.lower() != 'accept-encoding')
        del filtered_headers['Youtubedl-no-compression']

    return filtered_headers


class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
    """Handler for HTTP requests and responses.

    This class, when installed with an OpenerDirector, automatically adds
    the standard headers to every HTTP request and handles gzipped and
    deflated responses from web servers. If compression is to be avoided in
    a particular request, the original request in the program code only has
    to include the HTTP header "Youtubedl-no-compression", which will be
    removed before making the real request.

    Part of this code was copied from:

    http://techknack.net/python-urllib2-handlers/

    Andrew Rowls, the author of that code, agreed to release it to the
    public domain.
    """

    def __init__(self, params, *args, **kwargs):
        compat_urllib_request.HTTPHandler.__init__(self, *args, **kwargs)
        self._params = params

    def http_open(self, req):
        conn_class = compat_http_client.HTTPConnection

        socks_proxy = req.headers.get('Ytdl-socks-proxy')
        if socks_proxy:
            conn_class = make_socks_conn_class(conn_class, socks_proxy)
            del req.headers['Ytdl-socks-proxy']

        return self.do_open(functools.partial(
            _create_http_connection, self, conn_class, False),
            req)

    @staticmethod
    def deflate(data):
        if not data:
            return data
        try:
            return zlib.decompress(data, -zlib.MAX_WBITS)
        except zlib.error:
            return zlib.decompress(data)

    def http_request(self, req):
        # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
        # always respected by websites, some tend to give out URLs with non percent-encoded
        # non-ASCII characters (see telemb.py, ard.py [#3412])
        # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
        # To work around aforementioned issue we will replace request's original URL with
        # percent-encoded one
        # Since redirects are also affected (e.g. http://www.southpark.de/alle-episoden/s18e09)
        # the code of this workaround has been moved here from YoutubeDL.urlopen()
        url = req.get_full_url()
        url_escaped = escape_url(url)

        # Substitute URL if any change after escaping
        if url != url_escaped:
            req = update_Request(req, url=url_escaped)

        for h, v in std_headers.items():
            # Capitalize is needed because of Python bug 2275: http://bugs.python.org/issue2275
            # The dict keys are capitalized because of this bug by urllib
            if h.capitalize() not in req.headers:
                req.add_header(h, v)

        req.headers = handle_youtubedl_headers(req.headers)

        if sys.version_info < (2, 7) and '#' in req.get_full_url():
            # Python 2.6 is brain-dead when it comes to fragments
            req._Request__original = req._Request__original.partition('#')[0]
            req._Request__r_type = req._Request__r_type.partition('#')[0]

        return req

    def http_response(self, req, resp):
        old_resp = resp
        # gzip
        if resp.headers.get('Content-encoding', '') == 'gzip':
            content = resp.read()
            gz = gzip.GzipFile(fileobj=io.BytesIO(content), mode='rb')
            try:
                uncompressed = io.BytesIO(gz.read())
            except IOError as original_ioerror:
                # There may be junk at the end of the file
                # See http://stackoverflow.com/q/4928560/35070 for details
                for i in range(1, 1024):
                    try:
                        gz = gzip.GzipFile(fileobj=io.BytesIO(content[:-i]), mode='rb')
                        uncompressed = io.BytesIO(gz.read())
                    except IOError:
                        continue
                    break
                else:
                    raise original_ioerror
            resp = compat_urllib_request.addinfourl(uncompressed, old_resp.headers, old_resp.url, old_resp.code)
            resp.msg = old_resp.msg
            del resp.headers['Content-encoding']
        # deflate
        if resp.headers.get('Content-encoding', '') == 'deflate':
            gz = io.BytesIO(self.deflate(resp.read()))
            resp = compat_urllib_request.addinfourl(gz, old_resp.headers, old_resp.url, old_resp.code)
            resp.msg = old_resp.msg
            del resp.headers['Content-encoding']
        # Percent-encode redirect URL of Location HTTP header to satisfy RFC 3986 (see
        # https://github.com/ytdl-org/youtube-dl/issues/6457).
        if 300 <= resp.code < 400:
            location = resp.headers.get('Location')
            if location:
                # As of RFC 2616 default charset is iso-8859-1 that is respected by python 3
                if sys.version_info >= (3, 0):
                    location = location.encode('iso-8859-1').decode('utf-8')
                else:
                    location = location.decode('utf-8')
                location_escaped = escape_url(location)
                if location != location_escaped:
                    del resp.headers['Location']
                    if sys.version_info < (3, 0):
                        location_escaped = location_escaped.encode('utf-8')
                    resp.headers['Location'] = location_escaped
        return resp

    https_request = http_request
    https_response = http_response


def make_socks_conn_class(base_class, socks_proxy):
    assert issubclass(base_class, (
        compat_http_client.HTTPConnection, compat_http_client.HTTPSConnection))

    url_components = compat_urlparse.urlparse(socks_proxy)
    if url_components.scheme.lower() == 'socks5':
        socks_type = ProxyType.SOCKS5
    elif url_components.scheme.lower() in ('socks', 'socks4'):
        socks_type = ProxyType.SOCKS4
    elif url_components.scheme.lower() == 'socks4a':
        socks_type = ProxyType.SOCKS4A

    def unquote_if_non_empty(s):
        if not s:
            return s
        return compat_urllib_parse_unquote_plus(s)

    proxy_args = (
        socks_type,
        url_components.hostname, url_components.port or 1080,
        True,  # Remote DNS
        unquote_if_non_empty(url_components.username),
        unquote_if_non_empty(url_components.password),
    )

    class SocksConnection(base_class):
        def connect(self):
            self.sock = sockssocket()
            self.sock.setproxy(*proxy_args)
            if type(self.timeout) in (int, float):
                self.sock.settimeout(self.timeout)
            self.sock.connect((self.host, self.port))

            if isinstance(self, compat_http_client.HTTPSConnection):
                if hasattr(self, '_context'):  # Python > 2.6
                    self.sock = self._context.wrap_socket(
                        self.sock, server_hostname=self.host)
                else:
                    self.sock = ssl.wrap_socket(self.sock)

    return SocksConnection


class YoutubeDLHTTPSHandler(compat_urllib_request.HTTPSHandler):
    def __init__(self, params, https_conn_class=None, *args, **kwargs):
        compat_urllib_request.HTTPSHandler.__init__(self, *args, **kwargs)
        self._https_conn_class = https_conn_class or compat_http_client.HTTPSConnection
        self._params = params

    def https_open(self, req):
        kwargs = {}
        conn_class = self._https_conn_class

        if hasattr(self, '_context'):  # python > 2.6
            kwargs['context'] = self._context
        if hasattr(self, '_check_hostname'):  # python 3.x
            kwargs['check_hostname'] = self._check_hostname

        socks_proxy = req.headers.get('Ytdl-socks-proxy')
        if socks_proxy:
            conn_class = make_socks_conn_class(conn_class, socks_proxy)
            del req.headers['Ytdl-socks-proxy']

        return self.do_open(functools.partial(
            _create_http_connection, self, conn_class, True),
            req, **kwargs)


class YoutubeDLCookieJar(compat_cookiejar.MozillaCookieJar):
    """
    See [1] for cookie file format.

    1. https://curl.haxx.se/docs/http-cookies.html
    """
    _HTTPONLY_PREFIX = '#HttpOnly_'
    _ENTRY_LEN = 7
    _HEADER = '''# Netscape HTTP Cookie File
# This file is generated by yt-dlp. Do not edit.

'''
    _CookieFileEntry = collections.namedtuple(
        'CookieFileEntry',
        ('domain_name', 'include_subdomains', 'path', 'https_only', 'expires_at', 'name', 'value'))

    def save(self, filename=None, ignore_discard=False, ignore_expires=False):
        """
        Save cookies to a file.

        Most of the code is taken from CPython 3.8 and slightly adapted
        to support cookie files with UTF-8 in both python 2 and 3.
        """
        if filename is None:
            if self.filename is not None:
                filename = self.filename
            else:
                raise ValueError(compat_cookiejar.MISSING_FILENAME_TEXT)

        # Store session cookies with `expires` set to 0 instead of an empty
        # string
        for cookie in self:
            if cookie.expires is None:
                cookie.expires = 0

        with io.open(filename, 'w', encoding='utf-8') as f:
            f.write(self._HEADER)
            now = time.time()
            for cookie in self:
                if not ignore_discard and cookie.discard:
                    continue
                if not ignore_expires and cookie.is_expired(now):
                    continue
                if cookie.secure:
                    secure = 'TRUE'
                else:
                    secure = 'FALSE'
                if cookie.domain.startswith('.'):
                    initial_dot = 'TRUE'
                else:
                    initial_dot = 'FALSE'
                if cookie.expires is not None:
                    expires = compat_str(cookie.expires)
                else:
                    expires = ''
                if cookie.value is None:
                    # cookies.txt regards 'Set-Cookie: foo' as a cookie
                    # with no name, whereas http.cookiejar regards it as a
                    # cookie with no value.
                    name = ''
                    value = cookie.name
                else:
                    name = cookie.name
                    value = cookie.value
                f.write(
                    '\t'.join([cookie.domain, initial_dot, cookie.path,
                               secure, expires, name, value]) + '\n')

    def load(self, filename=None, ignore_discard=False, ignore_expires=False):
        """Load cookies from a file."""
        if filename is None:
            if self.filename is not None:
                filename = self.filename
            else:
                raise ValueError(compat_cookiejar.MISSING_FILENAME_TEXT)

        def prepare_line(line):
            if line.startswith(self._HTTPONLY_PREFIX):
                line = line[len(self._HTTPONLY_PREFIX):]
            # comments and empty lines are fine
            if line.startswith('#') or not line.strip():
                return line
            cookie_list = line.split('\t')
            if len(cookie_list) != self._ENTRY_LEN:
                raise compat_cookiejar.LoadError('invalid length %d' % len(cookie_list))
            cookie = self._CookieFileEntry(*cookie_list)
            if cookie.expires_at and not cookie.expires_at.isdigit():
                raise compat_cookiejar.LoadError('invalid expires at %s' % cookie.expires_at)
            return line

        cf = io.StringIO()
        with io.open(filename, encoding='utf-8') as f:
            for line in f:
                try:
                    cf.write(prepare_line(line))
                except compat_cookiejar.LoadError as e:
                    write_string(
                        'WARNING: skipping cookie file entry due to %s: %r\n'
                        % (e, line), sys.stderr)
                    continue
        cf.seek(0)
        self._really_load(cf, filename, ignore_discard, ignore_expires)
        # Session cookies are denoted by either `expires` field set to
        # an empty string or 0. MozillaCookieJar only recognizes the former
        # (see [1]), so we need to force the latter to be recognized as session
        # cookies on our own.
        # Session cookies may be important for cookies-based authentication,
        # e.g. usually, when user does not check 'Remember me' check box while
        # logging in on a site, some important cookies are stored as session
        # cookies so that not recognizing them will result in failed login.
        # 1. https://bugs.python.org/issue17164
        for cookie in self:
            # Treat `expires=0` cookies as session cookies
            if cookie.expires == 0:
                cookie.expires = None
                cookie.discard = True


class YoutubeDLCookieProcessor(compat_urllib_request.HTTPCookieProcessor):
    def __init__(self, cookiejar=None):
        compat_urllib_request.HTTPCookieProcessor.__init__(self, cookiejar)

    def http_response(self, request, response):
        # Python 2 will choke on next HTTP request in row if there are non-ASCII
        # characters in Set-Cookie HTTP header of last response (see
        # https://github.com/ytdl-org/youtube-dl/issues/6769).
        # In order to at least prevent crashing we will percent encode Set-Cookie
        # header before HTTPCookieProcessor starts processing it.
        # if sys.version_info < (3, 0) and response.headers:
        #     for set_cookie_header in ('Set-Cookie', 'Set-Cookie2'):
        #         set_cookie = response.headers.get(set_cookie_header)
        #         if set_cookie:
        #             set_cookie_escaped = compat_urllib_parse.quote(set_cookie, b"%/;:@&=+$,!~*'()?#[] ")
        #             if set_cookie != set_cookie_escaped:
        #                 del response.headers[set_cookie_header]
        #                 response.headers[set_cookie_header] = set_cookie_escaped
        return compat_urllib_request.HTTPCookieProcessor.http_response(self, request, response)

    https_request = compat_urllib_request.HTTPCookieProcessor.http_request
    https_response = http_response


class YoutubeDLRedirectHandler(compat_urllib_request.HTTPRedirectHandler):
    """YoutubeDL redirect handler

    The code is based on HTTPRedirectHandler implementation from CPython [1].

    This redirect handler solves two issues:
     - ensures redirect URL is always unicode under python 2
     - introduces support for experimental HTTP response status code
       308 Permanent Redirect [2] used by some sites [3]

    1. https://github.com/python/cpython/blob/master/Lib/urllib/request.py
    2. https://developer.mozilla.org/en-US/docs/Web/HTTP/Status/308
    3. https://github.com/ytdl-org/youtube-dl/issues/28768
    """

    http_error_301 = http_error_303 = http_error_307 = http_error_308 = compat_urllib_request.HTTPRedirectHandler.http_error_302

    def redirect_request(self, req, fp, code, msg, headers, newurl):
        """Return a Request or None in response to a redirect.

        This is called by the http_error_30x methods when a
        redirection response is received. If a redirection should
        take place, return a new Request to allow http_error_30x to
        perform the redirect. Otherwise, raise HTTPError if no-one
        else should try to handle this url. Return None if you can't
        but another Handler might.
        """
        m = req.get_method()
        if (not (code in (301, 302, 303, 307, 308) and m in ("GET", "HEAD")
                 or code in (301, 302, 303) and m == "POST")):
            raise compat_HTTPError(req.full_url, code, msg, headers, fp)
        # Strictly (according to RFC 2616), 301 or 302 in response to
        # a POST MUST NOT cause a redirection without confirmation
        # from the user (of urllib.request, in this case). In practice,
        # essentially all clients do redirect in this case, so we do
        # the same.

        # On python 2 urlh.geturl() may sometimes return redirect URL
        # as byte string instead of unicode. This workaround allows
        # to force it always return unicode.
        if sys.version_info[0] < 3:
            newurl = compat_str(newurl)

        # Be conciliant with URIs containing a space. This is mainly
        # redundant with the more complete encoding done in http_error_302(),
        # but it is kept for compatibility with other callers.
        newurl = newurl.replace(' ', '%20')

        CONTENT_HEADERS = ("content-length", "content-type")
        # NB: don't use dict comprehension for python 2.6 compatibility
        newheaders = dict((k, v) for k, v in req.headers.items()
                          if k.lower() not in CONTENT_HEADERS)
        return compat_urllib_request.Request(
            newurl, headers=newheaders, origin_req_host=req.origin_req_host,
            unverifiable=True)


def extract_timezone(date_str):
    m = re.search(
        r'''(?x)
            ^.{8,}?                                          # >=8 char non-TZ prefix, if present
            (?P<tz>Z|                                        # just the UTC Z, or
                (?:(?<=.\b\d{4}|\b\d{2}:\d\d)|               # preceded by 4 digits or hh:mm or
                   (?<!.\b[a-zA-Z]{3}|[a-zA-Z]{4}|..\b\d\d)) # not preceded by 3 alpha word or >= 4 alpha or 2 digits
                [ ]?                                         # optional space
                (?P<sign>\+|-)                               # +/-
                (?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2})   # hh[:]mm
            $)
        ''', date_str)
    if not m:
        timezone = datetime.timedelta()
    else:
        date_str = date_str[:-len(m.group('tz'))]
        if not m.group('sign'):
            timezone = datetime.timedelta()
        else:
            sign = 1 if m.group('sign') == '+' else -1
            timezone = datetime.timedelta(
                hours=sign * int(m.group('hours')),
                minutes=sign * int(m.group('minutes')))
    return timezone, date_str


def parse_iso8601(date_str, delimiter='T', timezone=None):
    """ Return a UNIX timestamp from the given date """

    if date_str is None:
        return None

    date_str = re.sub(r'\.[0-9]+', '', date_str)

    if timezone is None:
        timezone, date_str = extract_timezone(date_str)

    try:
        date_format = '%Y-%m-%d{0}%H:%M:%S'.format(delimiter)
        dt = datetime.datetime.strptime(date_str, date_format) - timezone
        return calendar.timegm(dt.timetuple())
    except ValueError:
        pass

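# Illustrative sketch for parse_iso8601: the timezone offset is extracted by
# extract_timezone and folded into the returned UNIX timestamp:
#   >>> parse_iso8601('2014-03-23T23:04:26+0100')
#   1395612266

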
def date_formats(day_first=True):
    return DATE_FORMATS_DAY_FIRST if day_first else DATE_FORMATS_MONTH_FIRST


def unified_strdate(date_str, day_first=True):
    """Return a string with the date in the format YYYYMMDD"""

    if date_str is None:
        return None
    upload_date = None
    # Replace commas
    date_str = date_str.replace(',', ' ')
    # Remove AM/PM + timezone
    date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)
    _, date_str = extract_timezone(date_str)

    for expression in date_formats(day_first):
        try:
            upload_date = datetime.datetime.strptime(date_str, expression).strftime('%Y%m%d')
        except ValueError:
            pass
    if upload_date is None:
        timetuple = email.utils.parsedate_tz(date_str)
        if timetuple:
            try:
                upload_date = datetime.datetime(*timetuple[:6]).strftime('%Y%m%d')
            except ValueError:
                pass
    if upload_date is not None:
        return compat_str(upload_date)

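# Illustrative sketch for unified_strdate:
#   >>> unified_strdate('December 21, 2010')
#   '20101221'

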
def unified_timestamp(date_str, day_first=True):
    if date_str is None:
        return None

    date_str = re.sub(r'[,|]', '', date_str)

    pm_delta = 12 if re.search(r'(?i)PM', date_str) else 0
    timezone, date_str = extract_timezone(date_str)

    # Remove AM/PM + timezone
    date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)

    # Remove unrecognized timezones from ISO 8601 alike timestamps
    m = re.search(r'\d{1,2}:\d{1,2}(?:\.\d+)?(?P<tz>\s*[A-Z]+)$', date_str)
    if m:
        date_str = date_str[:-len(m.group('tz'))]

    # Python only supports microseconds, so remove nanoseconds
    m = re.search(r'^([0-9]{4,}-[0-9]{1,2}-[0-9]{1,2}T[0-9]{1,2}:[0-9]{1,2}:[0-9]{1,2}\.[0-9]{6})[0-9]+$', date_str)
    if m:
        date_str = m.group(1)

    for expression in date_formats(day_first):
        try:
            dt = datetime.datetime.strptime(date_str, expression) - timezone + datetime.timedelta(hours=pm_delta)
            return calendar.timegm(dt.timetuple())
        except ValueError:
            pass
    timetuple = email.utils.parsedate_tz(date_str)
    if timetuple:
        return calendar.timegm(timetuple) + pm_delta * 3600


def determine_ext(url, default_ext='unknown_video'):
    if url is None or '.' not in url:
        return default_ext
    guess = url.partition('?')[0].rpartition('.')[2]
    if re.match(r'^[A-Za-z0-9]+$', guess):
        return guess
    # Try extract ext from URLs like http://example.com/foo/bar.mp4/?download
    elif guess.rstrip('/') in KNOWN_EXTENSIONS:
        return guess.rstrip('/')
    else:
        return default_ext

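# Illustrative sketches for determine_ext (the URL is a placeholder):
#   >>> determine_ext('song.mp3')
#   'mp3'
#   >>> determine_ext('http://example.com/foo/bar.mp4/?download')
#   'mp4'

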
1720 def subtitles_filename(filename, sub_lang, sub_format, expected_real_ext=None):
1721 return replace_extension(filename, sub_lang + '.' + sub_format, expected_real_ext)
1722
1723
1724 def datetime_from_str(date_str, precision='auto', format='%Y%m%d'):
1725 """
1726 Return a datetime object from a string in the format YYYYMMDD or
1727 (now|today|date)[+-][0-9](microsecond|second|minute|hour|day|week|month|year)(s)?
1728
1729 format: string date format used to return datetime object from
1730 precision: round the time portion of a datetime object.
1731 auto|microsecond|second|minute|hour|day.
1732 auto: round to the unit provided in date_str (if applicable).
1733 """
1734 auto_precision = False
1735 if precision == 'auto':
1736 auto_precision = True
1737 precision = 'microsecond'
1738 today = datetime_round(datetime.datetime.now(), precision)
1739 if date_str in ('now', 'today'):
1740 return today
1741 if date_str == 'yesterday':
1742 return today - datetime.timedelta(days=1)
1743 match = re.match(
1744 r'(?P<start>.+)(?P<sign>[+-])(?P<time>\d+)(?P<unit>microsecond|second|minute|hour|day|week|month|year)(s)?',
1745 date_str)
1746 if match is not None:
1747 start_time = datetime_from_str(match.group('start'), precision, format)
1748 time = int(match.group('time')) * (-1 if match.group('sign') == '-' else 1)
1749 unit = match.group('unit')
1750 if unit == 'month' or unit == 'year':
1751 new_date = datetime_add_months(start_time, time * 12 if unit == 'year' else time)
1752 unit = 'day'
1753 else:
1754 if unit == 'week':
1755 unit = 'day'
1756 time *= 7
1757 delta = datetime.timedelta(**{unit + 's': time})
1758 new_date = start_time + delta
1759 if auto_precision:
1760 return datetime_round(new_date, unit)
1761 return new_date
1762
1763 return datetime_round(datetime.datetime.strptime(date_str, format), precision)
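
# Illustrative uses of the relative-date syntax (assumed behaviour):
#   datetime_from_str('now-1day')                       # yesterday, rounded to the day ('auto')
#   datetime_from_str('now-2hours', precision='second') # two hours ago, rounded to the second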
1764
1765
1766 def date_from_str(date_str, format='%Y%m%d'):
1767 """
1768 Return a date object from a string in the format YYYYMMDD or
1769 (now|today|yesterday|date)[+-][0-9]+(microsecond|second|minute|hour|day|week|month|year)(s)?
1770
1771 format: string date format used to parse date_str
1772 """
1773 return datetime_from_str(date_str, precision='microsecond', format=format).date()
1774
1775
1776 def datetime_add_months(dt, months):
1777 """Increment/Decrement a datetime object by months."""
1778 month = dt.month + months - 1
1779 year = dt.year + month // 12
1780 month = month % 12 + 1
1781 day = min(dt.day, calendar.monthrange(year, month)[1])
1782 return dt.replace(year, month, day)
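
# The day is clamped to the length of the target month (assumed):
#   >>> datetime_add_months(datetime.datetime(2021, 1, 31), 1)
#   datetime.datetime(2021, 2, 28, 0, 0)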
1783
1784
1785 def datetime_round(dt, precision='day'):
1786 """
1787 Round a datetime object's time to a specific precision
1788 """
1789 if precision == 'microsecond':
1790 return dt
1791
1792 unit_seconds = {
1793 'day': 86400,
1794 'hour': 3600,
1795 'minute': 60,
1796 'second': 1,
1797 }
1798 roundto = lambda x, n: ((x + n / 2) // n) * n
1799 timestamp = calendar.timegm(dt.timetuple())
1800 return datetime.datetime.utcfromtimestamp(roundto(timestamp, unit_seconds[precision]))
1801
1802
1803 def hyphenate_date(date_str):
1804 """
1805 Convert a date in 'YYYYMMDD' format to 'YYYY-MM-DD' format"""
1806 match = re.match(r'^(\d\d\d\d)(\d\d)(\d\d)$', date_str)
1807 if match is not None:
1808 return '-'.join(match.groups())
1809 else:
1810 return date_str
1811
1812
1813 class DateRange(object):
1814 """Represents a time interval between two dates"""
1815
1816 def __init__(self, start=None, end=None):
1817 """start and end must be strings in the format accepted by date"""
1818 if start is not None:
1819 self.start = date_from_str(start)
1820 else:
1821 self.start = datetime.datetime.min.date()
1822 if end is not None:
1823 self.end = date_from_str(end)
1824 else:
1825 self.end = datetime.datetime.max.date()
1826 if self.start > self.end:
1827 raise ValueError('Date range: "%s", the start date must be before the end date' % self)
1828
1829 @classmethod
1830 def day(cls, day):
1831 """Returns a range that only contains the given day"""
1832 return cls(day, day)
1833
1834 def __contains__(self, date):
1835 """Check if the date is in the range"""
1836 if not isinstance(date, datetime.date):
1837 date = date_from_str(date)
1838 return self.start <= date <= self.end
1839
1840 def __str__(self):
1841 return '%s - %s' % (self.start.isoformat(), self.end.isoformat())
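
# Illustrative usage (assumed):
#   >>> '20210615' in DateRange('20210101', '20211231')
#   True
#   >>> '20220101' in DateRange.day('20210615')
#   False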
1842
1843
1844 def platform_name():
1845 """ Returns the platform name as a compat_str """
1846 res = platform.platform()
1847 if isinstance(res, bytes):
1848 res = res.decode(preferredencoding())
1849
1850 assert isinstance(res, compat_str)
1851 return res
1852
1853
1854 def get_windows_version():
1855 ''' Get Windows version. None if it's not running on Windows '''
1856 if compat_os_name == 'nt':
1857 return version_tuple(platform.win32_ver()[1])
1858 else:
1859 return None
1860
1861
1862 def _windows_write_string(s, out):
1863 """ Returns True if the string was written using special methods,
1864 False if it has yet to be written out."""
1865 # Adapted from http://stackoverflow.com/a/3259271/35070
1866
1867 import ctypes.wintypes
1868
1869 WIN_OUTPUT_IDS = {
1870 1: -11,
1871 2: -12,
1872 }
1873
1874 try:
1875 fileno = out.fileno()
1876 except AttributeError:
1877 # If the output stream doesn't have a fileno, it's virtual
1878 return False
1879 except io.UnsupportedOperation:
1880 # Some strange Windows pseudo files?
1881 return False
1882 if fileno not in WIN_OUTPUT_IDS:
1883 return False
1884
1885 GetStdHandle = compat_ctypes_WINFUNCTYPE(
1886 ctypes.wintypes.HANDLE, ctypes.wintypes.DWORD)(
1887 ('GetStdHandle', ctypes.windll.kernel32))
1888 h = GetStdHandle(WIN_OUTPUT_IDS[fileno])
1889
1890 WriteConsoleW = compat_ctypes_WINFUNCTYPE(
1891 ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE, ctypes.wintypes.LPWSTR,
1892 ctypes.wintypes.DWORD, ctypes.POINTER(ctypes.wintypes.DWORD),
1893 ctypes.wintypes.LPVOID)(('WriteConsoleW', ctypes.windll.kernel32))
1894 written = ctypes.wintypes.DWORD(0)
1895
1896 GetFileType = compat_ctypes_WINFUNCTYPE(ctypes.wintypes.DWORD, ctypes.wintypes.DWORD)(('GetFileType', ctypes.windll.kernel32))
1897 FILE_TYPE_CHAR = 0x0002
1898 FILE_TYPE_REMOTE = 0x8000
1899 GetConsoleMode = compat_ctypes_WINFUNCTYPE(
1900 ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE,
1901 ctypes.POINTER(ctypes.wintypes.DWORD))(
1902 ('GetConsoleMode', ctypes.windll.kernel32))
1903 INVALID_HANDLE_VALUE = ctypes.wintypes.DWORD(-1).value
1904
1905 def not_a_console(handle):
1906 if handle == INVALID_HANDLE_VALUE or handle is None:
1907 return True
1908 return ((GetFileType(handle) & ~FILE_TYPE_REMOTE) != FILE_TYPE_CHAR
1909 or GetConsoleMode(handle, ctypes.byref(ctypes.wintypes.DWORD())) == 0)
1910
1911 if not_a_console(h):
1912 return False
1913
1914 def next_nonbmp_pos(s):
1915 try:
1916 return next(i for i, c in enumerate(s) if ord(c) > 0xffff)
1917 except StopIteration:
1918 return len(s)
1919
1920 while s:
1921 count = min(next_nonbmp_pos(s), 1024)
1922
1923 ret = WriteConsoleW(
1924 h, s, count if count else 2, ctypes.byref(written), None)
1925 if ret == 0:
1926 raise OSError('Failed to write string')
1927 if not count: # We just wrote a non-BMP character
1928 assert written.value == 2
1929 s = s[1:]
1930 else:
1931 assert written.value > 0
1932 s = s[written.value:]
1933 return True
1934
1935
1936 def write_string(s, out=None, encoding=None):
1937 if out is None:
1938 out = sys.stderr
1939 assert type(s) == compat_str
1940
1941 if sys.platform == 'win32' and encoding is None and hasattr(out, 'fileno'):
1942 if _windows_write_string(s, out):
1943 return
1944
1945 if ('b' in getattr(out, 'mode', '')
1946 or sys.version_info[0] < 3): # Python 2 lies about mode of sys.stderr
1947 byt = s.encode(encoding or preferredencoding(), 'ignore')
1948 out.write(byt)
1949 elif hasattr(out, 'buffer'):
1950 enc = encoding or getattr(out, 'encoding', None) or preferredencoding()
1951 byt = s.encode(enc, 'ignore')
1952 out.buffer.write(byt)
1953 else:
1954 out.write(s)
1955 out.flush()
1956
1957
1958 def bytes_to_intlist(bs):
1959 if not bs:
1960 return []
1961 if isinstance(bs[0], int): # Python 3
1962 return list(bs)
1963 else:
1964 return [ord(c) for c in bs]
1965
1966
1967 def intlist_to_bytes(xs):
1968 if not xs:
1969 return b''
1970 return compat_struct_pack('%dB' % len(xs), *xs)
1971
1972
1973 # Cross-platform file locking
1974 if sys.platform == 'win32':
1975 import ctypes.wintypes
1976 import msvcrt
1977
1978 class OVERLAPPED(ctypes.Structure):
1979 _fields_ = [
1980 ('Internal', ctypes.wintypes.LPVOID),
1981 ('InternalHigh', ctypes.wintypes.LPVOID),
1982 ('Offset', ctypes.wintypes.DWORD),
1983 ('OffsetHigh', ctypes.wintypes.DWORD),
1984 ('hEvent', ctypes.wintypes.HANDLE),
1985 ]
1986
1987 kernel32 = ctypes.windll.kernel32
1988 LockFileEx = kernel32.LockFileEx
1989 LockFileEx.argtypes = [
1990 ctypes.wintypes.HANDLE, # hFile
1991 ctypes.wintypes.DWORD, # dwFlags
1992 ctypes.wintypes.DWORD, # dwReserved
1993 ctypes.wintypes.DWORD, # nNumberOfBytesToLockLow
1994 ctypes.wintypes.DWORD, # nNumberOfBytesToLockHigh
1995 ctypes.POINTER(OVERLAPPED) # Overlapped
1996 ]
1997 LockFileEx.restype = ctypes.wintypes.BOOL
1998 UnlockFileEx = kernel32.UnlockFileEx
1999 UnlockFileEx.argtypes = [
2000 ctypes.wintypes.HANDLE, # hFile
2001 ctypes.wintypes.DWORD, # dwReserved
2002 ctypes.wintypes.DWORD, # nNumberOfBytesToLockLow
2003 ctypes.wintypes.DWORD, # nNumberOfBytesToLockHigh
2004 ctypes.POINTER(OVERLAPPED) # Overlapped
2005 ]
2006 UnlockFileEx.restype = ctypes.wintypes.BOOL
2007 whole_low = 0xffffffff
2008 whole_high = 0x7fffffff
2009
2010 def _lock_file(f, exclusive):
2011 overlapped = OVERLAPPED()
2012 overlapped.Offset = 0
2013 overlapped.OffsetHigh = 0
2014 overlapped.hEvent = 0
2015 f._lock_file_overlapped_p = ctypes.pointer(overlapped)
2016 handle = msvcrt.get_osfhandle(f.fileno())
2017 if not LockFileEx(handle, 0x2 if exclusive else 0x0, 0,
2018 whole_low, whole_high, f._lock_file_overlapped_p):
2019 raise OSError('Locking file failed: %r' % ctypes.FormatError())
2020
2021 def _unlock_file(f):
2022 assert f._lock_file_overlapped_p
2023 handle = msvcrt.get_osfhandle(f.fileno())
2024 if not UnlockFileEx(handle, 0,
2025 whole_low, whole_high, f._lock_file_overlapped_p):
2026 raise OSError('Unlocking file failed: %r' % ctypes.FormatError())
2027
2028 else:
2029 # Some platforms, such as Jython, are missing fcntl
2030 try:
2031 import fcntl
2032
2033 def _lock_file(f, exclusive):
2034 fcntl.flock(f, fcntl.LOCK_EX if exclusive else fcntl.LOCK_SH)
2035
2036 def _unlock_file(f):
2037 fcntl.flock(f, fcntl.LOCK_UN)
2038 except ImportError:
2039 UNSUPPORTED_MSG = 'file locking is not supported on this platform'
2040
2041 def _lock_file(f, exclusive):
2042 raise IOError(UNSUPPORTED_MSG)
2043
2044 def _unlock_file(f):
2045 raise IOError(UNSUPPORTED_MSG)
2046
2047
2048 class locked_file(object):
2049 def __init__(self, filename, mode, encoding=None):
2050 assert mode in ['r', 'a', 'w']
2051 self.f = io.open(filename, mode, encoding=encoding)
2052 self.mode = mode
2053
2054 def __enter__(self):
2055 exclusive = self.mode != 'r'
2056 try:
2057 _lock_file(self.f, exclusive)
2058 except IOError:
2059 self.f.close()
2060 raise
2061 return self
2062
2063 def __exit__(self, etype, value, traceback):
2064 try:
2065 _unlock_file(self.f)
2066 finally:
2067 self.f.close()
2068
2069 def __iter__(self):
2070 return iter(self.f)
2071
2072 def write(self, *args):
2073 return self.f.write(*args)
2074
2075 def read(self, *args):
2076 return self.f.read(*args)
2077
2078
2079 def get_filesystem_encoding():
2080 encoding = sys.getfilesystemencoding()
2081 return encoding if encoding is not None else 'utf-8'
2082
2083
2084 def shell_quote(args):
2085 quoted_args = []
2086 encoding = get_filesystem_encoding()
2087 for a in args:
2088 if isinstance(a, bytes):
2089 # We may get a filename encoded with 'encodeFilename'
2090 a = a.decode(encoding)
2091 quoted_args.append(compat_shlex_quote(a))
2092 return ' '.join(quoted_args)
2093
2094
2095 def smuggle_url(url, data):
2096 """ Pass additional data in a URL for internal use. """
2097
2098 url, idata = unsmuggle_url(url, {})
2099 data.update(idata)
2100 sdata = compat_urllib_parse_urlencode(
2101 {'__youtubedl_smuggle': json.dumps(data)})
2102 return url + '#' + sdata
2103
2104
2105 def unsmuggle_url(smug_url, default=None):
2106 if '#__youtubedl_smuggle' not in smug_url:
2107 return smug_url, default
2108 url, _, sdata = smug_url.rpartition('#')
2109 jsond = compat_parse_qs(sdata)['__youtubedl_smuggle'][0]
2110 data = json.loads(jsond)
2111 return url, data
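
# The two helpers round-trip (assumed):
#   >>> unsmuggle_url(smuggle_url('https://example.com/v', {'referer': 'x'}))
#   ('https://example.com/v', {'referer': 'x'})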
2112
2113
2114 def format_decimal_suffix(num, fmt='%d%s', *, factor=1000):
2115 """ Formats numbers with decimal sufixes like K, M, etc """
2116 num, factor = float_or_none(num), float(factor)
2117 if num is None:
2118 return None
2119 exponent = 0 if num == 0 else int(math.log(num, factor))
2120 suffix = ['', *'kMGTPEZY'][exponent]
2121 if factor == 1024:
2122 suffix = {'k': 'Ki', '': ''}.get(suffix, f'{suffix}i')
2123 converted = num / (factor ** exponent)
2124 return fmt % (converted, suffix)
2125
2126
2127 def format_bytes(bytes):
2128 return format_decimal_suffix(bytes, '%.2f%sB', factor=1024) or 'N/A'
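
# Illustrative outputs (assumed):
#   >>> format_decimal_suffix(1234567, '%.1f%s')
#   '1.2M'
#   >>> format_bytes(1536)
#   '1.50KiB'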
2129
2130
2131 def lookup_unit_table(unit_table, s):
2132 units_re = '|'.join(re.escape(u) for u in unit_table)
2133 m = re.match(
2134 r'(?P<num>[0-9]+(?:[,.][0-9]*)?)\s*(?P<unit>%s)\b' % units_re, s)
2135 if not m:
2136 return None
2137 num_str = m.group('num').replace(',', '.')
2138 mult = unit_table[m.group('unit')]
2139 return int(float(num_str) * mult)
2140
2141
2142 def parse_filesize(s):
2143 if s is None:
2144 return None
2145
2146 # The lower-case forms are of course incorrect and unofficial,
2147 # but we support those too
2148 _UNIT_TABLE = {
2149 'B': 1,
2150 'b': 1,
2151 'bytes': 1,
2152 'KiB': 1024,
2153 'KB': 1000,
2154 'kB': 1024,
2155 'Kb': 1000,
2156 'kb': 1000,
2157 'kilobytes': 1000,
2158 'kibibytes': 1024,
2159 'MiB': 1024 ** 2,
2160 'MB': 1000 ** 2,
2161 'mB': 1024 ** 2,
2162 'Mb': 1000 ** 2,
2163 'mb': 1000 ** 2,
2164 'megabytes': 1000 ** 2,
2165 'mebibytes': 1024 ** 2,
2166 'GiB': 1024 ** 3,
2167 'GB': 1000 ** 3,
2168 'gB': 1024 ** 3,
2169 'Gb': 1000 ** 3,
2170 'gb': 1000 ** 3,
2171 'gigabytes': 1000 ** 3,
2172 'gibibytes': 1024 ** 3,
2173 'TiB': 1024 ** 4,
2174 'TB': 1000 ** 4,
2175 'tB': 1024 ** 4,
2176 'Tb': 1000 ** 4,
2177 'tb': 1000 ** 4,
2178 'terabytes': 1000 ** 4,
2179 'tebibytes': 1024 ** 4,
2180 'PiB': 1024 ** 5,
2181 'PB': 1000 ** 5,
2182 'pB': 1024 ** 5,
2183 'Pb': 1000 ** 5,
2184 'pb': 1000 ** 5,
2185 'petabytes': 1000 ** 5,
2186 'pebibytes': 1024 ** 5,
2187 'EiB': 1024 ** 6,
2188 'EB': 1000 ** 6,
2189 'eB': 1024 ** 6,
2190 'Eb': 1000 ** 6,
2191 'eb': 1000 ** 6,
2192 'exabytes': 1000 ** 6,
2193 'exbibytes': 1024 ** 6,
2194 'ZiB': 1024 ** 7,
2195 'ZB': 1000 ** 7,
2196 'zB': 1024 ** 7,
2197 'Zb': 1000 ** 7,
2198 'zb': 1000 ** 7,
2199 'zettabytes': 1000 ** 7,
2200 'zebibytes': 1024 ** 7,
2201 'YiB': 1024 ** 8,
2202 'YB': 1000 ** 8,
2203 'yB': 1024 ** 8,
2204 'Yb': 1000 ** 8,
2205 'yb': 1000 ** 8,
2206 'yottabytes': 1000 ** 8,
2207 'yobibytes': 1024 ** 8,
2208 }
2209
2210 return lookup_unit_table(_UNIT_TABLE, s)
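
# Illustrative results (assumed; note that lower-case 'kB' maps to 1024
# in the table above):
#   >>> parse_filesize('1.5 MiB')
#   1572864
#   >>> parse_filesize('500 kB')
#   512000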
2211
2212
2213 def parse_count(s):
2214 if s is None:
2215 return None
2216
2217 s = re.sub(r'^[^\d]+\s', '', s).strip()
2218
2219 if re.match(r'^[\d,.]+$', s):
2220 return str_to_int(s)
2221
2222 _UNIT_TABLE = {
2223 'k': 1000,
2224 'K': 1000,
2225 'm': 1000 ** 2,
2226 'M': 1000 ** 2,
2227 'kk': 1000 ** 2,
2228 'KK': 1000 ** 2,
2229 'b': 1000 ** 3,
2230 'B': 1000 ** 3,
2231 }
2232
2233 ret = lookup_unit_table(_UNIT_TABLE, s)
2234 if ret is not None:
2235 return ret
2236
2237 mobj = re.match(r'([\d,.]+)(?:$|\s)', s)
2238 if mobj:
2239 return str_to_int(mobj.group(1))
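
# Illustrative results (assumed):
#   >>> parse_count('1.2M')
#   1200000
#   >>> parse_count('1,234 views')
#   1234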
2240
2241
2242 def parse_resolution(s):
2243 if s is None:
2244 return {}
2245
2246 mobj = re.search(r'(?<![a-zA-Z0-9])(?P<w>\d+)\s*[xX×,]\s*(?P<h>\d+)(?![a-zA-Z0-9])', s)
2247 if mobj:
2248 return {
2249 'width': int(mobj.group('w')),
2250 'height': int(mobj.group('h')),
2251 }
2252
2253 mobj = re.search(r'(?<![a-zA-Z0-9])(\d+)[pPiI](?![a-zA-Z0-9])', s)
2254 if mobj:
2255 return {'height': int(mobj.group(1))}
2256
2257 mobj = re.search(r'\b([48])[kK]\b', s)
2258 if mobj:
2259 return {'height': int(mobj.group(1)) * 540}
2260
2261 return {}
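
# Illustrative results (assumed):
#   >>> parse_resolution('1920x1080')
#   {'width': 1920, 'height': 1080}
#   >>> parse_resolution('720p')
#   {'height': 720}
#   >>> parse_resolution('4K')
#   {'height': 2160}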
2262
2263
2264 def parse_bitrate(s):
2265 if not isinstance(s, compat_str):
2266 return
2267 mobj = re.search(r'\b(\d+)\s*kbps', s)
2268 if mobj:
2269 return int(mobj.group(1))
2270
2271
2272 def month_by_name(name, lang='en'):
2273 """ Return the number of a month by (locale-independently) English name """
2274
2275 month_names = MONTH_NAMES.get(lang, MONTH_NAMES['en'])
2276
2277 try:
2278 return month_names.index(name) + 1
2279 except ValueError:
2280 return None
2281
2282
2283 def month_by_abbreviation(abbrev):
2284 """ Return the number of a month by (locale-independently) English
2285 abbreviation """
2286
2287 try:
2288 return [s[:3] for s in ENGLISH_MONTH_NAMES].index(abbrev) + 1
2289 except ValueError:
2290 return None
2291
2292
2293 def fix_xml_ampersands(xml_str):
2294 """Replace all the '&' by '&amp;' in XML"""
2295 return re.sub(
2296 r'&(?!amp;|lt;|gt;|apos;|quot;|#x[0-9a-fA-F]{,4};|#[0-9]{,4};)',
2297 '&amp;',
2298 xml_str)
2299
2300
2301 def setproctitle(title):
2302 assert isinstance(title, compat_str)
2303
2304 # ctypes in Jython is not complete
2305 # http://bugs.jython.org/issue2148
2306 if sys.platform.startswith('java'):
2307 return
2308
2309 try:
2310 libc = ctypes.cdll.LoadLibrary('libc.so.6')
2311 except OSError:
2312 return
2313 except TypeError:
2314 # LoadLibrary in Windows Python 2.7.13 only expects
2315 # a bytestring, but since unicode_literals turns
2316 # every string into a unicode string, it fails.
2317 return
2318 title_bytes = title.encode('utf-8')
2319 buf = ctypes.create_string_buffer(len(title_bytes))
2320 buf.value = title_bytes
2321 try:
2322 libc.prctl(15, buf, 0, 0, 0)
2323 except AttributeError:
2324 return # Strange libc, just skip this
2325
2326
2327 def remove_start(s, start):
2328 return s[len(start):] if s is not None and s.startswith(start) else s
2329
2330
2331 def remove_end(s, end):
2332 return s[:-len(end)] if s is not None and s.endswith(end) else s
2333
2334
2335 def remove_quotes(s):
2336 if s is None or len(s) < 2:
2337 return s
2338 for quote in ('"', "'", ):
2339 if s[0] == quote and s[-1] == quote:
2340 return s[1:-1]
2341 return s
2342
2343
2344 def get_domain(url):
2345 domain = re.match(r'(?:https?:\/\/)?(?:www\.)?(?P<domain>[^\n\/]+\.[^\n\/]+)(?:\/(.*))?', url)
2346 return domain.group('domain') if domain else None
2347
2348
2349 def url_basename(url):
2350 path = compat_urlparse.urlparse(url).path
2351 return path.strip('/').split('/')[-1]
2352
2353
2354 def base_url(url):
2355 return re.match(r'https?://[^?#&]+/', url).group()
2356
2357
2358 def urljoin(base, path):
2359 if isinstance(path, bytes):
2360 path = path.decode('utf-8')
2361 if not isinstance(path, compat_str) or not path:
2362 return None
2363 if re.match(r'^(?:[a-zA-Z][a-zA-Z0-9+-.]*:)?//', path):
2364 return path
2365 if isinstance(base, bytes):
2366 base = base.decode('utf-8')
2367 if not isinstance(base, compat_str) or not re.match(
2368 r'^(?:https?:)?//', base):
2369 return None
2370 return compat_urlparse.urljoin(base, path)
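
# Illustrative behaviour (assumed): absolute and protocol-relative paths
# are returned as-is, everything else is resolved against base:
#   >>> urljoin('https://example.com/a/', 'b/c')
#   'https://example.com/a/b/c'
#   >>> urljoin('https://example.com/a/', '//cdn.example.com/x')
#   '//cdn.example.com/x'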
2371
2372
2373 class HEADRequest(compat_urllib_request.Request):
2374 def get_method(self):
2375 return 'HEAD'
2376
2377
2378 class PUTRequest(compat_urllib_request.Request):
2379 def get_method(self):
2380 return 'PUT'
2381
2382
2383 def int_or_none(v, scale=1, default=None, get_attr=None, invscale=1):
2384 if get_attr:
2385 if v is not None:
2386 v = getattr(v, get_attr, None)
2387 if v == '':
2388 v = None
2389 if v is None:
2390 return default
2391 try:
2392 return int(v) * invscale // scale
2393 except (ValueError, TypeError, OverflowError):
2394 return default
2395
2396
2397 def str_or_none(v, default=None):
2398 return default if v is None else compat_str(v)
2399
2400
2401 def str_to_int(int_str):
2402 """ A more relaxed version of int_or_none """
2403 if isinstance(int_str, compat_integer_types):
2404 return int_str
2405 elif isinstance(int_str, compat_str):
2406 int_str = re.sub(r'[,\.\+]', '', int_str)
2407 return int_or_none(int_str)
2408
2409
2410 def float_or_none(v, scale=1, invscale=1, default=None):
2411 if v is None:
2412 return default
2413 try:
2414 return float(v) * invscale / scale
2415 except (ValueError, TypeError):
2416 return default
2417
2418
2419 def bool_or_none(v, default=None):
2420 return v if isinstance(v, bool) else default
2421
2422
2423 def strip_or_none(v, default=None):
2424 return v.strip() if isinstance(v, compat_str) else default
2425
2426
2427 def url_or_none(url):
2428 if not url or not isinstance(url, compat_str):
2429 return None
2430 url = url.strip()
2431 return url if re.match(r'^(?:(?:https?|rt(?:m(?:pt?[es]?|fp)|sp[su]?)|mms|ftps?):)?//', url) else None
2432
2433
2434 def strftime_or_none(timestamp, date_format, default=None):
2435 datetime_object = None
2436 try:
2437 if isinstance(timestamp, compat_numeric_types): # unix timestamp
2438 datetime_object = datetime.datetime.utcfromtimestamp(timestamp)
2439 elif isinstance(timestamp, compat_str): # assume YYYYMMDD
2440 datetime_object = datetime.datetime.strptime(timestamp, '%Y%m%d')
2441 return datetime_object.strftime(date_format)
2442 except (ValueError, TypeError, AttributeError):
2443 return default
2444
2445
2446 def parse_duration(s):
2447 if not isinstance(s, compat_basestring):
2448 return None
2449 s = s.strip()
2450 if not s:
2451 return None
2452
2453 days, hours, mins, secs, ms = [None] * 5
2454 m = re.match(r'(?:(?:(?:(?P<days>[0-9]+):)?(?P<hours>[0-9]+):)?(?P<mins>[0-9]+):)?(?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?Z?$', s)
2455 if m:
2456 days, hours, mins, secs, ms = m.groups()
2457 else:
2458 m = re.match(
2459 r'''(?ix)(?:P?
2460 (?:
2461 [0-9]+\s*y(?:ears?)?\s*
2462 )?
2463 (?:
2464 [0-9]+\s*m(?:onths?)?\s*
2465 )?
2466 (?:
2467 [0-9]+\s*w(?:eeks?)?\s*
2468 )?
2469 (?:
2470 (?P<days>[0-9]+)\s*d(?:ays?)?\s*
2471 )?
2472 T)?
2473 (?:
2474 (?P<hours>[0-9]+)\s*h(?:ours?)?\s*
2475 )?
2476 (?:
2477 (?P<mins>[0-9]+)\s*m(?:in(?:ute)?s?)?\s*
2478 )?
2479 (?:
2480 (?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\s*s(?:ec(?:ond)?s?)?\s*
2481 )?Z?$''', s)
2482 if m:
2483 days, hours, mins, secs, ms = m.groups()
2484 else:
2485 m = re.match(r'(?i)(?:(?P<hours>[0-9.]+)\s*(?:hours?)|(?P<mins>[0-9.]+)\s*(?:mins?\.?|minutes?)\s*)Z?$', s)
2486 if m:
2487 hours, mins = m.groups()
2488 else:
2489 return None
2490
2491 duration = 0
2492 if secs:
2493 duration += float(secs)
2494 if mins:
2495 duration += float(mins) * 60
2496 if hours:
2497 duration += float(hours) * 60 * 60
2498 if days:
2499 duration += float(days) * 24 * 60 * 60
2500 if ms:
2501 duration += float(ms)
2502 return duration
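
# Illustrative results (assumed):
#   >>> parse_duration('1:02:03')
#   3723.0
#   >>> parse_duration('2h 30min')
#   9000.0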
2503
2504
2505 def prepend_extension(filename, ext, expected_real_ext=None):
2506 name, real_ext = os.path.splitext(filename)
2507 return (
2508 '{0}.{1}{2}'.format(name, ext, real_ext)
2509 if not expected_real_ext or real_ext[1:] == expected_real_ext
2510 else '{0}.{1}'.format(filename, ext))
2511
2512
2513 def replace_extension(filename, ext, expected_real_ext=None):
2514 name, real_ext = os.path.splitext(filename)
2515 return '{0}.{1}'.format(
2516 name if not expected_real_ext or real_ext[1:] == expected_real_ext else filename,
2517 ext)
2518
2519
2520 def check_executable(exe, args=[]):
2521 """ Checks if the given binary is installed somewhere in PATH, and returns its name.
2522 args can be a list of arguments for a short output (like -version) """
2523 try:
2524 Popen([exe] + args, stdout=subprocess.PIPE, stderr=subprocess.PIPE).communicate_or_kill()
2525 except OSError:
2526 return False
2527 return exe
2528
2529
2530 def _get_exe_version_output(exe, args):
2531 try:
2532 # STDIN should be redirected too. On UNIX-like systems, ffmpeg triggers
2533 # SIGTTOU if yt-dlp is run in the background.
2534 # See https://github.com/ytdl-org/youtube-dl/issues/955#issuecomment-209789656
2535 out, _ = Popen(
2536 [encodeArgument(exe)] + args, stdin=subprocess.PIPE,
2537 stdout=subprocess.PIPE, stderr=subprocess.STDOUT).communicate_or_kill()
2538 except OSError:
2539 return False
2540 if isinstance(out, bytes): # Python 2.x
2541 out = out.decode('ascii', 'ignore')
2542 return out
2543
2544
2545 def detect_exe_version(output, version_re=None, unrecognized='present'):
2546 assert isinstance(output, compat_str)
2547 if version_re is None:
2548 version_re = r'version\s+([-0-9._a-zA-Z]+)'
2549 m = re.search(version_re, output)
2550 if m:
2551 return m.group(1)
2552 else:
2553 return unrecognized
2554
2555
2556 def get_exe_version(exe, args=['--version'],
2557 version_re=None, unrecognized='present'):
2558 """ Returns the version of the specified executable,
2559 or False if the executable is not present """
2560 out = _get_exe_version_output(exe, args)
2561 return detect_exe_version(out, version_re, unrecognized) if out else False
2562
2563
2564 class LazyList(collections.abc.Sequence):
2565 ''' Lazy immutable list from an iterable
2566 Note that slices of a LazyList are lists and not LazyList'''
2567
2568 class IndexError(IndexError):
2569 pass
2570
2571 def __init__(self, iterable, *, reverse=False, _cache=None):
2572 self.__iterable = iter(iterable)
2573 self.__cache = [] if _cache is None else _cache
2574 self.__reversed = reverse
2575
2576 def __iter__(self):
2577 if self.__reversed:
2578 # We need to consume the entire iterable to iterate in reverse
2579 yield from self.exhaust()
2580 return
2581 yield from self.__cache
2582 for item in self.__iterable:
2583 self.__cache.append(item)
2584 yield item
2585
2586 def __exhaust(self):
2587 self.__cache.extend(self.__iterable)
2588 # Discard the emptied iterable to make it pickle-able
2589 self.__iterable = []
2590 return self.__cache
2591
2592 def exhaust(self):
2593 ''' Evaluate the entire iterable '''
2594 return self.__exhaust()[::-1 if self.__reversed else 1]
2595
2596 @staticmethod
2597 def __reverse_index(x):
2598 return None if x is None else -(x + 1)
2599
2600 def __getitem__(self, idx):
2601 if isinstance(idx, slice):
2602 if self.__reversed:
2603 idx = slice(self.__reverse_index(idx.start), self.__reverse_index(idx.stop), -(idx.step or 1))
2604 start, stop, step = idx.start, idx.stop, idx.step or 1
2605 elif isinstance(idx, int):
2606 if self.__reversed:
2607 idx = self.__reverse_index(idx)
2608 start, stop, step = idx, idx, 0
2609 else:
2610 raise TypeError('indices must be integers or slices')
2611 if ((start or 0) < 0 or (stop or 0) < 0
2612 or (start is None and step < 0)
2613 or (stop is None and step > 0)):
2614 # We need to consume the entire iterable to be able to slice from the end
2615 # Obviously, never use this with infinite iterables
2616 self.__exhaust()
2617 try:
2618 return self.__cache[idx]
2619 except IndexError as e:
2620 raise self.IndexError(e) from e
2621 n = max(start or 0, stop or 0) - len(self.__cache) + 1
2622 if n > 0:
2623 self.__cache.extend(itertools.islice(self.__iterable, n))
2624 try:
2625 return self.__cache[idx]
2626 except IndexError as e:
2627 raise self.IndexError(e) from e
2628
2629 def __bool__(self):
2630 try:
2631 self[-1] if self.__reversed else self[0]
2632 except self.IndexError:
2633 return False
2634 return True
2635
2636 def __len__(self):
2637 self.__exhaust()
2638 return len(self.__cache)
2639
2640 def __reversed__(self):
2641 return type(self)(self.__iterable, reverse=not self.__reversed, _cache=self.__cache)
2642
2643 def __copy__(self):
2644 return type(self)(self.__iterable, reverse=self.__reversed, _cache=self.__cache)
2645
2646 def __repr__(self):
2647 # repr and str should mimic a list. So we exhaust the iterable
2648 return repr(self.exhaust())
2649
2650 def __str__(self):
2651 return repr(self.exhaust())
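
# Illustrative usage (assumed): items are pulled from the iterable only as
# far as the requested index; negative indices force full evaluation:
#   >>> lst = LazyList(range(10))
#   >>> lst[3]
#   3
#   >>> lst[-2]
#   8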
2652
2653
2654 class PagedList:
2655
2656 class IndexError(IndexError):
2657 pass
2658
2659 def __len__(self):
2660 # This is only useful for tests
2661 return len(self.getslice())
2662
2663 def __init__(self, pagefunc, pagesize, use_cache=True):
2664 self._pagefunc = pagefunc
2665 self._pagesize = pagesize
2666 self._use_cache = use_cache
2667 self._cache = {}
2668
2669 def getpage(self, pagenum):
2670 page_results = self._cache.get(pagenum)
2671 if page_results is None:
2672 page_results = list(self._pagefunc(pagenum))
2673 if self._use_cache:
2674 self._cache[pagenum] = page_results
2675 return page_results
2676
2677 def getslice(self, start=0, end=None):
2678 return list(self._getslice(start, end))
2679
2680 def _getslice(self, start, end):
2681 raise NotImplementedError('This method must be implemented by subclasses')
2682
2683 def __getitem__(self, idx):
2684 # NOTE: cache must be enabled if this is used
2685 if not isinstance(idx, int) or idx < 0:
2686 raise TypeError('indices must be non-negative integers')
2687 entries = self.getslice(idx, idx + 1)
2688 if not entries:
2689 raise self.IndexError()
2690 return entries[0]
2691
2692
2693 class OnDemandPagedList(PagedList):
2694 def _getslice(self, start, end):
2695 for pagenum in itertools.count(start // self._pagesize):
2696 firstid = pagenum * self._pagesize
2697 nextfirstid = pagenum * self._pagesize + self._pagesize
2698 if start >= nextfirstid:
2699 continue
2700
2701 startv = (
2702 start % self._pagesize
2703 if firstid <= start < nextfirstid
2704 else 0)
2705 endv = (
2706 ((end - 1) % self._pagesize) + 1
2707 if (end is not None and firstid <= end <= nextfirstid)
2708 else None)
2709
2710 page_results = self.getpage(pagenum)
2711 if startv != 0 or endv is not None:
2712 page_results = page_results[startv:endv]
2713 yield from page_results
2714
2715 # A little optimization - if the current page is not "full", i.e. does
2716 # not contain page_size videos, then we can assume that this page
2717 # is the last one - there are no more IDs on further pages -
2718 # so there is no need to query again.
2719 if len(page_results) + startv < self._pagesize:
2720 break
2721
2722 # If we got the whole page, but the next page is not interesting,
2723 # break out early as well
2724 if end == nextfirstid:
2725 break
2726
2727
2728 class InAdvancePagedList(PagedList):
2729 def __init__(self, pagefunc, pagecount, pagesize):
2730 self._pagecount = pagecount
2731 PagedList.__init__(self, pagefunc, pagesize, True)
2732
2733 def _getslice(self, start, end):
2734 start_page = start // self._pagesize
2735 end_page = (
2736 self._pagecount if end is None else (end // self._pagesize + 1))
2737 skip_elems = start - start_page * self._pagesize
2738 only_more = None if end is None else end - start
2739 for pagenum in range(start_page, end_page):
2740 page_results = self.getpage(pagenum)
2741 if skip_elems:
2742 page_results = page_results[skip_elems:]
2743 skip_elems = None
2744 if only_more is not None:
2745 if len(page_results) < only_more:
2746 only_more -= len(page_results)
2747 else:
2748 yield from page_results[:only_more]
2749 break
2750 yield from page_results
2751
2752
2753 def uppercase_escape(s):
2754 unicode_escape = codecs.getdecoder('unicode_escape')
2755 return re.sub(
2756 r'\\U[0-9a-fA-F]{8}',
2757 lambda m: unicode_escape(m.group(0))[0],
2758 s)
2759
2760
2761 def lowercase_escape(s):
2762 unicode_escape = codecs.getdecoder('unicode_escape')
2763 return re.sub(
2764 r'\\u[0-9a-fA-F]{4}',
2765 lambda m: unicode_escape(m.group(0))[0],
2766 s)
2767
2768
2769 def escape_rfc3986(s):
2770 """Escape non-ASCII characters as suggested by RFC 3986"""
2771 if sys.version_info < (3, 0) and isinstance(s, compat_str):
2772 s = s.encode('utf-8')
2773 return compat_urllib_parse.quote(s, b"%/;:@&=+$,!~*'()?#[]")
2774
2775
2776 def escape_url(url):
2777 """Escape URL as suggested by RFC 3986"""
2778 url_parsed = compat_urllib_parse_urlparse(url)
2779 return url_parsed._replace(
2780 netloc=url_parsed.netloc.encode('idna').decode('ascii'),
2781 path=escape_rfc3986(url_parsed.path),
2782 params=escape_rfc3986(url_parsed.params),
2783 query=escape_rfc3986(url_parsed.query),
2784 fragment=escape_rfc3986(url_parsed.fragment)
2785 ).geturl()
2786
2787
2788 def parse_qs(url):
2789 return compat_parse_qs(compat_urllib_parse_urlparse(url).query)
2790
2791
2792 def read_batch_urls(batch_fd):
2793 def fixup(url):
2794 if not isinstance(url, compat_str):
2795 url = url.decode('utf-8', 'replace')
2796 BOM_UTF8 = ('\xef\xbb\xbf', '\ufeff')
2797 for bom in BOM_UTF8:
2798 if url.startswith(bom):
2799 url = url[len(bom):]
2800 url = url.lstrip()
2801 if not url or url.startswith(('#', ';', ']')):
2802 return False
2803 # "#" cannot be stripped out since it is part of the URI
2804 # However, it can be safely stripped out if it follows a whitespace
2805 return re.split(r'\s#', url, 1)[0].rstrip()
2806
2807 with contextlib.closing(batch_fd) as fd:
2808 return [url for url in map(fixup, fd) if url]
2809
2810
2811 def urlencode_postdata(*args, **kargs):
2812 return compat_urllib_parse_urlencode(*args, **kargs).encode('ascii')
2813
2814
2815 def update_url_query(url, query):
2816 if not query:
2817 return url
2818 parsed_url = compat_urlparse.urlparse(url)
2819 qs = compat_parse_qs(parsed_url.query)
2820 qs.update(query)
2821 return compat_urlparse.urlunparse(parsed_url._replace(
2822 query=compat_urllib_parse_urlencode(qs, True)))
2823
2824
2825 def update_Request(req, url=None, data=None, headers={}, query={}):
2826 req_headers = req.headers.copy()
2827 req_headers.update(headers)
2828 req_data = data or req.data
2829 req_url = update_url_query(url or req.get_full_url(), query)
2830 req_get_method = req.get_method()
2831 if req_get_method == 'HEAD':
2832 req_type = HEADRequest
2833 elif req_get_method == 'PUT':
2834 req_type = PUTRequest
2835 else:
2836 req_type = compat_urllib_request.Request
2837 new_req = req_type(
2838 req_url, data=req_data, headers=req_headers,
2839 origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
2840 if hasattr(req, 'timeout'):
2841 new_req.timeout = req.timeout
2842 return new_req
2843
2844
2845 def _multipart_encode_impl(data, boundary):
2846 content_type = 'multipart/form-data; boundary=%s' % boundary
2847
2848 out = b''
2849 for k, v in data.items():
2850 out += b'--' + boundary.encode('ascii') + b'\r\n'
2851 if isinstance(k, compat_str):
2852 k = k.encode('utf-8')
2853 if isinstance(v, compat_str):
2854 v = v.encode('utf-8')
2855 # RFC 2047 requires non-ASCII field names to be encoded, while RFC 7578
2856 # suggests sending UTF-8 directly. Firefox sends UTF-8, too
2857 content = b'Content-Disposition: form-data; name="' + k + b'"\r\n\r\n' + v + b'\r\n'
2858 if boundary.encode('ascii') in content:
2859 raise ValueError('Boundary overlaps with data')
2860 out += content
2861
2862 out += b'--' + boundary.encode('ascii') + b'--\r\n'
2863
2864 return out, content_type
2865
2866
2867 def multipart_encode(data, boundary=None):
2868 '''
2869 Encode a dict to RFC 7578-compliant form-data
2870
2871 data:
2872 A dict where keys and values can be either Unicode or bytes-like
2873 objects.
2874 boundary:
2875 If specified, it must be a Unicode object to be used as the boundary.
2876 Otherwise a random boundary is generated.
2877
2878 Reference: https://tools.ietf.org/html/rfc7578
2879 '''
2880 has_specified_boundary = boundary is not None
2881
2882 while True:
2883 if boundary is None:
2884 boundary = '---------------' + str(random.randrange(0x0fffffff, 0xffffffff))
2885
2886 try:
2887 out, content_type = _multipart_encode_impl(data, boundary)
2888 break
2889 except ValueError:
2890 if has_specified_boundary:
2891 raise
2892 boundary = None
2893
2894 return out, content_type
2895
2896
2897 def dict_get(d, key_or_keys, default=None, skip_false_values=True):
2898 if isinstance(key_or_keys, (list, tuple)):
2899 for key in key_or_keys:
2900 if key not in d or d[key] is None or skip_false_values and not d[key]:
2901 continue
2902 return d[key]
2903 return default
2904 return d.get(key_or_keys, default)
2905
2906
2907 def try_get(src, getter, expected_type=None):
2908 for get in variadic(getter):
2909 try:
2910 v = get(src)
2911 except (AttributeError, KeyError, TypeError, IndexError):
2912 pass
2913 else:
2914 if expected_type is None or isinstance(v, expected_type):
2915 return v
2916
2917
2918 def merge_dicts(*dicts):
2919 merged = {}
2920 for a_dict in dicts:
2921 for k, v in a_dict.items():
2922 if v is None:
2923 continue
2924 if (k not in merged
2925 or (isinstance(v, compat_str) and v
2926 and isinstance(merged[k], compat_str)
2927 and not merged[k])):
2928 merged[k] = v
2929 return merged
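
# The first non-None value wins, except that a non-empty string may replace
# an empty one (assumed):
#   >>> merge_dicts({'a': None, 'b': ''}, {'a': 1, 'b': 'x'})
#   {'b': 'x', 'a': 1}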
2930
2931
2932 def encode_compat_str(string, encoding=preferredencoding(), errors='strict'):
2933 return string if isinstance(string, compat_str) else compat_str(string, encoding, errors)
2934
2935
2936 US_RATINGS = {
2937 'G': 0,
2938 'PG': 10,
2939 'PG-13': 13,
2940 'R': 16,
2941 'NC': 18,
2942 }
2943
2944
2945 TV_PARENTAL_GUIDELINES = {
2946 'TV-Y': 0,
2947 'TV-Y7': 7,
2948 'TV-G': 0,
2949 'TV-PG': 0,
2950 'TV-14': 14,
2951 'TV-MA': 17,
2952 }
2953
2954
2955 def parse_age_limit(s):
2956 if type(s) == int:
2957 return s if 0 <= s <= 21 else None
2958 if not isinstance(s, compat_basestring):
2959 return None
2960 m = re.match(r'^(?P<age>\d{1,2})\+?$', s)
2961 if m:
2962 return int(m.group('age'))
2963 s = s.upper()
2964 if s in US_RATINGS:
2965 return US_RATINGS[s]
2966 m = re.match(r'^TV[_-]?(%s)$' % '|'.join(k[3:] for k in TV_PARENTAL_GUIDELINES), s)
2967 if m:
2968 return TV_PARENTAL_GUIDELINES['TV-' + m.group(1)]
2969 return None
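
# Illustrative results (assumed):
#   >>> parse_age_limit('18+')
#   18
#   >>> parse_age_limit('PG-13')
#   13
#   >>> parse_age_limit('TV-MA')
#   17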
2970
2971
2972 def strip_jsonp(code):
2973 return re.sub(
2974 r'''(?sx)^
2975 (?:window\.)?(?P<func_name>[a-zA-Z0-9_.$]*)
2976 (?:\s*&&\s*(?P=func_name))?
2977 \s*\(\s*(?P<callback_data>.*)\);?
2978 \s*?(?://[^\n]*)*$''',
2979 r'\g<callback_data>', code)
2980
2981
2982 def js_to_json(code, vars={}):
2983 # vars is a dict of var, val pairs to substitute
2984 COMMENT_RE = r'/\*(?:(?!\*/).)*?\*/|//[^\n]*\n'
2985 SKIP_RE = r'\s*(?:{comment})?\s*'.format(comment=COMMENT_RE)
2986 INTEGER_TABLE = (
2987 (r'(?s)^(0[xX][0-9a-fA-F]+){skip}:?$'.format(skip=SKIP_RE), 16),
2988 (r'(?s)^(0+[0-7]+){skip}:?$'.format(skip=SKIP_RE), 8),
2989 )
2990
2991 def fix_kv(m):
2992 v = m.group(0)
2993 if v in ('true', 'false', 'null'):
2994 return v
2995 elif v in ('undefined', 'void 0'):
2996 return 'null'
2997 elif v.startswith('/*') or v.startswith('//') or v.startswith('!') or v == ',':
2998 return ""
2999
3000 if v[0] in ("'", '"'):
3001 v = re.sub(r'(?s)\\.|"', lambda m: {
3002 '"': '\\"',
3003 "\\'": "'",
3004 '\\\n': '',
3005 '\\x': '\\u00',
3006 }.get(m.group(0), m.group(0)), v[1:-1])
3007 else:
3008 for regex, base in INTEGER_TABLE:
3009 im = re.match(regex, v)
3010 if im:
3011 i = int(im.group(1), base)
3012 return '"%d":' % i if v.endswith(':') else '%d' % i
3013
3014 if v in vars:
3015 return vars[v]
3016
3017 return '"%s"' % v
3018
3019 return re.sub(r'''(?sx)
3020 "(?:[^"\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^"\\]*"|
3021 '(?:[^'\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^'\\]*'|
3022 {comment}|,(?={skip}[\]}}])|
3023 void\s0|(?:(?<![0-9])[eE]|[a-df-zA-DF-Z_$])[.a-zA-Z_$0-9]*|
3024 \b(?:0[xX][0-9a-fA-F]+|0+[0-7]+)(?:{skip}:)?|
3025 [0-9]+(?={skip}:)|
3026 !+
3027 '''.format(comment=COMMENT_RE, skip=SKIP_RE), fix_kv, code)
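
# Illustrative conversion (assumed):
#   >>> js_to_json("{foo: 'bar', baz: 0x10, qux: undefined}")
#   '{"foo": "bar", "baz": 16, "qux": null}'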
3028
3029
3030 def qualities(quality_ids):
3031 """ Get a numeric quality value out of a list of possible values """
3032 def q(qid):
3033 try:
3034 return quality_ids.index(qid)
3035 except ValueError:
3036 return -1
3037 return q
3038
3039
3040 POSTPROCESS_WHEN = {'pre_process', 'before_dl', 'after_move', 'post_process', 'after_video', 'playlist'}
3041
3042
3043 DEFAULT_OUTTMPL = {
3044 'default': '%(title)s [%(id)s].%(ext)s',
3045 'chapter': '%(title)s - %(section_number)03d %(section_title)s [%(id)s].%(ext)s',
3046 }
3047 OUTTMPL_TYPES = {
3048 'chapter': None,
3049 'subtitle': None,
3050 'thumbnail': None,
3051 'description': 'description',
3052 'annotation': 'annotations.xml',
3053 'infojson': 'info.json',
3054 'link': None,
3055 'pl_thumbnail': None,
3056 'pl_description': 'description',
3057 'pl_infojson': 'info.json',
3058 }
3059
3060 # As of [1] format syntax is:
3061 # %[mapping_key][conversion_flags][minimum_width][.precision][length_modifier]type
3062 # 1. https://docs.python.org/2/library/stdtypes.html#string-formatting
3063 STR_FORMAT_RE_TMPL = r'''(?x)
3064 (?<!%)(?P<prefix>(?:%%)*)
3065 %
3066 (?P<has_key>\((?P<key>{0})\))?
3067 (?P<format>
3068 (?P<conversion>[#0\-+ ]+)?
3069 (?P<min_width>\d+)?
3070 (?P<precision>\.\d+)?
3071 (?P<len_mod>[hlL])? # unused in python
3072 {1} # conversion type
3073 )
3074 '''
3075
3076
3077 STR_FORMAT_TYPES = 'diouxXeEfFgGcrs'
3078
3079
3080 def limit_length(s, length):
3081 """ Add ellipses to overly long strings """
3082 if s is None:
3083 return None
3084 ELLIPSES = '...'
3085 if len(s) > length:
3086 return s[:length - len(ELLIPSES)] + ELLIPSES
3087 return s
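
# Illustrative (assumed):
#   >>> limit_length('foobarbaz', 8)
#   'fooba...'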
3088
3089
3090 def version_tuple(v):
3091 return tuple(int(e) for e in re.split(r'[-.]', v))
3092
3093
3094 def is_outdated_version(version, limit, assume_new=True):
3095 if not version:
3096 return not assume_new
3097 try:
3098 return version_tuple(version) < version_tuple(limit)
3099 except ValueError:
3100 return not assume_new
3101
3102
3103 def ytdl_is_updateable():
3104 """ Returns if yt-dlp can be updated with -U """
3105
3106 from .update import is_non_updateable
3107
3108 return not is_non_updateable()
3109
3110
3111 def args_to_str(args):
3112 # Get a short string representation for a subprocess command
3113 return ' '.join(compat_shlex_quote(a) for a in args)
3114
3115
3116 def error_to_compat_str(err):
3117 err_str = str(err)
3118 # On Python 2, an error byte string must be decoded with the proper
3119 # encoding rather than ASCII
3120 if sys.version_info[0] < 3:
3121 err_str = err_str.decode(preferredencoding())
3122 return err_str
3123
3124
3125 def mimetype2ext(mt):
3126 if mt is None:
3127 return None
3128
3129 mt, _, params = mt.partition(';')
3130 mt = mt.strip()
3131
3132 FULL_MAP = {
3133 'audio/mp4': 'm4a',
3134 # Per RFC 3003, audio/mpeg can be .mp1, .mp2 or .mp3. We use .mp3 as
3135 # it's the most popular one
3136 'audio/mpeg': 'mp3',
3137 'audio/x-wav': 'wav',
3138 'audio/wav': 'wav',
3139 'audio/wave': 'wav',
3140 }
3141
3142 ext = FULL_MAP.get(mt)
3143 if ext is not None:
3144 return ext
3145
3146 SUBTYPE_MAP = {
3147 '3gpp': '3gp',
3148 'smptett+xml': 'tt',
3149 'ttaf+xml': 'dfxp',
3150 'ttml+xml': 'ttml',
3151 'x-flv': 'flv',
3152 'x-mp4-fragmented': 'mp4',
3153 'x-ms-sami': 'sami',
3154 'x-ms-wmv': 'wmv',
3155 'mpegurl': 'm3u8',
3156 'x-mpegurl': 'm3u8',
3157 'vnd.apple.mpegurl': 'm3u8',
3158 'dash+xml': 'mpd',
3159 'f4m+xml': 'f4m',
3160 'hds+xml': 'f4m',
3161 'vnd.ms-sstr+xml': 'ism',
3162 'quicktime': 'mov',
3163 'mp2t': 'ts',
3164 'x-wav': 'wav',
3165 'filmstrip+json': 'fs',
3166 'svg+xml': 'svg',
3167 }
3168
3169 _, _, subtype = mt.rpartition('/')
3170 ext = SUBTYPE_MAP.get(subtype.lower())
3171 if ext is not None:
3172 return ext
3173
3174 SUFFIX_MAP = {
3175 'json': 'json',
3176 'xml': 'xml',
3177 'zip': 'zip',
3178 'gzip': 'gz',
3179 }
3180
3181 _, _, suffix = subtype.partition('+')
3182 ext = SUFFIX_MAP.get(suffix)
3183 if ext is not None:
3184 return ext
3185
3186 return subtype.replace('+', '.')
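
# Lookup order is full type, then subtype, then '+suffix' (assumed results):
#   >>> mimetype2ext('audio/mpeg')
#   'mp3'
#   >>> mimetype2ext('application/dash+xml')
#   'mpd'
#   >>> mimetype2ext('application/ld+json')
#   'json'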
3187
3188
3189 def ext2mimetype(ext_or_url):
3190 if not ext_or_url:
3191 return None
3192 if '.' not in ext_or_url:
3193 ext_or_url = f'file.{ext_or_url}'
3194 return mimetypes.guess_type(ext_or_url)[0]
3195
3196
3197 def parse_codecs(codecs_str):
3198 # http://tools.ietf.org/html/rfc6381
3199 if not codecs_str:
3200 return {}
3201 split_codecs = list(filter(None, map(
3202 str.strip, codecs_str.strip().strip(',').split(','))))
3203 vcodec, acodec, tcodec, hdr = None, None, None, None
3204 for full_codec in split_codecs:
3205 parts = full_codec.split('.')
3206 codec = parts[0].replace('0', '')
3207 if codec in ('avc1', 'avc2', 'avc3', 'avc4', 'vp9', 'vp8', 'hev1', 'hev2',
3208 'h263', 'h264', 'mp4v', 'hvc1', 'av1', 'theora', 'dvh1', 'dvhe'):
3209 if not vcodec:
3210 vcodec = '.'.join(parts[:4]) if codec in ('vp9', 'av1', 'hvc1') else full_codec
3211 if codec in ('dvh1', 'dvhe'):
3212 hdr = 'DV'
3213 elif codec == 'av1' and len(parts) > 3 and parts[3] == '10':
3214 hdr = 'HDR10'
3215 elif full_codec.replace('0', '').startswith('vp9.2'):
3216 hdr = 'HDR10'
3217 elif codec in ('flac', 'mp4a', 'opus', 'vorbis', 'mp3', 'aac', 'ac-3', 'ec-3', 'eac3', 'dtsc', 'dtse', 'dtsh', 'dtsl'):
3218 if not acodec:
3219 acodec = full_codec
3220 elif codec in ('stpp', 'wvtt',):
3221 if not tcodec:
3222 tcodec = full_codec
3223 else:
3224 write_string('WARNING: Unknown codec %s\n' % full_codec, sys.stderr)
3225 if vcodec or acodec or tcodec:
3226 return {
3227 'vcodec': vcodec or 'none',
3228 'acodec': acodec or 'none',
3229 'dynamic_range': hdr,
3230 **({'tcodec': tcodec} if tcodec is not None else {}),
3231 }
3232 elif len(split_codecs) == 2:
3233 return {
3234 'vcodec': split_codecs[0],
3235 'acodec': split_codecs[1],
3236 }
3237 return {}
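
# Illustrative result for a typical RFC 6381 codecs string (assumed):
#   >>> parse_codecs('avc1.64001f, mp4a.40.2')
#   {'vcodec': 'avc1.64001f', 'acodec': 'mp4a.40.2', 'dynamic_range': None}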
3238
3239
3240 def urlhandle_detect_ext(url_handle):
3241 getheader = url_handle.headers.get
3242
3243 cd = getheader('Content-Disposition')
3244 if cd:
3245 m = re.match(r'attachment;\s*filename="(?P<filename>[^"]+)"', cd)
3246 if m:
3247 e = determine_ext(m.group('filename'), default_ext=None)
3248 if e:
3249 return e
3250
3251 return mimetype2ext(getheader('Content-Type'))
3252
3253
3254 def encode_data_uri(data, mime_type):
3255 return 'data:%s;base64,%s' % (mime_type, base64.b64encode(data).decode('ascii'))
3256
3257
3258 def age_restricted(content_limit, age_limit):
3259 """ Returns True iff the content should be blocked """
3260
3261 if age_limit is None: # No limit set
3262 return False
3263 if content_limit is None:
3264 return False # Content available for everyone
3265 return age_limit < content_limit
3266
3267
3268 def is_html(first_bytes):
3269 """ Detect whether a file contains HTML by examining its first bytes. """
3270
3271 BOMS = [
3272 (b'\xef\xbb\xbf', 'utf-8'),
3273 (b'\x00\x00\xfe\xff', 'utf-32-be'),
3274 (b'\xff\xfe\x00\x00', 'utf-32-le'),
3275 (b'\xff\xfe', 'utf-16-le'),
3276 (b'\xfe\xff', 'utf-16-be'),
3277 ]
3278 for bom, enc in BOMS:
3279 if first_bytes.startswith(bom):
3280 s = first_bytes[len(bom):].decode(enc, 'replace')
3281 break
3282 else:
3283 s = first_bytes.decode('utf-8', 'replace')
3284
3285 return re.match(r'^\s*<', s)
3286
3287
3288 def determine_protocol(info_dict):
3289 protocol = info_dict.get('protocol')
3290 if protocol is not None:
3291 return protocol
3292
3293 url = sanitize_url(info_dict['url'])
3294 if url.startswith('rtmp'):
3295 return 'rtmp'
3296 elif url.startswith('mms'):
3297 return 'mms'
3298 elif url.startswith('rtsp'):
3299 return 'rtsp'
3300
3301 ext = determine_ext(url)
3302 if ext == 'm3u8':
3303 return 'm3u8'
3304 elif ext == 'f4m':
3305 return 'f4m'
3306
3307 return compat_urllib_parse_urlparse(url).scheme
3308
3309
3310 def render_table(header_row, data, delim=False, extra_gap=0, hide_empty=False):
3311 """ Render a list of rows, each as a list of values.
3312 Text after a \t will be right aligned """
3313 def width(string):
3314 return len(remove_terminal_sequences(string).replace('\t', ''))
3315
3316 def get_max_lens(table):
3317 return [max(width(str(v)) for v in col) for col in zip(*table)]
3318
3319 def filter_using_list(row, filterArray):
3320 return [col for (take, col) in zip(filterArray, row) if take]
3321
3322 if hide_empty:
3323 max_lens = get_max_lens(data)
3324 header_row = filter_using_list(header_row, max_lens)
3325 data = [filter_using_list(row, max_lens) for row in data]
3326
3327 table = [header_row] + data
3328 max_lens = get_max_lens(table)
3329 extra_gap += 1
3330 if delim:
3331 table = [header_row, [delim * (ml + extra_gap) for ml in max_lens]] + data
3332 table[1][-1] = table[1][-1][:-extra_gap] # Remove extra_gap from end of delimiter
3333 for row in table:
3334 for pos, text in enumerate(map(str, row)):
3335 if '\t' in text:
3336 row[pos] = text.replace('\t', ' ' * (max_lens[pos] - width(text))) + ' ' * extra_gap
3337 else:
3338 row[pos] = text + ' ' * (max_lens[pos] - width(text) + extra_gap)
3339 ret = '\n'.join(''.join(row).rstrip() for row in table)
3340 return ret
3341
3342
3343 def _match_one(filter_part, dct, incomplete):
3344 # TODO: Generalize code with YoutubeDL._build_format_filter
3345 STRING_OPERATORS = {
3346 '*=': operator.contains,
3347 '^=': lambda attr, value: attr.startswith(value),
3348 '$=': lambda attr, value: attr.endswith(value),
3349 '~=': lambda attr, value: re.search(value, attr),
3350 }
3351 COMPARISON_OPERATORS = {
3352 **STRING_OPERATORS,
3353 '<=': operator.le, # "<=" must be defined above "<"
3354 '<': operator.lt,
3355 '>=': operator.ge,
3356 '>': operator.gt,
3357 '=': operator.eq,
3358 }
3359
3360 operator_rex = re.compile(r'''(?x)\s*
3361 (?P<key>[a-z_]+)
3362 \s*(?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
3363 (?:
3364 (?P<quote>["\'])(?P<quotedstrval>.+?)(?P=quote)|
3365 (?P<strval>.+?)
3366 )
3367 \s*$
3368 ''' % '|'.join(map(re.escape, COMPARISON_OPERATORS.keys())))
3369 m = operator_rex.search(filter_part)
3370 if m:
3371 m = m.groupdict()
3372 unnegated_op = COMPARISON_OPERATORS[m['op']]
3373 if m['negation']:
3374 op = lambda attr, value: not unnegated_op(attr, value)
3375 else:
3376 op = unnegated_op
3377 comparison_value = m['quotedstrval'] or m['strval']  # the regex defines no 'intval' group
3378 if m['quote']:
3379 comparison_value = comparison_value.replace(r'\%s' % m['quote'], m['quote'])
3380 actual_value = dct.get(m['key'])
3381 numeric_comparison = None
3382 if isinstance(actual_value, compat_numeric_types):
3383 # If the original field is a string and the matching comparison value is
3384 # a number, we should respect the origin of the original field
3385 # and process the comparison value as a string (see
3386 # https://github.com/ytdl-org/youtube-dl/issues/11082)
3387 try:
3388 numeric_comparison = int(comparison_value)
3389 except ValueError:
3390 numeric_comparison = parse_filesize(comparison_value)
3391 if numeric_comparison is None:
3392 numeric_comparison = parse_filesize(f'{comparison_value}B')
3393 if numeric_comparison is None:
3394 numeric_comparison = parse_duration(comparison_value)
3395 if numeric_comparison is not None and m['op'] in STRING_OPERATORS:
3396 raise ValueError('Operator %s only supports string values!' % m['op'])
3397 if actual_value is None:
3398 return incomplete or m['none_inclusive']
3399 return op(actual_value, comparison_value if numeric_comparison is None else numeric_comparison)
3400
3401 UNARY_OPERATORS = {
3402 '': lambda v: (v is True) if isinstance(v, bool) else (v is not None),
3403 '!': lambda v: (v is False) if isinstance(v, bool) else (v is None),
3404 }
3405 operator_rex = re.compile(r'''(?x)\s*
3406 (?P<op>%s)\s*(?P<key>[a-z_]+)
3407 \s*$
3408 ''' % '|'.join(map(re.escape, UNARY_OPERATORS.keys())))
3409 m = operator_rex.search(filter_part)
3410 if m:
3411 op = UNARY_OPERATORS[m.group('op')]
3412 actual_value = dct.get(m.group('key'))
3413 if incomplete and actual_value is None:
3414 return True
3415 return op(actual_value)
3416
3417 raise ValueError('Invalid filter part %r' % filter_part)
3418
3419
3420 def match_str(filter_str, dct, incomplete=False):
3421 """ Filter a dictionary with a simple string syntax. Returns True (=passes filter) or false
3422 When incomplete, all conditions passes on missing fields
3423 """
3424 return all(
3425 _match_one(filter_part.replace(r'\&', '&'), dct, incomplete)
3426 for filter_part in re.split(r'(?<!\\)&', filter_str))
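
# Illustrative filters (assumed); '&' separates conditions that must all hold:
#   >>> match_str('duration > 60 & description', {'duration': 100, 'description': 'x'})
#   True
#   >>> match_str('!is_live', {'is_live': True})
#   False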
3427
3428
3429 def match_filter_func(filter_str):
3430 def _match_func(info_dict, *args, **kwargs):
3431 if match_str(filter_str, info_dict, *args, **kwargs):
3432 return None
3433 else:
3434 video_title = info_dict.get('title', info_dict.get('id', 'video'))
3435 return '%s does not pass filter %s, skipping ...' % (video_title, filter_str)
3436 return _match_func
3437
3438
3439 def parse_dfxp_time_expr(time_expr):
3440 if not time_expr:
3441 return
3442
3443 mobj = re.match(r'^(?P<time_offset>\d+(?:\.\d+)?)s?$', time_expr)
3444 if mobj:
3445 return float(mobj.group('time_offset'))
3446
3447 mobj = re.match(r'^(\d+):(\d\d):(\d\d(?:(?:\.|:)\d+)?)$', time_expr)
3448 if mobj:
3449 return 3600 * int(mobj.group(1)) + 60 * int(mobj.group(2)) + float(mobj.group(3).replace(':', '.'))
3450
3451
3452 def srt_subtitles_timecode(seconds):
3453 return '%02d:%02d:%02d,%03d' % timetuple_from_msec(seconds * 1000)
3454
3455
3456 def ass_subtitles_timecode(seconds):
3457 time = timetuple_from_msec(seconds * 1000)
3458 return '%01d:%02d:%02d.%02d' % (*time[:-1], time.milliseconds / 10)
3459
3460
3461 def dfxp2srt(dfxp_data):
3462 '''
3463 @param dfxp_data A bytes-like object containing DFXP data
3464 @returns A unicode object containing converted SRT data
3465 '''
3466 LEGACY_NAMESPACES = (
3467 (b'http://www.w3.org/ns/ttml', [
3468 b'http://www.w3.org/2004/11/ttaf1',
3469 b'http://www.w3.org/2006/04/ttaf1',
3470 b'http://www.w3.org/2006/10/ttaf1',
3471 ]),
3472 (b'http://www.w3.org/ns/ttml#styling', [
3473 b'http://www.w3.org/ns/ttml#style',
3474 ]),
3475 )
3476
3477 SUPPORTED_STYLING = [
3478 'color',
3479 'fontFamily',
3480 'fontSize',
3481 'fontStyle',
3482 'fontWeight',
3483 'textDecoration'
3484 ]
3485
3486 _x = functools.partial(xpath_with_ns, ns_map={
3487 'xml': 'http://www.w3.org/XML/1998/namespace',
3488 'ttml': 'http://www.w3.org/ns/ttml',
3489 'tts': 'http://www.w3.org/ns/ttml#styling',
3490 })
3491
3492 styles = {}
3493 default_style = {}
3494
3495 class TTMLPElementParser(object):
3496 _out = ''
3497 _unclosed_elements = []
3498 _applied_styles = []
3499
3500 def start(self, tag, attrib):
3501 if tag in (_x('ttml:br'), 'br'):
3502 self._out += '\n'
3503 else:
3504 unclosed_elements = []
3505 style = {}
3506 element_style_id = attrib.get('style')
3507 if default_style:
3508 style.update(default_style)
3509 if element_style_id:
3510 style.update(styles.get(element_style_id, {}))
3511 for prop in SUPPORTED_STYLING:
3512 prop_val = attrib.get(_x('tts:' + prop))
3513 if prop_val:
3514 style[prop] = prop_val
3515 if style:
3516 font = ''
3517 for k, v in sorted(style.items()):
3518 if self._applied_styles and self._applied_styles[-1].get(k) == v:
3519 continue
3520 if k == 'color':
3521 font += ' color="%s"' % v
3522 elif k == 'fontSize':
3523 font += ' size="%s"' % v
3524 elif k == 'fontFamily':
3525 font += ' face="%s"' % v
3526 elif k == 'fontWeight' and v == 'bold':
3527 self._out += '<b>'
3528 unclosed_elements.append('b')
3529 elif k == 'fontStyle' and v == 'italic':
3530 self._out += '<i>'
3531 unclosed_elements.append('i')
3532 elif k == 'textDecoration' and v == 'underline':
3533 self._out += '<u>'
3534 unclosed_elements.append('u')
3535 if font:
3536 self._out += '<font' + font + '>'
3537 unclosed_elements.append('font')
3538 applied_style = {}
3539 if self._applied_styles:
3540 applied_style.update(self._applied_styles[-1])
3541 applied_style.update(style)
3542 self._applied_styles.append(applied_style)
3543 self._unclosed_elements.append(unclosed_elements)
3544
3545 def end(self, tag):
3546 if tag not in (_x('ttml:br'), 'br'):
3547 unclosed_elements = self._unclosed_elements.pop()
3548 for element in reversed(unclosed_elements):
3549 self._out += '</%s>' % element
3550 if unclosed_elements and self._applied_styles:
3551 self._applied_styles.pop()
3552
3553 def data(self, data):
3554 self._out += data
3555
3556 def close(self):
3557 return self._out.strip()
3558
3559 def parse_node(node):
3560 target = TTMLPElementParser()
3561 parser = xml.etree.ElementTree.XMLParser(target=target)
3562 parser.feed(xml.etree.ElementTree.tostring(node))
3563 return parser.close()
3564
3565 for k, v in LEGACY_NAMESPACES:
3566 for ns in v:
3567 dfxp_data = dfxp_data.replace(ns, k)
3568
3569 dfxp = compat_etree_fromstring(dfxp_data)
3570 out = []
3571 paras = dfxp.findall(_x('.//ttml:p')) or dfxp.findall('.//p')
3572
3573 if not paras:
3574 raise ValueError('Invalid dfxp/TTML subtitle')
3575
3576 repeat = False
3577 while True:
3578 for style in dfxp.findall(_x('.//ttml:style')):
3579 style_id = style.get('id') or style.get(_x('xml:id'))
3580 if not style_id:
3581 continue
3582 parent_style_id = style.get('style')
3583 if parent_style_id:
3584 if parent_style_id not in styles:
3585 repeat = True
3586 continue
3587 styles[style_id] = styles[parent_style_id].copy()
3588 for prop in SUPPORTED_STYLING:
3589 prop_val = style.get(_x('tts:' + prop))
3590 if prop_val:
3591 styles.setdefault(style_id, {})[prop] = prop_val
3592 if repeat:
3593 repeat = False
3594 else:
3595 break
3596
3597 for p in ('body', 'div'):
3598 ele = xpath_element(dfxp, [_x('.//ttml:' + p), './/' + p])
3599 if ele is None:
3600 continue
3601 style = styles.get(ele.get('style'))
3602 if not style:
3603 continue
3604 default_style.update(style)
3605
3606 for para, index in zip(paras, itertools.count(1)):
3607 begin_time = parse_dfxp_time_expr(para.attrib.get('begin'))
3608 end_time = parse_dfxp_time_expr(para.attrib.get('end'))
3609 dur = parse_dfxp_time_expr(para.attrib.get('dur'))
3610 if begin_time is None:
3611 continue
3612 if not end_time:
3613 if not dur:
3614 continue
3615 end_time = begin_time + dur
3616 out.append('%d\n%s --> %s\n%s\n\n' % (
3617 index,
3618 srt_subtitles_timecode(begin_time),
3619 srt_subtitles_timecode(end_time),
3620 parse_node(para)))
3621
3622 return ''.join(out)
3623
3624
3625 def cli_option(params, command_option, param):
3626 param = params.get(param)
3627 if param:
3628 param = compat_str(param)
3629 return [command_option, param] if param is not None else []
3630
3631
3632 def cli_bool_option(params, command_option, param, true_value='true', false_value='false', separator=None):
3633 param = params.get(param)
3634 if param is None:
3635 return []
3636 assert isinstance(param, bool)
3637 if separator:
3638 return [command_option + separator + (true_value if param else false_value)]
3639 return [command_option, true_value if param else false_value]
3640
3641
3642 def cli_valueless_option(params, command_option, param, expected_value=True):
3643 param = params.get(param)
3644 return [command_option] if param == expected_value else []
3645
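# Editor's note, an illustrative sketch of the three cli_* helpers above
# (the params keys and option names here are hypothetical):
#
#     >>> cli_option({'proxy': 'socks5://127.0.0.1:1080'}, '--proxy', 'proxy')
#     ['--proxy', 'socks5://127.0.0.1:1080']
#     >>> cli_bool_option({'nocheckcertificate': True}, '--no-check-certificate', 'nocheckcertificate')
#     ['--no-check-certificate', 'true']
#     >>> cli_bool_option({'nocheckcertificate': False}, '--check-certificate', 'nocheckcertificate', 'false', 'true', '=')
#     ['--check-certificate=true']
#     >>> cli_valueless_option({'quiet': True}, '--silent', 'quiet')
#     ['--silent']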
3646
3647 def cli_configuration_args(argdict, keys, default=[], use_compat=True):
3648 if isinstance(argdict, (list, tuple)): # for backward compatibility
3649 if use_compat:
3650 return argdict
3651 else:
3652 argdict = None
3653 if argdict is None:
3654 return default
3655 assert isinstance(argdict, dict)
3656
3657 assert isinstance(keys, (list, tuple))
3658 for key_list in keys:
3659 arg_list = list(filter(
3660 lambda x: x is not None,
3661 [argdict.get(key.lower()) for key in variadic(key_list)]))
3662 if arg_list:
3663 return [arg for args in arg_list for arg in args]
3664 return default
3665
3666
3667 def _configuration_args(main_key, argdict, exe, keys=None, default=[], use_compat=True):
3668 main_key, exe = main_key.lower(), exe.lower()
3669 root_key = exe if main_key == exe else f'{main_key}+{exe}'
3670 keys = [f'{root_key}{k}' for k in (keys or [''])]
3671 if root_key in keys:
3672 if main_key != exe:
3673 keys.append((main_key, exe))
3674 keys.append('default')
3675 else:
3676 use_compat = False
3677 return cli_configuration_args(argdict, keys, default, use_compat)
3678
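# Editor's note, an illustrative sketch (hypothetical argdict): for
# main_key='postprocessor' and exe='ffmpeg', _configuration_args builds the
# root key 'postprocessor+ffmpeg'; with the default keys=[''], it then falls
# back to ('postprocessor', 'ffmpeg') and finally 'default'. The underlying
# lookup works like this:
#
#     >>> cli_configuration_args(
#     ...     {'default': ['-v'], 'ffmpeg': ['-loglevel', 'error']},
#     ...     [('ffmpeg',), 'default'])
#     ['-loglevel', 'error']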
3679
3680 class ISO639Utils(object):
3681 # See http://www.loc.gov/standards/iso639-2/ISO-639-2_utf-8.txt
3682 _lang_map = {
3683 'aa': 'aar',
3684 'ab': 'abk',
3685 'ae': 'ave',
3686 'af': 'afr',
3687 'ak': 'aka',
3688 'am': 'amh',
3689 'an': 'arg',
3690 'ar': 'ara',
3691 'as': 'asm',
3692 'av': 'ava',
3693 'ay': 'aym',
3694 'az': 'aze',
3695 'ba': 'bak',
3696 'be': 'bel',
3697 'bg': 'bul',
3698 'bh': 'bih',
3699 'bi': 'bis',
3700 'bm': 'bam',
3701 'bn': 'ben',
3702 'bo': 'bod',
3703 'br': 'bre',
3704 'bs': 'bos',
3705 'ca': 'cat',
3706 'ce': 'che',
3707 'ch': 'cha',
3708 'co': 'cos',
3709 'cr': 'cre',
3710 'cs': 'ces',
3711 'cu': 'chu',
3712 'cv': 'chv',
3713 'cy': 'cym',
3714 'da': 'dan',
3715 'de': 'deu',
3716 'dv': 'div',
3717 'dz': 'dzo',
3718 'ee': 'ewe',
3719 'el': 'ell',
3720 'en': 'eng',
3721 'eo': 'epo',
3722 'es': 'spa',
3723 'et': 'est',
3724 'eu': 'eus',
3725 'fa': 'fas',
3726 'ff': 'ful',
3727 'fi': 'fin',
3728 'fj': 'fij',
3729 'fo': 'fao',
3730 'fr': 'fra',
3731 'fy': 'fry',
3732 'ga': 'gle',
3733 'gd': 'gla',
3734 'gl': 'glg',
3735 'gn': 'grn',
3736 'gu': 'guj',
3737 'gv': 'glv',
3738 'ha': 'hau',
3739 'he': 'heb',
3740 'iw': 'heb', # Replaced by he in 1989 revision
3741 'hi': 'hin',
3742 'ho': 'hmo',
3743 'hr': 'hrv',
3744 'ht': 'hat',
3745 'hu': 'hun',
3746 'hy': 'hye',
3747 'hz': 'her',
3748 'ia': 'ina',
3749 'id': 'ind',
3750 'in': 'ind', # Replaced by id in 1989 revision
3751 'ie': 'ile',
3752 'ig': 'ibo',
3753 'ii': 'iii',
3754 'ik': 'ipk',
3755 'io': 'ido',
3756 'is': 'isl',
3757 'it': 'ita',
3758 'iu': 'iku',
3759 'ja': 'jpn',
3760 'jv': 'jav',
3761 'ka': 'kat',
3762 'kg': 'kon',
3763 'ki': 'kik',
3764 'kj': 'kua',
3765 'kk': 'kaz',
3766 'kl': 'kal',
3767 'km': 'khm',
3768 'kn': 'kan',
3769 'ko': 'kor',
3770 'kr': 'kau',
3771 'ks': 'kas',
3772 'ku': 'kur',
3773 'kv': 'kom',
3774 'kw': 'cor',
3775 'ky': 'kir',
3776 'la': 'lat',
3777 'lb': 'ltz',
3778 'lg': 'lug',
3779 'li': 'lim',
3780 'ln': 'lin',
3781 'lo': 'lao',
3782 'lt': 'lit',
3783 'lu': 'lub',
3784 'lv': 'lav',
3785 'mg': 'mlg',
3786 'mh': 'mah',
3787 'mi': 'mri',
3788 'mk': 'mkd',
3789 'ml': 'mal',
3790 'mn': 'mon',
3791 'mr': 'mar',
3792 'ms': 'msa',
3793 'mt': 'mlt',
3794 'my': 'mya',
3795 'na': 'nau',
3796 'nb': 'nob',
3797 'nd': 'nde',
3798 'ne': 'nep',
3799 'ng': 'ndo',
3800 'nl': 'nld',
3801 'nn': 'nno',
3802 'no': 'nor',
3803 'nr': 'nbl',
3804 'nv': 'nav',
3805 'ny': 'nya',
3806 'oc': 'oci',
3807 'oj': 'oji',
3808 'om': 'orm',
3809 'or': 'ori',
3810 'os': 'oss',
3811 'pa': 'pan',
3812 'pi': 'pli',
3813 'pl': 'pol',
3814 'ps': 'pus',
3815 'pt': 'por',
3816 'qu': 'que',
3817 'rm': 'roh',
3818 'rn': 'run',
3819 'ro': 'ron',
3820 'ru': 'rus',
3821 'rw': 'kin',
3822 'sa': 'san',
3823 'sc': 'srd',
3824 'sd': 'snd',
3825 'se': 'sme',
3826 'sg': 'sag',
3827 'si': 'sin',
3828 'sk': 'slk',
3829 'sl': 'slv',
3830 'sm': 'smo',
3831 'sn': 'sna',
3832 'so': 'som',
3833 'sq': 'sqi',
3834 'sr': 'srp',
3835 'ss': 'ssw',
3836 'st': 'sot',
3837 'su': 'sun',
3838 'sv': 'swe',
3839 'sw': 'swa',
3840 'ta': 'tam',
3841 'te': 'tel',
3842 'tg': 'tgk',
3843 'th': 'tha',
3844 'ti': 'tir',
3845 'tk': 'tuk',
3846 'tl': 'tgl',
3847 'tn': 'tsn',
3848 'to': 'ton',
3849 'tr': 'tur',
3850 'ts': 'tso',
3851 'tt': 'tat',
3852 'tw': 'twi',
3853 'ty': 'tah',
3854 'ug': 'uig',
3855 'uk': 'ukr',
3856 'ur': 'urd',
3857 'uz': 'uzb',
3858 've': 'ven',
3859 'vi': 'vie',
3860 'vo': 'vol',
3861 'wa': 'wln',
3862 'wo': 'wol',
3863 'xh': 'xho',
3864 'yi': 'yid',
3865 'ji': 'yid', # Replaced by yi in 1989 revision
3866 'yo': 'yor',
3867 'za': 'zha',
3868 'zh': 'zho',
3869 'zu': 'zul',
3870 }
3871
3872 @classmethod
3873 def short2long(cls, code):
3874 """Convert language code from ISO 639-1 to ISO 639-2/T"""
3875 return cls._lang_map.get(code[:2])
3876
3877 @classmethod
3878 def long2short(cls, code):
3879 """Convert language code from ISO 639-2/T to ISO 639-1"""
3880 for short_name, long_name in cls._lang_map.items():
3881 if long_name == code:
3882 return short_name
3883
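# Editor's note, illustrative examples:
#
#     >>> ISO639Utils.short2long('en')
#     'eng'
#     >>> ISO639Utils.short2long('en-US')  # only the first two letters are used
#     'eng'
#     >>> ISO639Utils.long2short('deu')
#     'de'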
3884
3885 class ISO3166Utils(object):
3886 # From http://data.okfn.org/data/core/country-list
3887 _country_map = {
3888 'AF': 'Afghanistan',
3889 'AX': 'Åland Islands',
3890 'AL': 'Albania',
3891 'DZ': 'Algeria',
3892 'AS': 'American Samoa',
3893 'AD': 'Andorra',
3894 'AO': 'Angola',
3895 'AI': 'Anguilla',
3896 'AQ': 'Antarctica',
3897 'AG': 'Antigua and Barbuda',
3898 'AR': 'Argentina',
3899 'AM': 'Armenia',
3900 'AW': 'Aruba',
3901 'AU': 'Australia',
3902 'AT': 'Austria',
3903 'AZ': 'Azerbaijan',
3904 'BS': 'Bahamas',
3905 'BH': 'Bahrain',
3906 'BD': 'Bangladesh',
3907 'BB': 'Barbados',
3908 'BY': 'Belarus',
3909 'BE': 'Belgium',
3910 'BZ': 'Belize',
3911 'BJ': 'Benin',
3912 'BM': 'Bermuda',
3913 'BT': 'Bhutan',
3914 'BO': 'Bolivia, Plurinational State of',
3915 'BQ': 'Bonaire, Sint Eustatius and Saba',
3916 'BA': 'Bosnia and Herzegovina',
3917 'BW': 'Botswana',
3918 'BV': 'Bouvet Island',
3919 'BR': 'Brazil',
3920 'IO': 'British Indian Ocean Territory',
3921 'BN': 'Brunei Darussalam',
3922 'BG': 'Bulgaria',
3923 'BF': 'Burkina Faso',
3924 'BI': 'Burundi',
3925 'KH': 'Cambodia',
3926 'CM': 'Cameroon',
3927 'CA': 'Canada',
3928 'CV': 'Cape Verde',
3929 'KY': 'Cayman Islands',
3930 'CF': 'Central African Republic',
3931 'TD': 'Chad',
3932 'CL': 'Chile',
3933 'CN': 'China',
3934 'CX': 'Christmas Island',
3935 'CC': 'Cocos (Keeling) Islands',
3936 'CO': 'Colombia',
3937 'KM': 'Comoros',
3938 'CG': 'Congo',
3939 'CD': 'Congo, the Democratic Republic of the',
3940 'CK': 'Cook Islands',
3941 'CR': 'Costa Rica',
3942 'CI': 'Côte d\'Ivoire',
3943 'HR': 'Croatia',
3944 'CU': 'Cuba',
3945 'CW': 'Curaçao',
3946 'CY': 'Cyprus',
3947 'CZ': 'Czech Republic',
3948 'DK': 'Denmark',
3949 'DJ': 'Djibouti',
3950 'DM': 'Dominica',
3951 'DO': 'Dominican Republic',
3952 'EC': 'Ecuador',
3953 'EG': 'Egypt',
3954 'SV': 'El Salvador',
3955 'GQ': 'Equatorial Guinea',
3956 'ER': 'Eritrea',
3957 'EE': 'Estonia',
3958 'ET': 'Ethiopia',
3959 'FK': 'Falkland Islands (Malvinas)',
3960 'FO': 'Faroe Islands',
3961 'FJ': 'Fiji',
3962 'FI': 'Finland',
3963 'FR': 'France',
3964 'GF': 'French Guiana',
3965 'PF': 'French Polynesia',
3966 'TF': 'French Southern Territories',
3967 'GA': 'Gabon',
3968 'GM': 'Gambia',
3969 'GE': 'Georgia',
3970 'DE': 'Germany',
3971 'GH': 'Ghana',
3972 'GI': 'Gibraltar',
3973 'GR': 'Greece',
3974 'GL': 'Greenland',
3975 'GD': 'Grenada',
3976 'GP': 'Guadeloupe',
3977 'GU': 'Guam',
3978 'GT': 'Guatemala',
3979 'GG': 'Guernsey',
3980 'GN': 'Guinea',
3981 'GW': 'Guinea-Bissau',
3982 'GY': 'Guyana',
3983 'HT': 'Haiti',
3984 'HM': 'Heard Island and McDonald Islands',
3985 'VA': 'Holy See (Vatican City State)',
3986 'HN': 'Honduras',
3987 'HK': 'Hong Kong',
3988 'HU': 'Hungary',
3989 'IS': 'Iceland',
3990 'IN': 'India',
3991 'ID': 'Indonesia',
3992 'IR': 'Iran, Islamic Republic of',
3993 'IQ': 'Iraq',
3994 'IE': 'Ireland',
3995 'IM': 'Isle of Man',
3996 'IL': 'Israel',
3997 'IT': 'Italy',
3998 'JM': 'Jamaica',
3999 'JP': 'Japan',
4000 'JE': 'Jersey',
4001 'JO': 'Jordan',
4002 'KZ': 'Kazakhstan',
4003 'KE': 'Kenya',
4004 'KI': 'Kiribati',
4005 'KP': 'Korea, Democratic People\'s Republic of',
4006 'KR': 'Korea, Republic of',
4007 'KW': 'Kuwait',
4008 'KG': 'Kyrgyzstan',
4009 'LA': 'Lao People\'s Democratic Republic',
4010 'LV': 'Latvia',
4011 'LB': 'Lebanon',
4012 'LS': 'Lesotho',
4013 'LR': 'Liberia',
4014 'LY': 'Libya',
4015 'LI': 'Liechtenstein',
4016 'LT': 'Lithuania',
4017 'LU': 'Luxembourg',
4018 'MO': 'Macao',
4019 'MK': 'Macedonia, the Former Yugoslav Republic of',
4020 'MG': 'Madagascar',
4021 'MW': 'Malawi',
4022 'MY': 'Malaysia',
4023 'MV': 'Maldives',
4024 'ML': 'Mali',
4025 'MT': 'Malta',
4026 'MH': 'Marshall Islands',
4027 'MQ': 'Martinique',
4028 'MR': 'Mauritania',
4029 'MU': 'Mauritius',
4030 'YT': 'Mayotte',
4031 'MX': 'Mexico',
4032 'FM': 'Micronesia, Federated States of',
4033 'MD': 'Moldova, Republic of',
4034 'MC': 'Monaco',
4035 'MN': 'Mongolia',
4036 'ME': 'Montenegro',
4037 'MS': 'Montserrat',
4038 'MA': 'Morocco',
4039 'MZ': 'Mozambique',
4040 'MM': 'Myanmar',
4041 'NA': 'Namibia',
4042 'NR': 'Nauru',
4043 'NP': 'Nepal',
4044 'NL': 'Netherlands',
4045 'NC': 'New Caledonia',
4046 'NZ': 'New Zealand',
4047 'NI': 'Nicaragua',
4048 'NE': 'Niger',
4049 'NG': 'Nigeria',
4050 'NU': 'Niue',
4051 'NF': 'Norfolk Island',
4052 'MP': 'Northern Mariana Islands',
4053 'NO': 'Norway',
4054 'OM': 'Oman',
4055 'PK': 'Pakistan',
4056 'PW': 'Palau',
4057 'PS': 'Palestine, State of',
4058 'PA': 'Panama',
4059 'PG': 'Papua New Guinea',
4060 'PY': 'Paraguay',
4061 'PE': 'Peru',
4062 'PH': 'Philippines',
4063 'PN': 'Pitcairn',
4064 'PL': 'Poland',
4065 'PT': 'Portugal',
4066 'PR': 'Puerto Rico',
4067 'QA': 'Qatar',
4068 'RE': 'Réunion',
4069 'RO': 'Romania',
4070 'RU': 'Russian Federation',
4071 'RW': 'Rwanda',
4072 'BL': 'Saint Barthélemy',
4073 'SH': 'Saint Helena, Ascension and Tristan da Cunha',
4074 'KN': 'Saint Kitts and Nevis',
4075 'LC': 'Saint Lucia',
4076 'MF': 'Saint Martin (French part)',
4077 'PM': 'Saint Pierre and Miquelon',
4078 'VC': 'Saint Vincent and the Grenadines',
4079 'WS': 'Samoa',
4080 'SM': 'San Marino',
4081 'ST': 'Sao Tome and Principe',
4082 'SA': 'Saudi Arabia',
4083 'SN': 'Senegal',
4084 'RS': 'Serbia',
4085 'SC': 'Seychelles',
4086 'SL': 'Sierra Leone',
4087 'SG': 'Singapore',
4088 'SX': 'Sint Maarten (Dutch part)',
4089 'SK': 'Slovakia',
4090 'SI': 'Slovenia',
4091 'SB': 'Solomon Islands',
4092 'SO': 'Somalia',
4093 'ZA': 'South Africa',
4094 'GS': 'South Georgia and the South Sandwich Islands',
4095 'SS': 'South Sudan',
4096 'ES': 'Spain',
4097 'LK': 'Sri Lanka',
4098 'SD': 'Sudan',
4099 'SR': 'Suriname',
4100 'SJ': 'Svalbard and Jan Mayen',
4101 'SZ': 'Swaziland',
4102 'SE': 'Sweden',
4103 'CH': 'Switzerland',
4104 'SY': 'Syrian Arab Republic',
4105 'TW': 'Taiwan, Province of China',
4106 'TJ': 'Tajikistan',
4107 'TZ': 'Tanzania, United Republic of',
4108 'TH': 'Thailand',
4109 'TL': 'Timor-Leste',
4110 'TG': 'Togo',
4111 'TK': 'Tokelau',
4112 'TO': 'Tonga',
4113 'TT': 'Trinidad and Tobago',
4114 'TN': 'Tunisia',
4115 'TR': 'Turkey',
4116 'TM': 'Turkmenistan',
4117 'TC': 'Turks and Caicos Islands',
4118 'TV': 'Tuvalu',
4119 'UG': 'Uganda',
4120 'UA': 'Ukraine',
4121 'AE': 'United Arab Emirates',
4122 'GB': 'United Kingdom',
4123 'US': 'United States',
4124 'UM': 'United States Minor Outlying Islands',
4125 'UY': 'Uruguay',
4126 'UZ': 'Uzbekistan',
4127 'VU': 'Vanuatu',
4128 'VE': 'Venezuela, Bolivarian Republic of',
4129 'VN': 'Viet Nam',
4130 'VG': 'Virgin Islands, British',
4131 'VI': 'Virgin Islands, U.S.',
4132 'WF': 'Wallis and Futuna',
4133 'EH': 'Western Sahara',
4134 'YE': 'Yemen',
4135 'ZM': 'Zambia',
4136 'ZW': 'Zimbabwe',
4137 }
4138
4139 @classmethod
4140 def short2full(cls, code):
4141 """Convert an ISO 3166-2 country code to the corresponding full name"""
4142 return cls._country_map.get(code.upper())
4143
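# Editor's note, an illustrative example (the lookup is case-insensitive):
#
#     >>> ISO3166Utils.short2full('de')
#     'Germany'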
4144
4145 class GeoUtils(object):
4146 # Major IPv4 address blocks per country
4147 _country_ip_map = {
4148 'AD': '46.172.224.0/19',
4149 'AE': '94.200.0.0/13',
4150 'AF': '149.54.0.0/17',
4151 'AG': '209.59.64.0/18',
4152 'AI': '204.14.248.0/21',
4153 'AL': '46.99.0.0/16',
4154 'AM': '46.70.0.0/15',
4155 'AO': '105.168.0.0/13',
4156 'AP': '182.50.184.0/21',
4157 'AQ': '23.154.160.0/24',
4158 'AR': '181.0.0.0/12',
4159 'AS': '202.70.112.0/20',
4160 'AT': '77.116.0.0/14',
4161 'AU': '1.128.0.0/11',
4162 'AW': '181.41.0.0/18',
4163 'AX': '185.217.4.0/22',
4164 'AZ': '5.197.0.0/16',
4165 'BA': '31.176.128.0/17',
4166 'BB': '65.48.128.0/17',
4167 'BD': '114.130.0.0/16',
4168 'BE': '57.0.0.0/8',
4169 'BF': '102.178.0.0/15',
4170 'BG': '95.42.0.0/15',
4171 'BH': '37.131.0.0/17',
4172 'BI': '154.117.192.0/18',
4173 'BJ': '137.255.0.0/16',
4174 'BL': '185.212.72.0/23',
4175 'BM': '196.12.64.0/18',
4176 'BN': '156.31.0.0/16',
4177 'BO': '161.56.0.0/16',
4178 'BQ': '161.0.80.0/20',
4179 'BR': '191.128.0.0/12',
4180 'BS': '24.51.64.0/18',
4181 'BT': '119.2.96.0/19',
4182 'BW': '168.167.0.0/16',
4183 'BY': '178.120.0.0/13',
4184 'BZ': '179.42.192.0/18',
4185 'CA': '99.224.0.0/11',
4186 'CD': '41.243.0.0/16',
4187 'CF': '197.242.176.0/21',
4188 'CG': '160.113.0.0/16',
4189 'CH': '85.0.0.0/13',
4190 'CI': '102.136.0.0/14',
4191 'CK': '202.65.32.0/19',
4192 'CL': '152.172.0.0/14',
4193 'CM': '102.244.0.0/14',
4194 'CN': '36.128.0.0/10',
4195 'CO': '181.240.0.0/12',
4196 'CR': '201.192.0.0/12',
4197 'CU': '152.206.0.0/15',
4198 'CV': '165.90.96.0/19',
4199 'CW': '190.88.128.0/17',
4200 'CY': '31.153.0.0/16',
4201 'CZ': '88.100.0.0/14',
4202 'DE': '53.0.0.0/8',
4203 'DJ': '197.241.0.0/17',
4204 'DK': '87.48.0.0/12',
4205 'DM': '192.243.48.0/20',
4206 'DO': '152.166.0.0/15',
4207 'DZ': '41.96.0.0/12',
4208 'EC': '186.68.0.0/15',
4209 'EE': '90.190.0.0/15',
4210 'EG': '156.160.0.0/11',
4211 'ER': '196.200.96.0/20',
4212 'ES': '88.0.0.0/11',
4213 'ET': '196.188.0.0/14',
4214 'EU': '2.16.0.0/13',
4215 'FI': '91.152.0.0/13',
4216 'FJ': '144.120.0.0/16',
4217 'FK': '80.73.208.0/21',
4218 'FM': '119.252.112.0/20',
4219 'FO': '88.85.32.0/19',
4220 'FR': '90.0.0.0/9',
4221 'GA': '41.158.0.0/15',
4222 'GB': '25.0.0.0/8',
4223 'GD': '74.122.88.0/21',
4224 'GE': '31.146.0.0/16',
4225 'GF': '161.22.64.0/18',
4226 'GG': '62.68.160.0/19',
4227 'GH': '154.160.0.0/12',
4228 'GI': '95.164.0.0/16',
4229 'GL': '88.83.0.0/19',
4230 'GM': '160.182.0.0/15',
4231 'GN': '197.149.192.0/18',
4232 'GP': '104.250.0.0/19',
4233 'GQ': '105.235.224.0/20',
4234 'GR': '94.64.0.0/13',
4235 'GT': '168.234.0.0/16',
4236 'GU': '168.123.0.0/16',
4237 'GW': '197.214.80.0/20',
4238 'GY': '181.41.64.0/18',
4239 'HK': '113.252.0.0/14',
4240 'HN': '181.210.0.0/16',
4241 'HR': '93.136.0.0/13',
4242 'HT': '148.102.128.0/17',
4243 'HU': '84.0.0.0/14',
4244 'ID': '39.192.0.0/10',
4245 'IE': '87.32.0.0/12',
4246 'IL': '79.176.0.0/13',
4247 'IM': '5.62.80.0/20',
4248 'IN': '117.192.0.0/10',
4249 'IO': '203.83.48.0/21',
4250 'IQ': '37.236.0.0/14',
4251 'IR': '2.176.0.0/12',
4252 'IS': '82.221.0.0/16',
4253 'IT': '79.0.0.0/10',
4254 'JE': '87.244.64.0/18',
4255 'JM': '72.27.0.0/17',
4256 'JO': '176.29.0.0/16',
4257 'JP': '133.0.0.0/8',
4258 'KE': '105.48.0.0/12',
4259 'KG': '158.181.128.0/17',
4260 'KH': '36.37.128.0/17',
4261 'KI': '103.25.140.0/22',
4262 'KM': '197.255.224.0/20',
4263 'KN': '198.167.192.0/19',
4264 'KP': '175.45.176.0/22',
4265 'KR': '175.192.0.0/10',
4266 'KW': '37.36.0.0/14',
4267 'KY': '64.96.0.0/15',
4268 'KZ': '2.72.0.0/13',
4269 'LA': '115.84.64.0/18',
4270 'LB': '178.135.0.0/16',
4271 'LC': '24.92.144.0/20',
4272 'LI': '82.117.0.0/19',
4273 'LK': '112.134.0.0/15',
4274 'LR': '102.183.0.0/16',
4275 'LS': '129.232.0.0/17',
4276 'LT': '78.56.0.0/13',
4277 'LU': '188.42.0.0/16',
4278 'LV': '46.109.0.0/16',
4279 'LY': '41.252.0.0/14',
4280 'MA': '105.128.0.0/11',
4281 'MC': '88.209.64.0/18',
4282 'MD': '37.246.0.0/16',
4283 'ME': '178.175.0.0/17',
4284 'MF': '74.112.232.0/21',
4285 'MG': '154.126.0.0/17',
4286 'MH': '117.103.88.0/21',
4287 'MK': '77.28.0.0/15',
4288 'ML': '154.118.128.0/18',
4289 'MM': '37.111.0.0/17',
4290 'MN': '49.0.128.0/17',
4291 'MO': '60.246.0.0/16',
4292 'MP': '202.88.64.0/20',
4293 'MQ': '109.203.224.0/19',
4294 'MR': '41.188.64.0/18',
4295 'MS': '208.90.112.0/22',
4296 'MT': '46.11.0.0/16',
4297 'MU': '105.16.0.0/12',
4298 'MV': '27.114.128.0/18',
4299 'MW': '102.70.0.0/15',
4300 'MX': '187.192.0.0/11',
4301 'MY': '175.136.0.0/13',
4302 'MZ': '197.218.0.0/15',
4303 'NA': '41.182.0.0/16',
4304 'NC': '101.101.0.0/18',
4305 'NE': '197.214.0.0/18',
4306 'NF': '203.17.240.0/22',
4307 'NG': '105.112.0.0/12',
4308 'NI': '186.76.0.0/15',
4309 'NL': '145.96.0.0/11',
4310 'NO': '84.208.0.0/13',
4311 'NP': '36.252.0.0/15',
4312 'NR': '203.98.224.0/19',
4313 'NU': '49.156.48.0/22',
4314 'NZ': '49.224.0.0/14',
4315 'OM': '5.36.0.0/15',
4316 'PA': '186.72.0.0/15',
4317 'PE': '186.160.0.0/14',
4318 'PF': '123.50.64.0/18',
4319 'PG': '124.240.192.0/19',
4320 'PH': '49.144.0.0/13',
4321 'PK': '39.32.0.0/11',
4322 'PL': '83.0.0.0/11',
4323 'PM': '70.36.0.0/20',
4324 'PR': '66.50.0.0/16',
4325 'PS': '188.161.0.0/16',
4326 'PT': '85.240.0.0/13',
4327 'PW': '202.124.224.0/20',
4328 'PY': '181.120.0.0/14',
4329 'QA': '37.210.0.0/15',
4330 'RE': '102.35.0.0/16',
4331 'RO': '79.112.0.0/13',
4332 'RS': '93.86.0.0/15',
4333 'RU': '5.136.0.0/13',
4334 'RW': '41.186.0.0/16',
4335 'SA': '188.48.0.0/13',
4336 'SB': '202.1.160.0/19',
4337 'SC': '154.192.0.0/11',
4338 'SD': '102.120.0.0/13',
4339 'SE': '78.64.0.0/12',
4340 'SG': '8.128.0.0/10',
4341 'SI': '188.196.0.0/14',
4342 'SK': '78.98.0.0/15',
4343 'SL': '102.143.0.0/17',
4344 'SM': '89.186.32.0/19',
4345 'SN': '41.82.0.0/15',
4346 'SO': '154.115.192.0/18',
4347 'SR': '186.179.128.0/17',
4348 'SS': '105.235.208.0/21',
4349 'ST': '197.159.160.0/19',
4350 'SV': '168.243.0.0/16',
4351 'SX': '190.102.0.0/20',
4352 'SY': '5.0.0.0/16',
4353 'SZ': '41.84.224.0/19',
4354 'TC': '65.255.48.0/20',
4355 'TD': '154.68.128.0/19',
4356 'TG': '196.168.0.0/14',
4357 'TH': '171.96.0.0/13',
4358 'TJ': '85.9.128.0/18',
4359 'TK': '27.96.24.0/21',
4360 'TL': '180.189.160.0/20',
4361 'TM': '95.85.96.0/19',
4362 'TN': '197.0.0.0/11',
4363 'TO': '175.176.144.0/21',
4364 'TR': '78.160.0.0/11',
4365 'TT': '186.44.0.0/15',
4366 'TV': '202.2.96.0/19',
4367 'TW': '120.96.0.0/11',
4368 'TZ': '156.156.0.0/14',
4369 'UA': '37.52.0.0/14',
4370 'UG': '102.80.0.0/13',
4371 'US': '6.0.0.0/8',
4372 'UY': '167.56.0.0/13',
4373 'UZ': '84.54.64.0/18',
4374 'VA': '212.77.0.0/19',
4375 'VC': '207.191.240.0/21',
4376 'VE': '186.88.0.0/13',
4377 'VG': '66.81.192.0/20',
4378 'VI': '146.226.0.0/16',
4379 'VN': '14.160.0.0/11',
4380 'VU': '202.80.32.0/20',
4381 'WF': '117.20.32.0/21',
4382 'WS': '202.4.32.0/19',
4383 'YE': '134.35.0.0/16',
4384 'YT': '41.242.116.0/22',
4385 'ZA': '41.0.0.0/11',
4386 'ZM': '102.144.0.0/13',
4387 'ZW': '102.177.192.0/18',
4388 }
4389
4390 @classmethod
4391 def random_ipv4(cls, code_or_block):
4392 if len(code_or_block) == 2:
4393 block = cls._country_ip_map.get(code_or_block.upper())
4394 if not block:
4395 return None
4396 else:
4397 block = code_or_block
4398 addr, preflen = block.split('/')
4399 addr_min = compat_struct_unpack('!L', socket.inet_aton(addr))[0]
4400 addr_max = addr_min | (0xffffffff >> int(preflen))
4401 return compat_str(socket.inet_ntoa(
4402 compat_struct_pack('!L', random.randint(addr_min, addr_max))))
4403
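# Editor's note, a worked example: for the documentation block
# '203.0.113.0/24' (TEST-NET-3), addr_min is 0xcb007100 and
# addr_max = addr_min | (0xffffffff >> 24) = 0xcb0071ff, so random_ipv4
# returns a uniformly random address in 203.0.113.0-203.0.113.255.
# A two-letter argument is first resolved through _country_ip_map:
#
#     >>> GeoUtils.random_ipv4('DE')   # doctest: +SKIP (result is random)
#     '53.187.42.7'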
4404
4405 class PerRequestProxyHandler(compat_urllib_request.ProxyHandler):
4406 def __init__(self, proxies=None):
4407 # Set default handlers
4408 for type in ('http', 'https'):
4409 setattr(self, '%s_open' % type,
4410 lambda r, proxy='__noproxy__', type=type, meth=self.proxy_open:
4411 meth(r, proxy, type))
4412 compat_urllib_request.ProxyHandler.__init__(self, proxies)
4413
4414 def proxy_open(self, req, proxy, type):
4415 req_proxy = req.headers.get('Ytdl-request-proxy')
4416 if req_proxy is not None:
4417 proxy = req_proxy
4418 del req.headers['Ytdl-request-proxy']
4419
4420 if proxy == '__noproxy__':
4421 return None # No Proxy
4422 if compat_urlparse.urlparse(proxy).scheme.lower() in ('socks', 'socks4', 'socks4a', 'socks5'):
4423 req.add_header('Ytdl-socks-proxy', proxy)
4424 # yt-dlp's http/https handlers wrap the socket with SOCKS themselves
4425 return None
4426 return compat_urllib_request.ProxyHandler.proxy_open(
4427 self, req, proxy, type)
4428
4429
4430 # Both long_to_bytes and bytes_to_long are adapted from PyCrypto, which is
4431 # released into Public Domain
4432 # https://github.com/dlitz/pycrypto/blob/master/lib/Crypto/Util/number.py#L387
4433
4434 def long_to_bytes(n, blocksize=0):
4435 """long_to_bytes(n:long, blocksize:int) : string
4436 Convert a long integer to a byte string.
4437
4438 If optional blocksize is given and greater than zero, pad the front of the
4439 byte string with binary zeros so that the length is a multiple of
4440 blocksize.
4441 """
4442 # after much testing, this algorithm was deemed to be the fastest
4443 s = b''
4444 n = int(n)
4445 while n > 0:
4446 s = compat_struct_pack('>I', n & 0xffffffff) + s
4447 n = n >> 32
4448 # strip off leading zeros
4449 for i in range(len(s)):
4450 if s[i] != b'\000'[0]:
4451 break
4452 else:
4453 # only happens when n == 0
4454 s = b'\000'
4455 i = 0
4456 s = s[i:]
4457 # add back some pad bytes. this could be done more efficiently w.r.t. the
4458 # de-padding being done above, but sigh...
4459 if blocksize > 0 and len(s) % blocksize:
4460 s = (blocksize - len(s) % blocksize) * b'\000' + s
4461 return s
4462
4463
4464 def bytes_to_long(s):
4465 """bytes_to_long(string) : long
4466 Convert a byte string to a long integer.
4467
4468 This is (essentially) the inverse of long_to_bytes().
4469 """
4470 acc = 0
4471 length = len(s)
4472 if length % 4:
4473 extra = (4 - length % 4)
4474 s = b'\000' * extra + s
4475 length = length + extra
4476 for i in range(0, length, 4):
4477 acc = (acc << 32) + compat_struct_unpack('>I', s[i:i + 4])[0]
4478 return acc
4479
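# Editor's note, illustrative round-trip examples:
#
#     >>> bytes_to_long(b'\x01\x00')
#     256
#     >>> long_to_bytes(256)
#     b'\x01\x00'
#     >>> long_to_bytes(1, blocksize=4)   # front-padded to a multiple of 4
#     b'\x00\x00\x00\x01'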
4480
4481 def ohdave_rsa_encrypt(data, exponent, modulus):
4482 '''
4483 Implement OHDave's RSA algorithm. See http://www.ohdave.com/rsa/
4484
4485 Input:
4486 data: data to encrypt, bytes-like object
4487 exponent, modulus: parameter e and N of RSA algorithm, both integer
4488 Output: hex string of encrypted data
4489
4490 Limitation: supports one block encryption only
4491 '''
4492
4493 payload = int(binascii.hexlify(data[::-1]), 16)
4494 encrypted = pow(payload, exponent, modulus)
4495 return '%x' % encrypted
4496
4497
4498 def pkcs1pad(data, length):
4499 """
4500 Padding input data with PKCS#1 scheme
4501
4502 @param {int[]} data input data
4503 @param {int} length target length
4504 @returns {int[]} padded data
4505 """
4506 if len(data) > length - 11:
4507 raise ValueError('Input data too long for PKCS#1 padding')
4508
4509 pseudo_random = [random.randint(1, 255) for _ in range(length - len(data) - 3)]  # padding bytes must be non-zero (RFC 8017 §7.2.1)
4510 return [0, 2] + pseudo_random + [0] + data
4511
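# Editor's note, an illustrative example: padding a 3-byte message to a
# 16-byte block yields [0, 2, <10 pseudo-random padding bytes>, 0, *data], so
# the payload always sits after the first zero byte following the 0x02 marker:
#
#     >>> padded = pkcs1pad([1, 2, 3], 16)
#     >>> padded[:2], padded[-4:], len(padded)
#     ([0, 2], [0, 1, 2, 3], 16)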
4512
4513 def encode_base_n(num, n, table=None):
4514 FULL_TABLE = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
4515 if not table:
4516 table = FULL_TABLE[:n]
4517
4518 if n > len(table):
4519 raise ValueError('base %d exceeds table length %d' % (n, len(table)))
4520
4521 if num == 0:
4522 return table[0]
4523
4524 ret = ''
4525 while num:
4526 ret = table[num % n] + ret
4527 num = num // n
4528 return ret
4529
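# Editor's note, illustrative examples:
#
#     >>> encode_base_n(255, 16)
#     'ff'
#     >>> encode_base_n(35, 36)
#     'z'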
4530
4531 def decode_packed_codes(code):
4532 mobj = re.search(PACKED_CODES_RE, code)
4533 obfuscated_code, base, count, symbols = mobj.groups()
4534 base = int(base)
4535 count = int(count)
4536 symbols = symbols.split('|')
4537 symbol_table = {}
4538
4539 while count:
4540 count -= 1
4541 base_n_count = encode_base_n(count, base)
4542 symbol_table[base_n_count] = symbols[count] or base_n_count
4543
4544 return re.sub(
4545 r'\b(\w+)\b', lambda mobj: symbol_table[mobj.group(0)],
4546 obfuscated_code)
4547
4548
4549 def caesar(s, alphabet, shift):
4550 if shift == 0:
4551 return s
4552 l = len(alphabet)
4553 return ''.join(
4554 alphabet[(alphabet.index(c) + shift) % l] if c in alphabet else c
4555 for c in s)
4556
4557
4558 def rot47(s):
4559 return caesar(s, r'''!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~''', 47)
4560
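# Editor's note, illustrative examples; since 2 * 47 == 94 (the size of the
# printable-ASCII alphabet above), rot47 is its own inverse:
#
#     >>> caesar('abc', 'abcdefghijklmnopqrstuvwxyz', 1)
#     'bcd'
#     >>> rot47('Hello')
#     'w6==@'
#     >>> rot47(rot47('Hello'))
#     'Hello'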
4561
4562 def parse_m3u8_attributes(attrib):
4563 info = {}
4564 for (key, val) in re.findall(r'(?P<key>[A-Z0-9-]+)=(?P<val>"[^"]+"|[^",]+)(?:,|$)', attrib):
4565 if val.startswith('"'):
4566 val = val[1:-1]
4567 info[key] = val
4568 return info
4569
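# Editor's note, an illustrative example (quoted values may contain commas):
#
#     >>> parse_m3u8_attributes('BANDWIDTH=1280000,CODECS="mp4a.40.2,avc1.4d401f"')
#     {'BANDWIDTH': '1280000', 'CODECS': 'mp4a.40.2,avc1.4d401f'}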
4570
4571 def urshift(val, n):
4572 return val >> n if val >= 0 else (val + 0x100000000) >> n
4573
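# Editor's note, an illustrative example: urshift emulates an unsigned 32-bit
# right shift (JavaScript's >>>), which Python's arithmetic >> does not do for
# negative numbers:
#
#     >>> -1 >> 1, urshift(-1, 1)
#     (-1, 2147483647)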
4574
4575 # Based on png2str() written by @gdkchan and improved by @yokrysty
4576 # Originally posted at https://github.com/ytdl-org/youtube-dl/issues/9706
4577 def decode_png(png_data):
4578 # Reference: https://www.w3.org/TR/PNG/
4579 header = png_data[8:]
4580
4581 if png_data[:8] != b'\x89PNG\x0d\x0a\x1a\x0a' or header[4:8] != b'IHDR':
4582 raise IOError('Not a valid PNG file.')
4583
4584 int_map = {1: '>B', 2: '>H', 4: '>I'}
4585 unpack_integer = lambda x: compat_struct_unpack(int_map[len(x)], x)[0]
4586
4587 chunks = []
4588
4589 while header:
4590 length = unpack_integer(header[:4])
4591 header = header[4:]
4592
4593 chunk_type = header[:4]
4594 header = header[4:]
4595
4596 chunk_data = header[:length]
4597 header = header[length:]
4598
4599 header = header[4:] # Skip CRC
4600
4601 chunks.append({
4602 'type': chunk_type,
4603 'length': length,
4604 'data': chunk_data
4605 })
4606
4607 ihdr = chunks[0]['data']
4608
4609 width = unpack_integer(ihdr[:4])
4610 height = unpack_integer(ihdr[4:8])
4611
4612 idat = b''
4613
4614 for chunk in chunks:
4615 if chunk['type'] == b'IDAT':
4616 idat += chunk['data']
4617
4618 if not idat:
4619 raise IOError('Unable to read PNG data.')
4620
4621 decompressed_data = bytearray(zlib.decompress(idat))
4622
4623 stride = width * 3
4624 pixels = []
4625
4626 def _get_pixel(idx):
4627 x = idx % stride
4628 y = idx // stride
4629 return pixels[y][x]
4630
4631 for y in range(height):
4632 basePos = y * (1 + stride)
4633 filter_type = decompressed_data[basePos]
4634
4635 current_row = []
4636
4637 pixels.append(current_row)
4638
4639 for x in range(stride):
4640 color = decompressed_data[1 + basePos + x]
4641 basex = y * stride + x
4642 left = 0
4643 up = 0
4644
4645 if x > 2:
4646 left = _get_pixel(basex - 3)
4647 if y > 0:
4648 up = _get_pixel(basex - stride)
4649
4650 if filter_type == 1: # Sub
4651 color = (color + left) & 0xff
4652 elif filter_type == 2: # Up
4653 color = (color + up) & 0xff
4654 elif filter_type == 3: # Average
4655 color = (color + ((left + up) >> 1)) & 0xff
4656 elif filter_type == 4: # Paeth
4657 a = left
4658 b = up
4659 c = 0
4660
4661 if x > 2 and y > 0:
4662 c = _get_pixel(basex - stride - 3)
4663
4664 p = a + b - c
4665
4666 pa = abs(p - a)
4667 pb = abs(p - b)
4668 pc = abs(p - c)
4669
4670 if pa <= pb and pa <= pc:
4671 color = (color + a) & 0xff
4672 elif pb <= pc:
4673 color = (color + b) & 0xff
4674 else:
4675 color = (color + c) & 0xff
4676
4677 current_row.append(color)
4678
4679 return width, height, pixels
4680
4681
4682 def write_xattr(path, key, value):
4683 # This mess below finds the best xattr tool for the job
4684 try:
4685 # try the pyxattr module...
4686 import xattr
4687
4688 if hasattr(xattr, 'set'): # pyxattr
4689 # Unicode arguments are not supported in python-pyxattr until
4690 # version 0.5.0
4691 # See https://github.com/ytdl-org/youtube-dl/issues/5498
4692 pyxattr_required_version = '0.5.0'
4693 if version_tuple(xattr.__version__) < version_tuple(pyxattr_required_version):
4694 # TODO: fallback to CLI tools
4695 raise XAttrUnavailableError(
4696 'python-pyxattr is detected but is too old. '
4697 'yt-dlp requires %s or above while your version is %s. '
4698 'Falling back to other xattr implementations' % (
4699 pyxattr_required_version, xattr.__version__))
4700
4701 setxattr = xattr.set
4702 else: # xattr
4703 setxattr = xattr.setxattr
4704
4705 try:
4706 setxattr(path, key, value)
4707 except EnvironmentError as e:
4708 raise XAttrMetadataError(e.errno, e.strerror)
4709
4710 except ImportError:
4711 if compat_os_name == 'nt':
4712 # Write xattrs to NTFS Alternate Data Streams:
4713 # http://en.wikipedia.org/wiki/NTFS#Alternate_data_streams_.28ADS.29
4714 assert ':' not in key
4715 assert os.path.exists(path)
4716
4717 ads_fn = path + ':' + key
4718 try:
4719 with open(ads_fn, 'wb') as f:
4720 f.write(value)
4721 except EnvironmentError as e:
4722 raise XAttrMetadataError(e.errno, e.strerror)
4723 else:
4724 user_has_setfattr = check_executable('setfattr', ['--version'])
4725 user_has_xattr = check_executable('xattr', ['-h'])
4726
4727 if user_has_setfattr or user_has_xattr:
4728
4729 value = value.decode('utf-8')
4730 if user_has_setfattr:
4731 executable = 'setfattr'
4732 opts = ['-n', key, '-v', value]
4733 elif user_has_xattr:
4734 executable = 'xattr'
4735 opts = ['-w', key, value]
4736
4737 cmd = ([encodeFilename(executable, True)]
4738 + [encodeArgument(o) for o in opts]
4739 + [encodeFilename(path, True)])
4740
4741 try:
4742 p = Popen(
4743 cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE)
4744 except EnvironmentError as e:
4745 raise XAttrMetadataError(e.errno, e.strerror)
4746 stdout, stderr = p.communicate_or_kill()
4747 stderr = stderr.decode('utf-8', 'replace')
4748 if p.returncode != 0:
4749 raise XAttrMetadataError(p.returncode, stderr)
4750
4751 else:
4752 # On Unix, but neither pyxattr, setfattr nor xattr could be found.
4753 if sys.platform.startswith('linux'):
4754 raise XAttrUnavailableError(
4755 "Couldn't find a tool to set the xattrs. "
4756 "Install either the python 'pyxattr' or 'xattr' "
4757 "modules, or the GNU 'attr' package "
4758 "(which contains the 'setfattr' tool).")
4759 else:
4760 raise XAttrUnavailableError(
4761 "Couldn't find a tool to set the xattrs. "
4762 "Install either the python 'xattr' module, "
4763 "or the 'xattr' binary.")
4764
4765
4766 def random_birthday(year_field, month_field, day_field):
4767 start_date = datetime.date(1950, 1, 1)
4768 end_date = datetime.date(1995, 12, 31)
4769 offset = random.randint(0, (end_date - start_date).days)
4770 random_date = start_date + datetime.timedelta(offset)
4771 return {
4772 year_field: str(random_date.year),
4773 month_field: str(random_date.month),
4774 day_field: str(random_date.day),
4775 }
4776
4777
4778 # Templates for internet shortcut files, which are plain text files.
4779 DOT_URL_LINK_TEMPLATE = '''
4780 [InternetShortcut]
4781 URL=%(url)s
4782 '''.lstrip()
4783
4784 DOT_WEBLOC_LINK_TEMPLATE = '''
4785 <?xml version="1.0" encoding="UTF-8"?>
4786 <!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
4787 <plist version="1.0">
4788 <dict>
4789 \t<key>URL</key>
4790 \t<string>%(url)s</string>
4791 </dict>
4792 </plist>
4793 '''.lstrip()
4794
4795 DOT_DESKTOP_LINK_TEMPLATE = '''
4796 [Desktop Entry]
4797 Encoding=UTF-8
4798 Name=%(filename)s
4799 Type=Link
4800 URL=%(url)s
4801 Icon=text-html
4802 '''.lstrip()
4803
4804 LINK_TEMPLATES = {
4805 'url': DOT_URL_LINK_TEMPLATE,
4806 'desktop': DOT_DESKTOP_LINK_TEMPLATE,
4807 'webloc': DOT_WEBLOC_LINK_TEMPLATE,
4808 }
4809
4810
4811 def iri_to_uri(iri):
4812 """
4813 Converts an IRI (Internationalized Resource Identifier, allowing Unicode characters) to a URI (Uniform Resource Identifier, ASCII-only).
4814
4815 The function doesn't add an additional layer of escaping; e.g., it doesn't escape `%3C` as `%253C`. Instead, it percent-escapes characters with an underlying UTF-8 encoding *besides* those already escaped, leaving the URI intact.
4816 """
4817
4818 iri_parts = compat_urllib_parse_urlparse(iri)
4819
4820 if '[' in iri_parts.netloc:
4821 raise ValueError('IPv6 URIs are not, yet, supported.')
4822 # Querying `.netloc`, when there's only one bracket, also raises a ValueError.
4823
4824 # The `safe` argument values, that the following code uses, contain the characters that should not be percent-encoded. Everything else but letters, digits and '_.-' will be percent-encoded with an underlying UTF-8 encoding. Everything already percent-encoded will be left as is.
4825
4826 net_location = ''
4827 if iri_parts.username:
4828 net_location += compat_urllib_parse_quote(iri_parts.username, safe=r"!$%&'()*+,~")
4829 if iri_parts.password is not None:
4830 net_location += ':' + compat_urllib_parse_quote(iri_parts.password, safe=r"!$%&'()*+,~")
4831 net_location += '@'
4832
4833 net_location += iri_parts.hostname.encode('idna').decode('utf-8') # Punycode for Unicode hostnames.
4834 # The 'idna' encoding produces ASCII text.
4835 if iri_parts.port is not None and iri_parts.port != 80:
4836 net_location += ':' + str(iri_parts.port)
4837
4838 return compat_urllib_parse_urlunparse(
4839 (iri_parts.scheme,
4840 net_location,
4841
4842 compat_urllib_parse_quote_plus(iri_parts.path, safe=r"!$%&'()*+,/:;=@|~"),
4843
4844 # Unsure about the `safe` argument, since this is a legacy way of handling parameters.
4845 compat_urllib_parse_quote_plus(iri_parts.params, safe=r"!$%&'()*+,/:;=@|~"),
4846
4847 # Not totally sure about the `safe` argument, since the source does not explicitly mention the query URI component.
4848 compat_urllib_parse_quote_plus(iri_parts.query, safe=r"!$%&'()*+,/:;=?@{|}~"),
4849
4850 compat_urllib_parse_quote_plus(iri_parts.fragment, safe=r"!#$%&'()*+,/:;=?@{|}~")))
4851
4852 # Source for `safe` arguments: https://url.spec.whatwg.org/#percent-encoded-bytes.
4853
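# Editor's note, an illustrative example (hypothetical host): the hostname is
# punycoded while path and query are percent-encoded as UTF-8:
#
#     >>> iri_to_uri('https://münchen.example/straße?q=ä')
#     'https://xn--mnchen-3ya.example/stra%C3%9Fe?q=%C3%A4'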
4854
4855 def to_high_limit_path(path):
4856 if sys.platform in ['win32', 'cygwin']:
4857 # Work around MAX_PATH limitation on Windows. The maximum allowed length for the individual path segments may still be quite limited.
4858 return r'\\?\ '.rstrip() + os.path.abspath(path)  # the rstrip dance is needed since a raw string cannot end with a backslash
4859
4860 return path
4861
4862
4863 def format_field(obj, field=None, template='%s', ignore=(None, ''), default='', func=None):
4864 if field is None:
4865 val = obj if obj is not None else default
4866 else:
4867 val = obj.get(field, default)
4868 if func and val not in ignore:
4869 val = func(val)
4870 return template % val if val not in ignore else default
4871
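# Editor's note, illustrative examples:
#
#     >>> format_field({'width': 1920}, 'width', '%dpx')
#     '1920px'
#     >>> format_field({'width': None}, 'width', '%dpx', default='unknown')
#     'unknown'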
4872
4873 def clean_podcast_url(url):
4874 return re.sub(r'''(?x)
4875 (?:
4876 (?:
4877 chtbl\.com/track|
4878 media\.blubrry\.com| # https://create.blubrry.com/resources/podcast-media-download-statistics/getting-started/
4879 play\.podtrac\.com
4880 )/[^/]+|
4881 (?:dts|www)\.podtrac\.com/(?:pts/)?redirect\.[0-9a-z]{3,4}| # http://analytics.podtrac.com/how-to-measure
4882 flex\.acast\.com|
4883 pd(?:
4884 cn\.co| # https://podcorn.com/analytics-prefix/
4885 st\.fm # https://podsights.com/docs/
4886 )/e
4887 )/''', '', url)
4888
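# Editor's note, an illustrative example (hypothetical URL): the tracking
# prefix is stripped, leaving the direct media URL:
#
#     >>> clean_podcast_url('https://chtbl.com/track/12345/traffic.example.com/episode.mp3')
#     'https://traffic.example.com/episode.mp3'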
4889
4890 _HEX_TABLE = '0123456789abcdef'
4891
4892
4893 def random_uuidv4():
4894 return re.sub(r'[xy]', lambda x: _HEX_TABLE[random.randint(0, 15)], 'xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx')
4895
4896
4897 def make_dir(path, to_screen=None):
4898 try:
4899 dn = os.path.dirname(path)
4900 if dn and not os.path.exists(dn):
4901 os.makedirs(dn)
4902 return True
4903 except (OSError, IOError) as err:
4904 if callable(to_screen):
4905 to_screen('unable to create directory ' + error_to_compat_str(err))
4906 return False
4907
4908
4909 def get_executable_path():
4910 from zipimport import zipimporter
4911 if hasattr(sys, 'frozen'): # Running from PyInstaller
4912 path = os.path.dirname(sys.executable)
4913 elif isinstance(globals().get('__loader__'), zipimporter): # Running from ZIP
4914 path = os.path.join(os.path.dirname(__file__), '../..')
4915 else:
4916 path = os.path.join(os.path.dirname(__file__), '..')
4917 return os.path.abspath(path)
4918
4919
4920 def load_plugins(name, suffix, namespace):
4921 classes = {}
4922 try:
4923 plugins_spec = importlib.util.spec_from_file_location(
4924 name, os.path.join(get_executable_path(), 'ytdlp_plugins', name, '__init__.py'))
4925 plugins = importlib.util.module_from_spec(plugins_spec)
4926 sys.modules[plugins_spec.name] = plugins
4927 plugins_spec.loader.exec_module(plugins)
4928 for name in dir(plugins):
4929 if name in namespace:
4930 continue
4931 if not name.endswith(suffix):
4932 continue
4933 klass = getattr(plugins, name)
4934 classes[name] = namespace[name] = klass
4935 except FileNotFoundError:
4936 pass
4937 return classes
4938
4939
4940 def traverse_obj(
4941 obj, *path_list, default=None, expected_type=None, get_all=True,
4942 casesense=True, is_user_input=False, traverse_string=False):
4943 ''' Traverse nested list/dict/tuple
4944 @param path_list A list of paths which are checked one by one.
4945 Each path is a list of keys where each key is a string,
4946 a function, a tuple of strings/None or "...".
3947 When a function is given, it takes the key as argument and
3948 returns whether the key matches or not. When a tuple is given,
3949 all the keys given in the tuple are traversed.
3950 "..." traverses all the keys in the object, and
3951 "None" returns the object without traversal.
4952 @param default Default value to return
4953 @param expected_type Only accept final value of this type (Can also be any callable)
4954 @param get_all Return all the values obtained from a path or only the first one
4955 @param casesense Whether to consider dictionary keys as case sensitive
4956 @param is_user_input Whether the keys are generated from user input. If True,
4957 strings are converted to int/slice if necessary
4958 @param traverse_string Whether to traverse inside strings. If True, any
4959 non-compatible object will also be converted into a string
4960 # TODO: Write tests
4961 '''
4962 if not casesense:
4963 _lower = lambda k: (k.lower() if isinstance(k, str) else k)
4964 path_list = (map(_lower, variadic(path)) for path in path_list)
4965
4966 def _traverse_obj(obj, path, _current_depth=0):
4967 nonlocal depth
4968 path = tuple(variadic(path))
4969 for i, key in enumerate(path):
4970 if None in (key, obj):
4971 return obj
4972 if isinstance(key, (list, tuple)):
4973 obj = [_traverse_obj(obj, sub_key, _current_depth) for sub_key in key]
4974 key = ...
4975 if key is ...:
4976 obj = (obj.values() if isinstance(obj, dict)
4977 else obj if isinstance(obj, (list, tuple, LazyList))
4978 else str(obj) if traverse_string else [])
4979 _current_depth += 1
4980 depth = max(depth, _current_depth)
4981 return [_traverse_obj(inner_obj, path[i + 1:], _current_depth) for inner_obj in obj]
4982 elif callable(key):
4983 if isinstance(obj, (list, tuple, LazyList)):
4984 obj = enumerate(obj)
4985 elif isinstance(obj, dict):
4986 obj = obj.items()
4987 else:
4988 if not traverse_string:
4989 return None
4990 obj = str(obj)
4991 _current_depth += 1
4992 depth = max(depth, _current_depth)
4993 return [_traverse_obj(v, path[i + 1:], _current_depth) for k, v in obj if key(k)]
4994 elif isinstance(obj, dict) and not (is_user_input and key == ':'):
4995 obj = (obj.get(key) if casesense or (key in obj)
4996 else next((v for k, v in obj.items() if _lower(k) == key), None))
4997 else:
4998 if is_user_input:
4999 key = (int_or_none(key) if ':' not in key
5000 else slice(*map(int_or_none, key.split(':'))))
5001 if key == slice(None):
5002 return _traverse_obj(obj, (..., *path[i + 1:]), _current_depth)
5003 if not isinstance(key, (int, slice)):
5004 return None
5005 if not isinstance(obj, (list, tuple, LazyList)):
5006 if not traverse_string:
5007 return None
5008 obj = str(obj)
5009 try:
5010 obj = obj[key]
5011 except IndexError:
5012 return None
5013 return obj
5014
5015 if isinstance(expected_type, type):
5016 type_test = lambda val: val if isinstance(val, expected_type) else None
5017 elif expected_type is not None:
5018 type_test = expected_type
5019 else:
5020 type_test = lambda val: val
5021
5022 for path in path_list:
5023 depth = 0
5024 val = _traverse_obj(obj, path)
5025 if val is not None:
5026 if depth:
5027 for _ in range(depth - 1):
5028 val = itertools.chain.from_iterable(v for v in val if v is not None)
5029 val = [v for v in map(type_test, val) if v is not None]
5030 if val:
5031 return val if get_all else val[0]
5032 else:
5033 val = type_test(val)
5034 if val is not None:
5035 return val
5036 return default
5037
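# Editor's note, illustrative examples for traverse_obj (the docstring above
# still carries a "TODO: Write tests"):
#
#     >>> d = {'playlist': [{'id': 1, 'title': 'a'}, {'id': 2}]}
#     >>> traverse_obj(d, ('playlist', 0, 'title'))
#     'a'
#     >>> traverse_obj(d, ('playlist', ..., 'id'))   # '...' visits every entry
#     [1, 2]
#     >>> traverse_obj(d, ('playlist', 1, 'title'), ('playlist', 0, 'title'))  # first path that yields a value wins
#     'a'
#     >>> traverse_obj(d, ('playlist', ..., 'title'), get_all=False)
#     'a'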
5038
5039 # Deprecated
5040 def traverse_dict(dictn, keys, casesense=True):
5041 write_string('DeprecationWarning: yt_dlp.utils.traverse_dict is deprecated '
5042 'and may be removed in a future version. Use yt_dlp.utils.traverse_obj instead')
5043 return traverse_obj(dictn, keys, casesense=casesense, is_user_input=True, traverse_string=True)
5044
5045
5046 def variadic(x, allowed_types=(str, bytes, dict)):
5047 return x if isinstance(x, collections.abc.Iterable) and not isinstance(x, allowed_types) else (x,)
5048
5049
5050 # create a JSON Web Signature (jws) with HS256 algorithm
5051 # the resulting format is in JWS Compact Serialization
5052 # implemented following JWT https://www.rfc-editor.org/rfc/rfc7519.html
5053 # implemented following JWS https://www.rfc-editor.org/rfc/rfc7515.html
5054 def jwt_encode_hs256(payload_data, key, headers={}):
5055 header_data = {
5056 'alg': 'HS256',
5057 'typ': 'JWT',
5058 }
5059 if headers:
5060 header_data.update(headers)
5061 header_b64 = base64.b64encode(json.dumps(header_data).encode('utf-8'))
5062 payload_b64 = base64.b64encode(json.dumps(payload_data).encode('utf-8'))
5063 h = hmac.new(key.encode('utf-8'), header_b64 + b'.' + payload_b64, hashlib.sha256)
5064 signature_b64 = base64.b64encode(h.digest())
5065 token = header_b64 + b'.' + payload_b64 + b'.' + signature_b64
5066 return token
5067
5068
5069 # This can be extended in the future to verify the signature, parse the header and return the algorithm used if it is not HS256
5070 def jwt_decode_hs256(jwt):
5071 header_b64, payload_b64, signature_b64 = jwt.split('.')
5072 payload_data = json.loads(base64.urlsafe_b64decode(payload_b64))
5073 return payload_data
5074
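# Editor's note: jwt_encode_hs256 emits padded standard base64 rather than the
# unpadded base64url that RFC 7515 specifies, so strict consumers may reject
# its tokens; jwt_decode_hs256 happens to accept either alphabet. An
# illustrative round trip:
#
#     >>> token = jwt_encode_hs256({'uid': 1}, 'secret')
#     >>> jwt_decode_hs256(token.decode('utf-8'))
#     {'uid': 1}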
5075
5076 def supports_terminal_sequences(stream):
5077 if compat_os_name == 'nt':
5078 from .compat import WINDOWS_VT_MODE # Must be imported locally
5079 if not WINDOWS_VT_MODE or get_windows_version() < (10, 0, 10586):
5080 return False
5081 elif not os.getenv('TERM'):
5082 return False
5083 try:
5084 return stream.isatty()
5085 except BaseException:
5086 return False
5087
5088
5089 _terminal_sequences_re = re.compile('\033\\[[^m]+m')
5090
5091
5092 def remove_terminal_sequences(string):
5093 return _terminal_sequences_re.sub('', string)
5094
5095
5096 def number_of_digits(number):
5097 return len('%d' % number)
5098
5099
5100 def join_nonempty(*values, delim='-', from_dict=None):
5101 if from_dict is not None:
5102 values = map(from_dict.get, values)
5103 return delim.join(map(str, filter(None, values)))
5104
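# Editor's note, illustrative examples (note that filter(None, ...) also drops
# falsy values such as 0 and ''):
#
#     >>> join_nonempty('mp4', None, '', 'dash')
#     'mp4-dash'
#     >>> join_nonempty('height', 'width', delim='x', from_dict={'height': 1080, 'width': 1920})
#     '1080x1920'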
5105
5106 class Config:
5107 own_args = None
5108 filename = None
5109 __initialized = False
5110
5111 def __init__(self, parser, label=None):
5112 self._parser, self.label = parser, label
5113 self._loaded_paths, self.configs = set(), []
5114
5115 def init(self, args=None, filename=None):
5116 assert not self.__initialized
5117 if filename:
5118 location = os.path.realpath(filename)
5119 if location in self._loaded_paths:
5120 return False
5121 self._loaded_paths.add(location)
5122
5123 self.__initialized = True
5124 self.own_args, self.filename = args, filename
5125 for location in self._parser.parse_args(args)[0].config_locations or []:
5126 location = compat_expanduser(location)
5127 if os.path.isdir(location):
5128 location = os.path.join(location, 'yt-dlp.conf')
5129 if not os.path.exists(location):
5130 self._parser.error(f'config location {location} does not exist')
5131 self.append_config(self.read_file(location), location)
5132 return True
5133
5134 def __str__(self):
5135 label = join_nonempty(
5136 self.label, 'config', f'"{self.filename}"' if self.filename else '',
5137 delim=' ')
5138 return join_nonempty(
5139 self.own_args is not None and f'{label[0].upper()}{label[1:]}: {self.hide_login_info(self.own_args)}',
5140 *(f'\n{c}'.replace('\n', '\n| ')[1:] for c in self.configs),
5141 delim='\n')
5142
5143 @staticmethod
5144 def read_file(filename, default=[]):
5145 try:
5146 optionf = open(filename)
5147 except IOError:
5148 return default # silently skip if file is not present
5149 try:
5150 # FIXME: https://github.com/ytdl-org/youtube-dl/commit/dfe5fa49aed02cf36ba9f743b11b0903554b5e56
5151 contents = optionf.read()
5152 if sys.version_info < (3,):
5153 contents = contents.decode(preferredencoding())
5154 res = compat_shlex_split(contents, comments=True)
5155 finally:
5156 optionf.close()
5157 return res
5158
5159 @staticmethod
5160 def hide_login_info(opts):
5161 PRIVATE_OPTS = set(['-p', '--password', '-u', '--username', '--video-password', '--ap-password', '--ap-username'])
5162 eqre = re.compile('^(?P<key>' + ('|'.join(re.escape(po) for po in PRIVATE_OPTS)) + ')=.+$')
5163
5164 def _scrub_eq(o):
5165 m = eqre.match(o)
5166 if m:
5167 return m.group('key') + '=PRIVATE'
5168 else:
5169 return o
5170
5171 opts = list(map(_scrub_eq, opts))
5172 for idx, opt in enumerate(opts):
5173 if opt in PRIVATE_OPTS and idx + 1 < len(opts):
5174 opts[idx + 1] = 'PRIVATE'
5175 return opts
5176
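# Editor's note, an illustrative example: credentials are scrubbed both in
# '--opt=value' and in '--opt value' form:
#
#     >>> Config.hide_login_info(['-u', 'me@example.com', '--password=hunter2'])
#     ['-u', 'PRIVATE', '--password=PRIVATE']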
5177 def append_config(self, *args, label=None):
5178 config = type(self)(self._parser, label)
5179 config._loaded_paths = self._loaded_paths
5180 if config.init(*args):
5181 self.configs.append(config)
5182
5183 @property
5184 def all_args(self):
5185 for config in reversed(self.configs):
5186 yield from config.all_args
5187 yield from self.own_args or []
5188
5189 def parse_args(self):
5190 return self._parser.parse_args(list(self.all_args))