-#!/usr/bin/env python3
+import asyncio
import atexit
import base64
import binascii
import codecs
import collections
import contextlib
-import ctypes
import datetime
import email.header
import email.utils
import gzip
import hashlib
import hmac
+import html.entities
+import html.parser
+import http.client
+import http.cookiejar
import importlib.util
+import inspect
import io
import itertools
import json
import shlex
import socket
import ssl
+import struct
import subprocess
import sys
import tempfile
import time
import traceback
import types
+import unicodedata
+import urllib.error
import urllib.parse
+import urllib.request
import xml.etree.ElementTree
import zlib
-from .compat import asyncio, functools # isort: split
+from .compat import functools # isort: split
from .compat import (
- compat_chr,
- compat_cookiejar,
compat_etree_fromstring,
compat_expanduser,
- compat_html_entities,
- compat_html_entities_html5,
compat_HTMLParseError,
- compat_HTMLParser,
- compat_http_client,
- compat_HTTPError,
compat_os_name,
- compat_parse_qs,
compat_shlex_quote,
- compat_str,
- compat_struct_pack,
- compat_struct_unpack,
- compat_urllib_error,
- compat_urllib_parse_unquote_plus,
- compat_urllib_parse_urlencode,
- compat_urllib_parse_urlparse,
- compat_urllib_request,
- compat_urlparse,
)
-from .dependencies import brotli, certifi, websockets
+from .dependencies import brotli, certifi, websockets, xattr
from .socks import ProxyType, sockssocket
# In Python < 2.6.5, urlsplit() suffers from bug https://bugs.python.org/issue7904
# URLs with protocols not in urlparse.uses_netloc are not handled correctly
for scheme in ('socks', 'socks4', 'socks4a', 'socks5'):
- if scheme not in compat_urlparse.uses_netloc:
- compat_urlparse.uses_netloc.append(scheme)
+ if scheme not in urllib.parse.uses_netloc:
+ urllib.parse.uses_netloc.append(scheme)
# This is not clearly defined otherwise
NO_DEFAULT = object()
+IDENTITY = lambda x: x
ENGLISH_MONTH_NAMES = [
'January', 'February', 'March', 'April', 'May', 'June',
'juillet', 'août', 'septembre', 'octobre', 'novembre', 'décembre'],
}
-KNOWN_EXTENSIONS = (
- 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'aac',
- 'flv', 'f4v', 'f4a', 'f4b',
- 'webm', 'ogg', 'ogv', 'oga', 'ogx', 'spx', 'opus',
- 'mkv', 'mka', 'mk3d',
- 'avi', 'divx',
- 'mov',
- 'asf', 'wmv', 'wma',
- '3gp', '3g2',
- 'mp3',
- 'flac',
- 'ape',
- 'wav',
- 'f4f', 'f4m', 'm3u8', 'smil')
+# From https://github.com/python/cpython/blob/3.11/Lib/email/_parseaddr.py#L36-L42
+TIMEZONE_NAMES = {
+ 'UT': 0, 'UTC': 0, 'GMT': 0, 'Z': 0,
+ 'AST': -4, 'ADT': -3, # Atlantic (used in Canada)
+ 'EST': -5, 'EDT': -4, # Eastern
+ 'CST': -6, 'CDT': -5, # Central
+ 'MST': -7, 'MDT': -6, # Mountain
+ 'PST': -8, 'PDT': -7 # Pacific
+}
# needed for sanitizing filenames in restricted mode
ACCENT_CHARS = dict(zip('ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖŐØŒÙÚÛÜŰÝÞßàáâãäåæçèéêëìíîïðñòóôõöőøœùúûüűýþÿ',
'%d/%m/%Y',
'%d/%m/%y',
'%d/%m/%Y %H:%M:%S',
+ '%d-%m-%Y %H:%M',
])
DATE_FORMATS_MONTH_FIRST = list(DATE_FORMATS)
])
PACKED_CODES_RE = r"}\('(.+)',(\d+),(\d+),'([^']+)'\.split\('\|'\)"
-JSON_LD_RE = r'(?is)<script[^>]+type=(["\']?)application/ld\+json\1[^>]*>(?P<json_ld>.+?)</script>'
+JSON_LD_RE = r'(?is)<script[^>]+type=(["\']?)application/ld\+json\1[^>]*>\s*(?P<json_ld>{.+?})\s*</script>'
NUMBER_RE = r'\d+(?:\.\d+)?'
def _find_xpath(xpath):
return node.find(xpath)
- if isinstance(xpath, (str, compat_str)):
+ if isinstance(xpath, str):
n = _find_xpath(xpath)
else:
for xp in xpath:
)
-class HTMLBreakOnClosingTagParser(compat_HTMLParser):
+class HTMLBreakOnClosingTagParser(html.parser.HTMLParser):
"""
HTML parser which raises HTMLBreakOnClosingTagException upon reaching the
closing tag for the first opening tag it has encountered, and can be used
def __init__(self):
self.tagstack = collections.deque()
- compat_HTMLParser.__init__(self)
+ html.parser.HTMLParser.__init__(self)
def __enter__(self):
return self
raise compat_HTMLParseError('unexpected end of html')
-class HTMLAttributeParser(compat_HTMLParser):
+class HTMLAttributeParser(html.parser.HTMLParser):
"""Trivial HTML parser to gather the attributes for a single element"""
def __init__(self):
self.attrs = {}
- compat_HTMLParser.__init__(self)
+ html.parser.HTMLParser.__init__(self)
def handle_starttag(self, tag, attrs):
self.attrs = dict(attrs)
-class HTMLListAttrsParser(compat_HTMLParser):
+class HTMLListAttrsParser(html.parser.HTMLParser):
"""HTML parser to gather the attributes for the elements of a list"""
def __init__(self):
- compat_HTMLParser.__init__(self)
+ html.parser.HTMLParser.__init__(self)
self.items = []
self._level = 0
if filename == '-':
if sys.platform == 'win32':
import msvcrt
- msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
+
+ # stdout may be any IO stream, e.g. when using contextlib.redirect_stdout
+ with contextlib.suppress(io.UnsupportedOperation):
+ msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
return (sys.stdout.buffer if hasattr(sys.stdout, 'buffer') else sys.stdout, filename)
for attempt in range(2):
return ACCENT_CHARS[char]
elif not restricted and char == '\n':
return '\0 '
+ elif is_id is NO_DEFAULT and not restricted and char in '"*:<>?|/\\':
+ # Replace with their full-width unicode counterparts
+ return {'/': '\u29F8', '\\': '\u29f9'}.get(char, chr(ord(char) + 0xfee0))
elif char == '?' or ord(char) < 32 or ord(char) == 127:
return ''
elif char == '"':
return '\0_'
return char
+ if restricted and is_id is NO_DEFAULT:
+ s = unicodedata.normalize('NFKC', s)
s = re.sub(r'[0-9]+(?::[0-9]+)+', lambda m: m.group(0).replace(':', '_'), s) # Handle timestamps
result = ''.join(map(replace_insane, s))
if is_id is NO_DEFAULT:
- result = re.sub('(\0.)(?:(?=\\1)..)+', r'\1', result) # Remove repeated substitute chars
- STRIP_RE = '(?:\0.|[ _-])*'
+ result = re.sub(r'(\0.)(?:(?=\1)..)+', r'\1', result) # Remove repeated substitute chars
+ STRIP_RE = r'(?:\0.|[ _-])*'
result = re.sub(f'^\0.{STRIP_RE}|{STRIP_RE}\0.$', '', result) # Remove substitute chars from start/end
result = result.replace('\0', '') or '_'
return os.path.join(*sanitized_path)
-def sanitize_url(url):
+def sanitize_url(url, *, scheme='http'):
# Prepend protocol-less URLs with `http:` scheme in order to mitigate
# the number of unwanted failures due to missing protocol
if url is None:
return
elif url.startswith('//'):
- return 'http:%s' % url
+ return f'{scheme}:{url}'
# Fix some common typos seen so far
COMMON_TYPOS = (
# https://github.com/ytdl-org/youtube-dl/issues/15649
def extract_basic_auth(url):
- parts = compat_urlparse.urlsplit(url)
+ parts = urllib.parse.urlsplit(url)
if parts.username is None:
return url, None
- url = compat_urlparse.urlunsplit(parts._replace(netloc=(
+ url = urllib.parse.urlunsplit(parts._replace(netloc=(
parts.hostname if parts.port is None
else '%s:%d' % (parts.hostname, parts.port))))
auth_payload = base64.b64encode(
if auth_header is not None:
headers = args[1] if len(args) >= 2 else kwargs.setdefault('headers', {})
headers['Authorization'] = auth_header
- return compat_urllib_request.Request(url, *args, **kwargs)
+ return urllib.request.Request(url, *args, **kwargs)
def expand_path(s):
entity = entity_with_semicolon[:-1]
# Known non-numeric HTML entity
- if entity in compat_html_entities.name2codepoint:
- return compat_chr(compat_html_entities.name2codepoint[entity])
+ if entity in html.entities.name2codepoint:
+ return chr(html.entities.name2codepoint[entity])
- # TODO: HTML5 allows entities without a semicolon. For example,
- # 'Éric' should be decoded as 'Éric'.
- if entity_with_semicolon in compat_html_entities_html5:
- return compat_html_entities_html5[entity_with_semicolon]
+ # TODO: HTML5 allows entities without a semicolon.
+ # E.g. 'Éric' should be decoded as 'Éric'.
+ if entity_with_semicolon in html.entities.html5:
+ return html.entities.html5[entity_with_semicolon]
mobj = re.match(r'#(x[0-9a-fA-F]+|[0-9]+)', entity)
if mobj is not None:
base = 10
# See https://github.com/ytdl-org/youtube-dl/issues/7518
with contextlib.suppress(ValueError):
- return compat_chr(int(numstr, base))
+ return chr(int(numstr, base))
# Unknown entity in name, return its literal representation
return '&%s;' % entity
def encodeArgument(s):
# Legacy code that uses byte strings
# Uncomment the following line after fixing all post processors
- # assert isinstance(s, str), 'Internal error: %r should be of type %r, is %r' % (s, compat_str, type(s))
+ # assert isinstance(s, str), 'Internal error: %r should be of type %r, is %r' % (s, str, type(s))
return s if isinstance(s, str) else s.decode('ascii')
if isinstance(optval, bytes):
optval = optval.decode(preferredencoding())
- assert isinstance(optval, compat_str)
+ assert isinstance(optval, str)
return optval
if opts_check_certificate:
if has_certifi and 'no-certifi' not in params.get('compat_opts', []):
context.load_verify_locations(cafile=certifi.where())
- try:
- context.load_default_certs()
- # Work around the issue in load_default_certs when there are bad certificates. See:
- # https://github.com/yt-dlp/yt-dlp/issues/1060,
- # https://bugs.python.org/issue35665, https://bugs.python.org/issue45312
- except ssl.SSLError:
- # enum_certificates is not present in mingw python. See https://github.com/yt-dlp/yt-dlp/issues/1151
- if sys.platform == 'win32' and hasattr(ssl, 'enum_certificates'):
- for storename in ('CA', 'ROOT'):
- _ssl_load_windows_store_certs(context, storename)
- context.set_default_verify_paths()
+ else:
+ try:
+ context.load_default_certs()
+ # Work around the issue in load_default_certs when there are bad certificates. See:
+ # https://github.com/yt-dlp/yt-dlp/issues/1060,
+ # https://bugs.python.org/issue35665, https://bugs.python.org/issue45312
+ except ssl.SSLError:
+ # enum_certificates is not present in mingw python. See https://github.com/yt-dlp/yt-dlp/issues/1151
+ if sys.platform == 'win32' and hasattr(ssl, 'enum_certificates'):
+ for storename in ('CA', 'ROOT'):
+ _ssl_load_windows_store_certs(context, storename)
+ context.set_default_verify_paths()
client_certfile = params.get('client_certificate')
if client_certfile:
def bug_reports_message(before=';'):
- msg = ('please report this issue on https://github.com/yt-dlp/yt-dlp/issues?q= , '
- 'filling out the appropriate issue template. '
- 'Confirm you are on the latest version using yt-dlp -U')
+ from .update import REPOSITORY
+
+ msg = (f'please report this issue on https://github.com/{REPOSITORY}/issues?q= , '
+ 'filling out the appropriate issue template. Confirm you are on the latest version using yt-dlp -U')
before = before.rstrip()
if not before or before.endswith(('.', '!', '?')):
super().__init__(self.msg)
-network_exceptions = [compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error]
+network_exceptions = [urllib.error.URLError, http.client.HTTPException, socket.error]
if hasattr(ssl, 'CertificateError'):
network_exceptions.append(ssl.CertificateError)
network_exceptions = tuple(network_exceptions)
self.video_id = video_id
self.ie = ie
self.exc_info = sys.exc_info() # preserve original exception
+ if isinstance(self.exc_info[1], ExtractorError):
+ self.exc_info = self.exc_info[1].exc_info
super().__init__(''.join((
format_field(ie, None, '[%s] '),
self.countries = countries
+class UserNotLive(ExtractorError):
+ """Error when a channel/user is not live"""
+
+ def __init__(self, msg=None, **kwargs):
+ kwargs['expected'] = True
+ super().__init__(msg or 'The channel is not currently live', **kwargs)
+
+
class DownloadError(YoutubeDLError):
"""Download Error exception.
return filtered_headers
-class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
+class YoutubeDLHandler(urllib.request.HTTPHandler):
"""Handler for HTTP requests and responses.
This class, when installed with an OpenerDirector, automatically adds
"""
def __init__(self, params, *args, **kwargs):
- compat_urllib_request.HTTPHandler.__init__(self, *args, **kwargs)
+ urllib.request.HTTPHandler.__init__(self, *args, **kwargs)
self._params = params
def http_open(self, req):
- conn_class = compat_http_client.HTTPConnection
+ conn_class = http.client.HTTPConnection
socks_proxy = req.headers.get('Ytdl-socks-proxy')
if socks_proxy:
req.headers = handle_youtubedl_headers(req.headers)
- return req
+ return super().do_request_(req)
def http_response(self, req, resp):
old_resp = resp
break
else:
raise original_ioerror
- resp = compat_urllib_request.addinfourl(uncompressed, old_resp.headers, old_resp.url, old_resp.code)
+ resp = urllib.request.addinfourl(uncompressed, old_resp.headers, old_resp.url, old_resp.code)
resp.msg = old_resp.msg
del resp.headers['Content-encoding']
# deflate
if resp.headers.get('Content-encoding', '') == 'deflate':
gz = io.BytesIO(self.deflate(resp.read()))
- resp = compat_urllib_request.addinfourl(gz, old_resp.headers, old_resp.url, old_resp.code)
+ resp = urllib.request.addinfourl(gz, old_resp.headers, old_resp.url, old_resp.code)
resp.msg = old_resp.msg
del resp.headers['Content-encoding']
# brotli
if resp.headers.get('Content-encoding', '') == 'br':
- resp = compat_urllib_request.addinfourl(
+ resp = urllib.request.addinfourl(
io.BytesIO(self.brotli(resp.read())), old_resp.headers, old_resp.url, old_resp.code)
resp.msg = old_resp.msg
del resp.headers['Content-encoding']
def make_socks_conn_class(base_class, socks_proxy):
assert issubclass(base_class, (
- compat_http_client.HTTPConnection, compat_http_client.HTTPSConnection))
+ http.client.HTTPConnection, http.client.HTTPSConnection))
- url_components = compat_urlparse.urlparse(socks_proxy)
+ url_components = urllib.parse.urlparse(socks_proxy)
if url_components.scheme.lower() == 'socks5':
socks_type = ProxyType.SOCKS5
elif url_components.scheme.lower() in ('socks', 'socks4'):
def unquote_if_non_empty(s):
if not s:
return s
- return compat_urllib_parse_unquote_plus(s)
+ return urllib.parse.unquote_plus(s)
proxy_args = (
socks_type,
self.sock.settimeout(self.timeout)
self.sock.connect((self.host, self.port))
- if isinstance(self, compat_http_client.HTTPSConnection):
+ if isinstance(self, http.client.HTTPSConnection):
if hasattr(self, '_context'): # Python > 2.6
self.sock = self._context.wrap_socket(
self.sock, server_hostname=self.host)
return SocksConnection
-class YoutubeDLHTTPSHandler(compat_urllib_request.HTTPSHandler):
+class YoutubeDLHTTPSHandler(urllib.request.HTTPSHandler):
def __init__(self, params, https_conn_class=None, *args, **kwargs):
- compat_urllib_request.HTTPSHandler.__init__(self, *args, **kwargs)
- self._https_conn_class = https_conn_class or compat_http_client.HTTPSConnection
+ urllib.request.HTTPSHandler.__init__(self, *args, **kwargs)
+ self._https_conn_class = https_conn_class or http.client.HTTPSConnection
self._params = params
def https_open(self, req):
raise
-class YoutubeDLCookieJar(compat_cookiejar.MozillaCookieJar):
+class YoutubeDLCookieJar(http.cookiejar.MozillaCookieJar):
"""
See [1] for cookie file format.
if self.filename is not None:
filename = self.filename
else:
- raise ValueError(compat_cookiejar.MISSING_FILENAME_TEXT)
+ raise ValueError(http.cookiejar.MISSING_FILENAME_TEXT)
# Store session cookies with `expires` set to 0 instead of an empty string
for cookie in self:
if self.filename is not None:
filename = self.filename
else:
- raise ValueError(compat_cookiejar.MISSING_FILENAME_TEXT)
+ raise ValueError(http.cookiejar.MISSING_FILENAME_TEXT)
def prepare_line(line):
if line.startswith(self._HTTPONLY_PREFIX):
return line
cookie_list = line.split('\t')
if len(cookie_list) != self._ENTRY_LEN:
- raise compat_cookiejar.LoadError('invalid length %d' % len(cookie_list))
+ raise http.cookiejar.LoadError('invalid length %d' % len(cookie_list))
cookie = self._CookieFileEntry(*cookie_list)
if cookie.expires_at and not cookie.expires_at.isdigit():
- raise compat_cookiejar.LoadError('invalid expires at %s' % cookie.expires_at)
+ raise http.cookiejar.LoadError('invalid expires at %s' % cookie.expires_at)
return line
cf = io.StringIO()
for line in f:
try:
cf.write(prepare_line(line))
- except compat_cookiejar.LoadError as e:
+ except http.cookiejar.LoadError as e:
if f'{line.strip()} '[0] in '[{"':
- raise compat_cookiejar.LoadError(
+ raise http.cookiejar.LoadError(
'Cookies file must be Netscape formatted, not JSON. See '
'https://github.com/ytdl-org/youtube-dl#how-do-i-pass-cookies-to-youtube-dl')
write_string(f'WARNING: skipping cookie file entry due to {e}: {line!r}\n')
cookie.discard = True
-class YoutubeDLCookieProcessor(compat_urllib_request.HTTPCookieProcessor):
+class YoutubeDLCookieProcessor(urllib.request.HTTPCookieProcessor):
def __init__(self, cookiejar=None):
- compat_urllib_request.HTTPCookieProcessor.__init__(self, cookiejar)
+ urllib.request.HTTPCookieProcessor.__init__(self, cookiejar)
def http_response(self, request, response):
- return compat_urllib_request.HTTPCookieProcessor.http_response(self, request, response)
+ return urllib.request.HTTPCookieProcessor.http_response(self, request, response)
- https_request = compat_urllib_request.HTTPCookieProcessor.http_request
+ https_request = urllib.request.HTTPCookieProcessor.http_request
https_response = http_response
-class YoutubeDLRedirectHandler(compat_urllib_request.HTTPRedirectHandler):
+class YoutubeDLRedirectHandler(urllib.request.HTTPRedirectHandler):
"""YoutubeDL redirect handler
The code is based on HTTPRedirectHandler implementation from CPython [1].
3. https://github.com/ytdl-org/youtube-dl/issues/28768
"""
- http_error_301 = http_error_303 = http_error_307 = http_error_308 = compat_urllib_request.HTTPRedirectHandler.http_error_302
+ http_error_301 = http_error_303 = http_error_307 = http_error_308 = urllib.request.HTTPRedirectHandler.http_error_302
def redirect_request(self, req, fp, code, msg, headers, newurl):
"""Return a Request or None in response to a redirect.
m = req.get_method()
if (not (code in (301, 302, 303, 307, 308) and m in ("GET", "HEAD")
or code in (301, 302, 303) and m == "POST")):
- raise compat_HTTPError(req.full_url, code, msg, headers, fp)
+ raise urllib.error.HTTPError(req.full_url, code, msg, headers, fp)
# Strictly (according to RFC 2616), 301 or 302 in response to
# a POST MUST NOT cause a redirection without confirmation
# from the user (of urllib.request, in this case). In practice,
if code in (301, 302) and m == 'POST':
m = 'GET'
- return compat_urllib_request.Request(
+ return urllib.request.Request(
newurl, headers=newheaders, origin_req_host=req.origin_req_host,
unverifiable=True, method=m)
$)
''', date_str)
if not m:
- timezone = datetime.timedelta()
+ m = re.search(r'\d{1,2}:\d{1,2}(?:\.\d+)?(?P<tz>\s*[A-Z]+)$', date_str)
+ timezone = TIMEZONE_NAMES.get(m and m.group('tz').strip())
+ if timezone is not None:
+ date_str = date_str[:-len(m.group('tz'))]
+ timezone = datetime.timedelta(hours=timezone or 0)
else:
date_str = date_str[:-len(m.group('tz'))]
if not m.group('sign'):
with contextlib.suppress(ValueError):
upload_date = datetime.datetime(*timetuple[:6]).strftime('%Y%m%d')
if upload_date is not None:
- return compat_str(upload_date)
+ return str(upload_date)
def unified_timestamp(date_str, day_first=True):
if date_str is None:
return None
- date_str = re.sub(r'[,|]', '', date_str)
+ date_str = re.sub(r'\s+', ' ', re.sub(
+ r'(?i)[,|]|(mon|tues?|wed(nes)?|thu(rs)?|fri|sat(ur)?)(day)?', '', date_str))
pm_delta = 12 if re.search(r'(?i)PM', date_str) else 0
timezone, date_str = extract_timezone(date_str)
with contextlib.suppress(ValueError):
dt = datetime.datetime.strptime(date_str, expression) - timezone + datetime.timedelta(hours=pm_delta)
return calendar.timegm(dt.timetuple())
+
timetuple = email.utils.parsedate_tz(date_str)
if timetuple:
- return calendar.timegm(timetuple) + pm_delta * 3600
+ return calendar.timegm(timetuple) + pm_delta * 3600 - timezone.total_seconds()
def determine_ext(url, default_ext='unknown_video'):
def __str__(self):
return f'{self.start.isoformat()} - {self.end.isoformat()}'
+ def __eq__(self, other):
+ return (isinstance(other, DateRange)
+ and self.start == other.start and self.end == other.end)
+
def platform_name():
- """ Returns the platform name as a compat_str """
- res = platform.platform()
- if isinstance(res, bytes):
- res = res.decode(preferredencoding())
+ """ Returns the platform name as a str """
+ write_string('DeprecationWarning: yt_dlp.utils.platform_name is deprecated, use platform.platform instead')
+ return platform.platform()
+
- assert isinstance(res, compat_str)
- return res
+@functools.cache
+def system_identifier():
+ python_implementation = platform.python_implementation()
+ if python_implementation == 'PyPy' and hasattr(sys, 'pypy_version_info'):
+ python_implementation += ' version %d.%d.%d' % sys.pypy_version_info[:3]
+
+ return 'Python %s (%s %s) - %s %s' % (
+ platform.python_version(),
+ python_implementation,
+ platform.architecture()[0],
+ platform.platform(),
+ format_field(join_nonempty(*platform.libc_ver(), delim=' '), None, '(%s)'),
+ )
@functools.cache
def intlist_to_bytes(xs):
if not xs:
return b''
- return compat_struct_pack('%dB' % len(xs), *xs)
+ return struct.pack('%dB' % len(xs), *xs)
class LockingUnsupportedError(OSError):
# Cross-platform file locking
if sys.platform == 'win32':
+ import ctypes
import ctypes.wintypes
import msvcrt
url, idata = unsmuggle_url(url, {})
data.update(idata)
- sdata = compat_urllib_parse_urlencode(
+ sdata = urllib.parse.urlencode(
{'__youtubedl_smuggle': json.dumps(data)})
return url + '#' + sdata
if '#__youtubedl_smuggle' not in smug_url:
return smug_url, default
url, _, sdata = smug_url.rpartition('#')
- jsond = compat_parse_qs(sdata)['__youtubedl_smuggle'][0]
+ jsond = urllib.parse.parse_qs(sdata)['__youtubedl_smuggle'][0]
data = json.loads(jsond)
return url, data
def parse_bitrate(s):
- if not isinstance(s, compat_str):
+ if not isinstance(s, str):
return
mobj = re.search(r'\b(\d+)\s*kbps', s)
if mobj:
def setproctitle(title):
- assert isinstance(title, compat_str)
+ assert isinstance(title, str)
- # ctypes in Jython is not complete
- # http://bugs.jython.org/issue2148
- if sys.platform.startswith('java'):
+ # Workaround for https://github.com/yt-dlp/yt-dlp/issues/4541
+ try:
+ import ctypes
+ except ImportError:
return
try:
def get_domain(url):
- domain = re.match(r'(?:https?:\/\/)?(?:www\.)?(?P<domain>[^\n\/]+\.[^\n\/]+)(?:\/(.*))?', url)
- return domain.group('domain') if domain else None
+ """
+ This implementation is inconsistent, but is kept for compatibility.
+ Use this only for "webpage_url_domain"
+ """
+ return remove_start(urllib.parse.urlparse(url).netloc, 'www.') or None
def url_basename(url):
- path = compat_urlparse.urlparse(url).path
+ path = urllib.parse.urlparse(url).path
return path.strip('/').split('/')[-1]
def urljoin(base, path):
if isinstance(path, bytes):
path = path.decode()
- if not isinstance(path, compat_str) or not path:
+ if not isinstance(path, str) or not path:
return None
if re.match(r'^(?:[a-zA-Z][a-zA-Z0-9+-.]*:)?//', path):
return path
if isinstance(base, bytes):
base = base.decode()
- if not isinstance(base, compat_str) or not re.match(
+ if not isinstance(base, str) or not re.match(
r'^(?:https?:)?//', base):
return None
- return compat_urlparse.urljoin(base, path)
+ return urllib.parse.urljoin(base, path)
-class HEADRequest(compat_urllib_request.Request):
+class HEADRequest(urllib.request.Request):
def get_method(self):
return 'HEAD'
-class PUTRequest(compat_urllib_request.Request):
+class PUTRequest(urllib.request.Request):
def get_method(self):
return 'PUT'
def str_or_none(v, default=None):
- return default if v is None else compat_str(v)
+ return default if v is None else str(v)
def str_to_int(int_str):
""" A more relaxed version of int_or_none """
if isinstance(int_str, int):
return int_str
- elif isinstance(int_str, compat_str):
+ elif isinstance(int_str, str):
int_str = re.sub(r'[,\.\+]', '', int_str)
return int_or_none(int_str)
def strip_or_none(v, default=None):
- return v.strip() if isinstance(v, compat_str) else default
+ return v.strip() if isinstance(v, str) else default
def url_or_none(url):
- if not url or not isinstance(url, compat_str):
+ if not url or not isinstance(url, str):
return None
url = url.strip()
return url if re.match(r'^(?:(?:https?|rt(?:m(?:pt?[es]?|fp)|sp[su]?)|mms|ftps?):)?//', url) else None
def request_to_url(req):
- if isinstance(req, compat_urllib_request.Request):
+ if isinstance(req, urllib.request.Request):
return req.get_full_url()
else:
return req
try:
if isinstance(timestamp, (int, float)): # unix timestamp
datetime_object = datetime.datetime.utcfromtimestamp(timestamp)
- elif isinstance(timestamp, compat_str): # assume YYYYMMDD
+ elif isinstance(timestamp, str): # assume YYYYMMDD
datetime_object = datetime.datetime.strptime(timestamp, '%Y%m%d')
return datetime_object.strftime(date_format)
except (ValueError, TypeError, AttributeError):
def detect_exe_version(output, version_re=None, unrecognized='present'):
- assert isinstance(output, compat_str)
+ assert isinstance(output, str)
if version_re is None:
version_re = r'version\s+([-0-9._a-zA-Z]+)'
m = re.search(version_re, output)
@staticmethod
def _reverse_index(x):
- return None if x is None else -(x + 1)
+ return None if x is None else ~x
def __getitem__(self, idx):
if isinstance(idx, slice):
for index in self.parse_playlist_items(playlist_items):
for i, entry in self[index]:
yield i, entry
+ if not entry:
+ continue
try:
# TODO: Add auto-generated fields
self.ydl._match_entry(entry, incomplete=True, silent=True)
def escape_url(url):
"""Escape URL as suggested by RFC 3986"""
- url_parsed = compat_urllib_parse_urlparse(url)
+ url_parsed = urllib.parse.urlparse(url)
return url_parsed._replace(
netloc=url_parsed.netloc.encode('idna').decode('ascii'),
path=escape_rfc3986(url_parsed.path),
def parse_qs(url):
- return compat_parse_qs(compat_urllib_parse_urlparse(url).query)
+ return urllib.parse.parse_qs(urllib.parse.urlparse(url).query)
def read_batch_urls(batch_fd):
def fixup(url):
- if not isinstance(url, compat_str):
+ if not isinstance(url, str):
url = url.decode('utf-8', 'replace')
BOM_UTF8 = ('\xef\xbb\xbf', '\ufeff')
for bom in BOM_UTF8:
if not url or url.startswith(('#', ';', ']')):
return False
# "#" cannot be stripped out since it is part of the URI
- # However, it can be safely stipped out if follwing a whitespace
+ # However, it can be safely stripped out if following a whitespace
return re.split(r'\s#', url, 1)[0].rstrip()
with contextlib.closing(batch_fd) as fd:
def urlencode_postdata(*args, **kargs):
- return compat_urllib_parse_urlencode(*args, **kargs).encode('ascii')
+ return urllib.parse.urlencode(*args, **kargs).encode('ascii')
def update_url_query(url, query):
if not query:
return url
- parsed_url = compat_urlparse.urlparse(url)
- qs = compat_parse_qs(parsed_url.query)
+ parsed_url = urllib.parse.urlparse(url)
+ qs = urllib.parse.parse_qs(parsed_url.query)
qs.update(query)
- return compat_urlparse.urlunparse(parsed_url._replace(
- query=compat_urllib_parse_urlencode(qs, True)))
+ return urllib.parse.urlunparse(parsed_url._replace(
+ query=urllib.parse.urlencode(qs, True)))
-def update_Request(req, url=None, data=None, headers={}, query={}):
+def update_Request(req, url=None, data=None, headers=None, query=None):
req_headers = req.headers.copy()
- req_headers.update(headers)
+ req_headers.update(headers or {})
req_data = data or req.data
req_url = update_url_query(url or req.get_full_url(), query)
req_get_method = req.get_method()
elif req_get_method == 'PUT':
req_type = PUTRequest
else:
- req_type = compat_urllib_request.Request
+ req_type = urllib.request.Request
new_req = req_type(
req_url, data=req_data, headers=req_headers,
origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
out = b''
for k, v in data.items():
out += b'--' + boundary.encode('ascii') + b'\r\n'
- if isinstance(k, compat_str):
+ if isinstance(k, str):
k = k.encode()
- if isinstance(v, compat_str):
+ if isinstance(v, str):
v = v.encode()
# RFC 2047 requires non-ASCII field names to be encoded, while RFC 7578
# suggests sending UTF-8 directly. Firefox sends UTF-8, too
def encode_compat_str(string, encoding=preferredencoding(), errors='strict'):
- return string if isinstance(string, compat_str) else compat_str(string, encoding, errors)
+ return string if isinstance(string, str) else str(string, encoding, errors)
US_RATINGS = {
r'\g<callback_data>', code)
-def js_to_json(code, vars={}):
+def js_to_json(code, vars={}, *, strict=False):
# vars is a dict of var, val pairs to substitute
COMMENT_RE = r'/\*(?:(?!\*/).)*?\*/|//[^\n]*\n'
SKIP_RE = fr'\s*(?:{COMMENT_RE})?\s*'
if v in vars:
return vars[v]
+ if strict:
+ raise ValueError(f'Unknown value: {v}')
return '"%s"' % v
- code = re.sub(r'new Date\((".+")\)', r'\g<1>', code)
+ def create_map(mobj):
+ return json.dumps(dict(json.loads(js_to_json(mobj.group(1) or '[]', vars=vars))))
+
+ code = re.sub(r'new Map\((\[.*?\])?\)', create_map, code)
+ if not strict:
+ code = re.sub(r'new Date\((".+")\)', r'\g<1>', code)
return re.sub(r'''(?sx)
"(?:[^"\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^"\\]*"|
return q
-POSTPROCESS_WHEN = ('pre_process', 'after_filter', 'before_dl', 'after_move', 'post_process', 'after_video', 'playlist')
+POSTPROCESS_WHEN = ('pre_process', 'after_filter', 'before_dl', 'post_process', 'after_move', 'after_video', 'playlist')
DEFAULT_OUTTMPL = {
str.strip, codecs_str.strip().strip(',').split(','))))
vcodec, acodec, scodec, hdr = None, None, None, None
for full_codec in split_codecs:
- parts = full_codec.split('.')
- codec = parts[0].replace('0', '')
- if codec in ('avc1', 'avc2', 'avc3', 'avc4', 'vp9', 'vp8', 'hev1', 'hev2',
- 'h263', 'h264', 'mp4v', 'hvc1', 'av1', 'theora', 'dvh1', 'dvhe'):
- if not vcodec:
- vcodec = '.'.join(parts[:4]) if codec in ('vp9', 'av1', 'hvc1') else full_codec
- if codec in ('dvh1', 'dvhe'):
- hdr = 'DV'
- elif codec == 'av1' and len(parts) > 3 and parts[3] == '10':
- hdr = 'HDR10'
- elif full_codec.replace('0', '').startswith('vp9.2'):
- hdr = 'HDR10'
- elif codec in ('flac', 'mp4a', 'opus', 'vorbis', 'mp3', 'aac', 'ac-3', 'ec-3', 'eac3', 'dtsc', 'dtse', 'dtsh', 'dtsl'):
- if not acodec:
- acodec = full_codec
- elif codec in ('stpp', 'wvtt',):
- if not scodec:
- scodec = full_codec
+ parts = re.sub(r'0+(?=\d)', '', full_codec).split('.')
+ if parts[0] in ('avc1', 'avc2', 'avc3', 'avc4', 'vp9', 'vp8', 'hev1', 'hev2',
+ 'h263', 'h264', 'mp4v', 'hvc1', 'av1', 'theora', 'dvh1', 'dvhe'):
+ if vcodec:
+ continue
+ vcodec = full_codec
+ if parts[0] in ('dvh1', 'dvhe'):
+ hdr = 'DV'
+ elif parts[0] == 'av1' and traverse_obj(parts, 3) == '10':
+ hdr = 'HDR10'
+ elif parts[:2] == ['vp9', '2']:
+ hdr = 'HDR10'
+ elif parts[0] in ('flac', 'mp4a', 'opus', 'vorbis', 'mp3', 'aac',
+ 'ac-3', 'ec-3', 'eac3', 'dtsc', 'dtse', 'dtsh', 'dtsl'):
+ acodec = acodec or full_codec
+ elif parts[0] in ('stpp', 'wvtt'):
+ scodec = scodec or full_codec
else:
write_string(f'WARNING: Unknown codec {full_codec}\n')
if vcodec or acodec or scodec:
return {}
+def get_compatible_ext(*, vcodecs, acodecs, vexts, aexts, preferences=None):
+ assert len(vcodecs) == len(vexts) and len(acodecs) == len(aexts)
+
+ allow_mkv = not preferences or 'mkv' in preferences
+
+ if allow_mkv and max(len(acodecs), len(vcodecs)) > 1:
+ return 'mkv' # TODO: any other format allows this?
+
+ # TODO: All codecs supported by parse_codecs isn't handled here
+ COMPATIBLE_CODECS = {
+ 'mp4': {
+ 'av1', 'hevc', 'avc1', 'mp4a', # fourcc (m3u8, mpd)
+ 'h264', 'aacl', # Set in ISM
+ },
+ 'webm': {
+ 'av1', 'vp9', 'vp8', 'opus', 'vrbs',
+ 'vp9x', 'vp8x', # in the webm spec
+ },
+ }
+
+ sanitize_codec = functools.partial(try_get, getter=lambda x: x[0].split('.')[0].replace('0', ''))
+ vcodec, acodec = sanitize_codec(vcodecs), sanitize_codec(acodecs)
+
+ for ext in preferences or COMPATIBLE_CODECS.keys():
+ codec_set = COMPATIBLE_CODECS.get(ext, set())
+ if ext == 'mkv' or codec_set.issuperset((vcodec, acodec)):
+ return ext
+
+ COMPATIBLE_EXTS = (
+ {'mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma', 'mov'},
+ {'webm'},
+ )
+ for ext in preferences or vexts:
+ current_exts = {ext, *vexts, *aexts}
+ if ext == 'mkv' or current_exts == {ext} or any(
+ ext_sets.issuperset(current_exts) for ext_sets in COMPATIBLE_EXTS):
+ return ext
+ return 'mkv' if allow_mkv else preferences[-1]
+
+
def urlhandle_detect_ext(url_handle):
getheader = url_handle.headers.get
return age_limit < content_limit
+# List of known byte-order-marks (BOM)
+BOMS = [
+ (b'\xef\xbb\xbf', 'utf-8'),
+ (b'\x00\x00\xfe\xff', 'utf-32-be'),
+ (b'\xff\xfe\x00\x00', 'utf-32-le'),
+ (b'\xff\xfe', 'utf-16-le'),
+ (b'\xfe\xff', 'utf-16-be'),
+]
+
+
def is_html(first_bytes):
""" Detect whether a file contains HTML by examining its first bytes. """
- BOMS = [
- (b'\xef\xbb\xbf', 'utf-8'),
- (b'\x00\x00\xfe\xff', 'utf-32-be'),
- (b'\xff\xfe\x00\x00', 'utf-32-le'),
- (b'\xff\xfe', 'utf-16-le'),
- (b'\xfe\xff', 'utf-16-be'),
- ]
-
encoding = 'utf-8'
for bom, enc in BOMS:
while first_bytes.startswith(bom):
elif ext == 'f4m':
return 'f4m'
- return compat_urllib_parse_urlparse(url).scheme
+ return urllib.parse.urlparse(url).scheme
def render_table(header_row, data, delim=False, extra_gap=0, hide_empty=False):
if not filters or any(match_str(f, info_dict, incomplete) for f in filters):
return NO_DEFAULT if interactive and not incomplete else None
else:
- video_title = info_dict.get('title') or info_dict.get('id') or 'video'
+ video_title = info_dict.get('title') or info_dict.get('id') or 'entry'
filter_str = ') | ('.join(map(str.strip, filters))
return f'{video_title} does not pass filter ({filter_str}), skipping ..'
return _match_func
-def download_range_func(chapters, ranges):
- def inner(info_dict, ydl):
+class download_range_func:
+ def __init__(self, chapters, ranges):
+ self.chapters, self.ranges = chapters, ranges
+
+ def __call__(self, info_dict, ydl):
warning = ('There are no chapters matching the regex' if info_dict.get('chapters')
else 'Cannot match chapters since chapter information is unavailable')
- for regex in chapters or []:
+ for regex in self.chapters or []:
for i, chapter in enumerate(info_dict.get('chapters') or []):
if re.search(regex, chapter['title']):
warning = None
yield {**chapter, 'index': i}
- if chapters and warning:
+ if self.chapters and warning:
ydl.to_screen(f'[info] {info_dict["id"]}: {warning}')
- yield from ({'start_time': start, 'end_time': end} for start, end in ranges or [])
+ yield from ({'start_time': start, 'end_time': end} for start, end in self.ranges or [])
- return inner
+ def __eq__(self, other):
+ return (isinstance(other, download_range_func)
+ and self.chapters == other.chapters and self.ranges == other.ranges)
def parse_dfxp_time_expr(time_expr):
else:
block = code_or_block
addr, preflen = block.split('/')
- addr_min = compat_struct_unpack('!L', socket.inet_aton(addr))[0]
+ addr_min = struct.unpack('!L', socket.inet_aton(addr))[0]
addr_max = addr_min | (0xffffffff >> int(preflen))
- return compat_str(socket.inet_ntoa(
- compat_struct_pack('!L', random.randint(addr_min, addr_max))))
+ return str(socket.inet_ntoa(
+ struct.pack('!L', random.randint(addr_min, addr_max))))
-class PerRequestProxyHandler(compat_urllib_request.ProxyHandler):
+class PerRequestProxyHandler(urllib.request.ProxyHandler):
def __init__(self, proxies=None):
# Set default handlers
for type in ('http', 'https'):
setattr(self, '%s_open' % type,
lambda r, proxy='__noproxy__', type=type, meth=self.proxy_open:
meth(r, proxy, type))
- compat_urllib_request.ProxyHandler.__init__(self, proxies)
+ urllib.request.ProxyHandler.__init__(self, proxies)
def proxy_open(self, req, proxy, type):
req_proxy = req.headers.get('Ytdl-request-proxy')
if proxy == '__noproxy__':
return None # No Proxy
- if compat_urlparse.urlparse(proxy).scheme.lower() in ('socks', 'socks4', 'socks4a', 'socks5'):
+ if urllib.parse.urlparse(proxy).scheme.lower() in ('socks', 'socks4', 'socks4a', 'socks5'):
req.add_header('Ytdl-socks-proxy', proxy)
# yt-dlp's http/https handlers do wrapping the socket with socks
return None
- return compat_urllib_request.ProxyHandler.proxy_open(
+ return urllib.request.ProxyHandler.proxy_open(
self, req, proxy, type)
s = b''
n = int(n)
while n > 0:
- s = compat_struct_pack('>I', n & 0xffffffff) + s
+ s = struct.pack('>I', n & 0xffffffff) + s
n = n >> 32
# strip off leading zeros
for i in range(len(s)):
s = b'\000' * extra + s
length = length + extra
for i in range(0, length, 4):
- acc = (acc << 32) + compat_struct_unpack('>I', s[i:i + 4])[0]
+ acc = (acc << 32) + struct.unpack('>I', s[i:i + 4])[0]
return acc
return [0, 2] + pseudo_random + [0] + data
-def encode_base_n(num, n, table=None):
- FULL_TABLE = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
- if not table:
- table = FULL_TABLE[:n]
+def _base_n_table(n, table):
+ if not table and not n:
+ raise ValueError('Either table or n must be specified')
+ table = (table or '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ')[:n]
+
+ if n and n != len(table):
+ raise ValueError(f'base {n} exceeds table length {len(table)}')
+ return table
- if n > len(table):
- raise ValueError('base %d exceeds table length %d' % (n, len(table)))
- if num == 0:
+def encode_base_n(num, n=None, table=None):
+ """Convert given int to a base-n string"""
+ table = _base_n_table(n, table)
+ if not num:
return table[0]
- ret = ''
+ result, base = '', len(table)
while num:
- ret = table[num % n] + ret
- num = num // n
- return ret
+ result = table[num % base] + result
+ num = num // base
+ return result
+
+
+def decode_base_n(string, n=None, table=None):
+ """Convert given base-n string to int"""
+ table = {char: index for index, char in enumerate(_base_n_table(n, table))}
+ result, base = 0, len(table)
+ for char in string:
+ result = result * base + table[char]
+ return result
+
+
+def decode_base(value, digits):
+ write_string('DeprecationWarning: yt_dlp.utils.decode_base is deprecated '
+ 'and may be removed in a future version. Use yt_dlp.decode_base_n instead')
+ return decode_base_n(value, table=digits)
def decode_packed_codes(code):
raise OSError('Not a valid PNG file.')
int_map = {1: '>B', 2: '>H', 4: '>I'}
- unpack_integer = lambda x: compat_struct_unpack(int_map[len(x)], x)[0]
+ unpack_integer = lambda x: struct.unpack(int_map[len(x)], x)[0]
chunks = []
return
# UNIX Method 1. Use xattrs/pyxattrs modules
- from .dependencies import xattr
setxattr = None
if getattr(xattr, '_yt_dlp__identifier', None) == 'pyxattr':
The function doesn't add an additional layer of escaping; e.g., it doesn't escape `%3C` as `%253C`. Instead, it percent-escapes characters with an underlying UTF-8 encoding *besides* those already escaped, leaving the URI intact.
"""
- iri_parts = compat_urllib_parse_urlparse(iri)
+ iri_parts = urllib.parse.urlparse(iri)
if '[' in iri_parts.netloc:
raise ValueError('IPv6 URIs are not, yet, supported.')
return path
-def format_field(obj, field=None, template='%s', ignore=NO_DEFAULT, default='', func=None):
+def format_field(obj, field=None, template='%s', ignore=NO_DEFAULT, default='', func=IDENTITY):
val = traverse_obj(obj, *variadic(field))
- if (not val and val != 0) if ignore is NO_DEFAULT else val in ignore:
+ if (not val and val != 0) if ignore is NO_DEFAULT else val in variadic(ignore):
return default
- return template % (func(val) if func else val)
+ return template % func(val)
def clean_podcast_url(url):
if isinstance(expected_type, type):
type_test = lambda val: val if isinstance(val, expected_type) else None
- elif expected_type is not None:
- type_test = expected_type
else:
- type_test = lambda val: val
+ type_test = expected_type or IDENTITY
for path in path_list:
depth = 0
return x if isinstance(x, collections.abc.Iterable) and not isinstance(x, allowed_types) else (x,)
-def decode_base(value, digits):
- # This will convert given base-x string to scalar (long or int)
- table = {char: index for index, char in enumerate(digits)}
- result = 0
- base = len(digits)
- for chr in value:
- result *= base
- result += table[chr]
- return result
-
-
def time_seconds(**kwargs):
t = datetime.datetime.now(datetime.timezone(datetime.timedelta(**kwargs)))
return t.timestamp()
def join_nonempty(*values, delim='-', from_dict=None):
if from_dict is not None:
- values = map(from_dict.get, values)
+ values = (traverse_obj(from_dict, variadic(v)) for v in values)
return delim.join(map(str, filter(None, values)))
return sys.stdin
+def determine_file_encoding(data):
+ """
+ Detect the text encoding used
+ @returns (encoding, bytes to skip)
+ """
+
+ # BOM marks are given priority over declarations
+ for bom, enc in BOMS:
+ if data.startswith(bom):
+ return enc, len(bom)
+
+ # Strip off all null bytes to match even when UTF-16 or UTF-32 is used.
+ # We ignore the endianness to get a good enough match
+ data = data.replace(b'\0', b'')
+ mobj = re.match(rb'(?m)^#\s*coding\s*:\s*(\S+)\s*$', data)
+ return mobj.group(1).decode() if mobj else None, 0
+
+
class Config:
own_args = None
parsed_args = None
def init(self, args=None, filename=None):
assert not self.__initialized
+ self.own_args, self.filename = args, filename
+ return self.load_configs()
+
+ def load_configs(self):
directory = ''
- if filename:
- location = os.path.realpath(filename)
+ if self.filename:
+ location = os.path.realpath(self.filename)
directory = os.path.dirname(location)
if location in self._loaded_paths:
return False
self._loaded_paths.add(location)
- self.own_args, self.__initialized = args, True
- opts, _ = self.parser.parse_known_args(args)
- self.parsed_args, self.filename = args, filename
-
+ self.__initialized = True
+ opts, _ = self.parser.parse_known_args(self.own_args)
+ self.parsed_args = self.own_args
for location in opts.config_locations or []:
if location == '-':
self.append_config(shlex.split(read_stdin('options'), comments=True), label='stdin')
@staticmethod
def read_file(filename, default=[]):
try:
- optionf = open(filename)
+ optionf = open(filename, 'rb')
except OSError:
return default # silently skip if file is not present
+ try:
+ enc, skip = determine_file_encoding(optionf.read(512))
+ optionf.seek(skip, io.SEEK_SET)
+ except OSError:
+ enc = None # silently skip read errors
try:
# FIXME: https://github.com/ytdl-org/youtube-dl/commit/dfe5fa49aed02cf36ba9f743b11b0903554b5e56
- contents = optionf.read()
+ contents = optionf.read().decode(enc or preferredencoding())
res = shlex.split(contents, comments=True)
+ except Exception as err:
+ raise ValueError(f'Unable to parse "{filename}": {err}')
finally:
optionf.close()
return res
return {k.title(): v for k, v in itertools.chain.from_iterable(map(dict.items, dicts))}
+def cached_method(f):
+ """Cache a method"""
+ signature = inspect.signature(f)
+
+ @functools.wraps(f)
+ def wrapper(self, *args, **kwargs):
+ bound_args = signature.bind(self, *args, **kwargs)
+ bound_args.apply_defaults()
+ key = tuple(bound_args.arguments.values())
+
+ if not hasattr(self, '__cached_method__cache'):
+ self.__cached_method__cache = {}
+ cache = self.__cached_method__cache.setdefault(f.__name__, {})
+ if key not in cache:
+ cache[key] = f(self, *args, **kwargs)
+ return cache[key]
+ return wrapper
+
+
class classproperty:
- """classmethod(property(func)) that works in py < 3.9"""
+ """property access for class methods"""
def __init__(self, func):
functools.update_wrapper(self, func)
return self.__dict__.items()
+MEDIA_EXTENSIONS = Namespace(
+ common_video=('avi', 'flv', 'mkv', 'mov', 'mp4', 'webm'),
+ video=('3g2', '3gp', 'f4v', 'mk3d', 'divx', 'mpg', 'ogv', 'm4v', 'wmv'),
+ common_audio=('aiff', 'alac', 'flac', 'm4a', 'mka', 'mp3', 'ogg', 'opus', 'wav'),
+ audio=('aac', 'ape', 'asf', 'f4a', 'f4b', 'm4b', 'm4p', 'm4r', 'oga', 'ogx', 'spx', 'vorbis', 'wma'),
+ thumbnails=('jpg', 'png', 'webp'),
+ storyboards=('mhtml', ),
+ subtitles=('srt', 'vtt', 'ass', 'lrc'),
+ manifests=('f4f', 'f4m', 'm3u8', 'smil', 'mpd'),
+)
+MEDIA_EXTENSIONS.video += MEDIA_EXTENSIONS.common_video
+MEDIA_EXTENSIONS.audio += MEDIA_EXTENSIONS.common_audio
+
+KNOWN_EXTENSIONS = (*MEDIA_EXTENSIONS.video, *MEDIA_EXTENSIONS.audio, *MEDIA_EXTENSIONS.manifests)
+
+
+class RetryManager:
+ """Usage:
+ for retry in RetryManager(...):
+ try:
+ ...
+ except SomeException as err:
+ retry.error = err
+ continue
+ """
+ attempt, _error = 0, None
+
+ def __init__(self, _retries, _error_callback, **kwargs):
+ self.retries = _retries or 0
+ self.error_callback = functools.partial(_error_callback, **kwargs)
+
+ def _should_retry(self):
+ return self._error is not NO_DEFAULT and self.attempt <= self.retries
+
+ @property
+ def error(self):
+ if self._error is NO_DEFAULT:
+ return None
+ return self._error
+
+ @error.setter
+ def error(self, value):
+ self._error = value
+
+ def __iter__(self):
+ while self._should_retry():
+ self.error = NO_DEFAULT
+ self.attempt += 1
+ yield self
+ if self.error:
+ self.error_callback(self.error, self.attempt, self.retries)
+
+ @staticmethod
+ def report_retry(e, count, retries, *, sleep_func, info, warn, error=None, suffix=None):
+ """Utility function for reporting retries"""
+ if count > retries:
+ if error:
+ return error(f'{e}. Giving up after {count - 1} retries') if count > 1 else error(str(e))
+ raise e
+
+ if not count:
+ return warn(e)
+ elif isinstance(e, ExtractorError):
+ e = remove_end(str_or_none(e.cause) or e.orig_msg, '.')
+ warn(f'{e}. Retrying{format_field(suffix, None, " %s")} ({count}/{retries})...')
+
+ delay = float_or_none(sleep_func(n=count - 1)) if callable(sleep_func) else sleep_func
+ if delay:
+ info(f'Sleeping {delay:.2f} seconds ...')
+ time.sleep(delay)
+
+
+def make_archive_id(ie, video_id):
+ ie_key = ie if isinstance(ie, str) else ie.ie_key()
+ return f'{ie_key.lower()} {video_id}'
+
+
+def truncate_string(s, left, right=0):
+ assert left > 3 and right >= 0
+ if s is None or len(s) <= left + right:
+ return s
+ return f'{s[:left-3]}...{s[-right:]}'
+
+
# Deprecated
has_certifi = bool(certifi)
has_websockets = bool(websockets)