-#!/usr/bin/env python3
+import asyncio
import atexit
import base64
import binascii
import gzip
import hashlib
import hmac
+import html.entities
+import html.parser
+import http.client
+import http.cookiejar
import importlib.util
+import inspect
import io
import itertools
import json
import shlex
import socket
import ssl
+import struct
import subprocess
import sys
import tempfile
import time
import traceback
import types
+import urllib.error
import urllib.parse
+import urllib.request
import xml.etree.ElementTree
import zlib
-from .compat import asyncio, functools # isort: split
+from .compat import functools # isort: split
from .compat import (
- compat_chr,
- compat_cookiejar,
compat_etree_fromstring,
compat_expanduser,
- compat_html_entities,
- compat_html_entities_html5,
compat_HTMLParseError,
- compat_HTMLParser,
- compat_http_client,
- compat_HTTPError,
compat_os_name,
- compat_parse_qs,
compat_shlex_quote,
- compat_str,
- compat_struct_pack,
- compat_struct_unpack,
- compat_urllib_error,
- compat_urllib_parse_unquote_plus,
- compat_urllib_parse_urlencode,
- compat_urllib_parse_urlparse,
- compat_urllib_request,
- compat_urlparse,
)
-from .dependencies import brotli, certifi, websockets
+from .dependencies import brotli, certifi, websockets, xattr
from .socks import ProxyType, sockssocket
# In Python < 2.6.5, urlsplit() suffers from bug https://bugs.python.org/issue7904
# URLs with protocols not in urlparse.uses_netloc are not handled correctly
for scheme in ('socks', 'socks4', 'socks4a', 'socks5'):
- if scheme not in compat_urlparse.uses_netloc:
- compat_urlparse.uses_netloc.append(scheme)
+ if scheme not in urllib.parse.uses_netloc:
+ urllib.parse.uses_netloc.append(scheme)
# This is not clearly defined otherwise
'juillet', 'août', 'septembre', 'octobre', 'novembre', 'décembre'],
}
-KNOWN_EXTENSIONS = (
- 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'aac',
- 'flv', 'f4v', 'f4a', 'f4b',
- 'webm', 'ogg', 'ogv', 'oga', 'ogx', 'spx', 'opus',
- 'mkv', 'mka', 'mk3d',
- 'avi', 'divx',
- 'mov',
- 'asf', 'wmv', 'wma',
- '3gp', '3g2',
- 'mp3',
- 'flac',
- 'ape',
- 'wav',
- 'f4f', 'f4m', 'm3u8', 'smil')
-
# needed for sanitizing filenames in restricted mode
ACCENT_CHARS = dict(zip('ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖŐØŒÙÚÛÜŰÝÞßàáâãäåæçèéêëìíîïðñòóôõöőøœùúûüűýþÿ',
itertools.chain('AAAAAA', ['AE'], 'CEEEEIIIIDNOOOOOOO', ['OE'], 'UUUUUY', ['TH', 'ss'],
'%d/%m/%Y',
'%d/%m/%y',
'%d/%m/%Y %H:%M:%S',
+ '%d-%m-%Y %H:%M',
])
DATE_FORMATS_MONTH_FIRST = list(DATE_FORMATS)
])
PACKED_CODES_RE = r"}\('(.+)',(\d+),(\d+),'([^']+)'\.split\('\|'\)"
-JSON_LD_RE = r'(?is)<script[^>]+type=(["\']?)application/ld\+json\1[^>]*>(?P<json_ld>.+?)</script>'
+JSON_LD_RE = r'(?is)<script[^>]+type=(["\']?)application/ld\+json\1[^>]*>\s*(?P<json_ld>{.+?})\s*</script>'
NUMBER_RE = r'\d+(?:\.\d+)?'
def _find_xpath(xpath):
return node.find(xpath)
- if isinstance(xpath, (str, compat_str)):
+ if isinstance(xpath, str):
n = _find_xpath(xpath)
else:
for xp in xpath:
)
-class HTMLBreakOnClosingTagParser(compat_HTMLParser):
+class HTMLBreakOnClosingTagParser(html.parser.HTMLParser):
"""
HTML parser which raises HTMLBreakOnClosingTagException upon reaching the
closing tag for the first opening tag it has encountered, and can be used
def __init__(self):
self.tagstack = collections.deque()
- compat_HTMLParser.__init__(self)
+ html.parser.HTMLParser.__init__(self)
def __enter__(self):
return self
raise compat_HTMLParseError('unexpected end of html')
-class HTMLAttributeParser(compat_HTMLParser):
+class HTMLAttributeParser(html.parser.HTMLParser):
"""Trivial HTML parser to gather the attributes for a single element"""
def __init__(self):
self.attrs = {}
- compat_HTMLParser.__init__(self)
+ html.parser.HTMLParser.__init__(self)
def handle_starttag(self, tag, attrs):
self.attrs = dict(attrs)
-class HTMLListAttrsParser(compat_HTMLParser):
+class HTMLListAttrsParser(html.parser.HTMLParser):
"""HTML parser to gather the attributes for the elements of a list"""
def __init__(self):
- compat_HTMLParser.__init__(self)
+ html.parser.HTMLParser.__init__(self)
self.items = []
self._level = 0
if filename == '-':
if sys.platform == 'win32':
import msvcrt
- msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
+ # stdout may be any IO stream, e.g. when using contextlib.redirect_stdout
+ with contextlib.suppress(io.UnsupportedOperation):
+ msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
return (sys.stdout.buffer if hasattr(sys.stdout, 'buffer') else sys.stdout, filename)
for attempt in range(2):
s = re.sub(r'[0-9]+(?::[0-9]+)+', lambda m: m.group(0).replace(':', '_'), s) # Handle timestamps
result = ''.join(map(replace_insane, s))
if is_id is NO_DEFAULT:
- result = re.sub('(\0.)(?:(?=\\1)..)+', r'\1', result) # Remove repeated substitute chars
- STRIP_RE = '(?:\0.|[ _-])*'
+ result = re.sub(r'(\0.)(?:(?=\1)..)+', r'\1', result) # Remove repeated substitute chars
+ STRIP_RE = r'(?:\0.|[ _-])*'
result = re.sub(f'^\0.{STRIP_RE}|{STRIP_RE}\0.$', '', result) # Remove substitute chars from start/end
result = result.replace('\0', '') or '_'
return os.path.join(*sanitized_path)
-def sanitize_url(url):
+def sanitize_url(url, *, scheme='http'):
# Prepend protocol-less URLs with `http:` scheme in order to mitigate
# the number of unwanted failures due to missing protocol
if url is None:
return
elif url.startswith('//'):
- return 'http:%s' % url
+ return f'{scheme}:{url}'
# Fix some common typos seen so far
COMMON_TYPOS = (
# https://github.com/ytdl-org/youtube-dl/issues/15649
def extract_basic_auth(url):
- parts = compat_urlparse.urlsplit(url)
+ parts = urllib.parse.urlsplit(url)
if parts.username is None:
return url, None
- url = compat_urlparse.urlunsplit(parts._replace(netloc=(
+ url = urllib.parse.urlunsplit(parts._replace(netloc=(
parts.hostname if parts.port is None
else '%s:%d' % (parts.hostname, parts.port))))
auth_payload = base64.b64encode(
if auth_header is not None:
headers = args[1] if len(args) >= 2 else kwargs.setdefault('headers', {})
headers['Authorization'] = auth_header
- return compat_urllib_request.Request(url, *args, **kwargs)
+ return urllib.request.Request(url, *args, **kwargs)
def expand_path(s):
entity = entity_with_semicolon[:-1]
# Known non-numeric HTML entity
- if entity in compat_html_entities.name2codepoint:
- return compat_chr(compat_html_entities.name2codepoint[entity])
+ if entity in html.entities.name2codepoint:
+ return chr(html.entities.name2codepoint[entity])
# TODO: HTML5 allows entities without a semicolon. For example,
# 'Éric' should be decoded as 'Éric'.
- if entity_with_semicolon in compat_html_entities_html5:
- return compat_html_entities_html5[entity_with_semicolon]
+ if entity_with_semicolon in html.entities.html5:
+ return html.entities.html5[entity_with_semicolon]
mobj = re.match(r'#(x[0-9a-fA-F]+|[0-9]+)', entity)
if mobj is not None:
base = 10
# See https://github.com/ytdl-org/youtube-dl/issues/7518
with contextlib.suppress(ValueError):
- return compat_chr(int(numstr, base))
+ return chr(int(numstr, base))
# Unknown entity in name, return its literal representation
return '&%s;' % entity
def encodeArgument(s):
# Legacy code that uses byte strings
# Uncomment the following line after fixing all post processors
- # assert isinstance(s, str), 'Internal error: %r should be of type %r, is %r' % (s, compat_str, type(s))
+ # assert isinstance(s, str), 'Internal error: %r should be of type %r, is %r' % (s, str, type(s))
return s if isinstance(s, str) else s.decode('ascii')
if isinstance(optval, bytes):
optval = optval.decode(preferredencoding())
- assert isinstance(optval, compat_str)
+ assert isinstance(optval, str)
return optval
if opts_check_certificate:
if has_certifi and 'no-certifi' not in params.get('compat_opts', []):
context.load_verify_locations(cafile=certifi.where())
- try:
- context.load_default_certs()
- # Work around the issue in load_default_certs when there are bad certificates. See:
- # https://github.com/yt-dlp/yt-dlp/issues/1060,
- # https://bugs.python.org/issue35665, https://bugs.python.org/issue45312
- except ssl.SSLError:
- # enum_certificates is not present in mingw python. See https://github.com/yt-dlp/yt-dlp/issues/1151
- if sys.platform == 'win32' and hasattr(ssl, 'enum_certificates'):
- for storename in ('CA', 'ROOT'):
- _ssl_load_windows_store_certs(context, storename)
- context.set_default_verify_paths()
+ else:
+ try:
+ context.load_default_certs()
+ # Work around the issue in load_default_certs when there are bad certificates. See:
+ # https://github.com/yt-dlp/yt-dlp/issues/1060,
+ # https://bugs.python.org/issue35665, https://bugs.python.org/issue45312
+ except ssl.SSLError:
+ # enum_certificates is not present in mingw python. See https://github.com/yt-dlp/yt-dlp/issues/1151
+ if sys.platform == 'win32' and hasattr(ssl, 'enum_certificates'):
+ for storename in ('CA', 'ROOT'):
+ _ssl_load_windows_store_certs(context, storename)
+ context.set_default_verify_paths()
client_certfile = params.get('client_certificate')
if client_certfile:
super().__init__(self.msg)
-network_exceptions = [compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error]
+network_exceptions = [urllib.error.URLError, http.client.HTTPException, socket.error]
if hasattr(ssl, 'CertificateError'):
network_exceptions.append(ssl.CertificateError)
network_exceptions = tuple(network_exceptions)
self.countries = countries
+class UserNotLive(ExtractorError):
+    """Raised when the requested channel/user is not live at the moment"""
+
+    def __init__(self, msg=None, **kwargs):
+        # Always flagged as an expected error, whatever the caller passed for `expected`
+        options = {**kwargs, 'expected': True}
+        message = msg or 'The channel is not currently live'
+        super().__init__(message, **options)
+
+
class DownloadError(YoutubeDLError):
"""Download Error exception.
return filtered_headers
-class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
+class YoutubeDLHandler(urllib.request.HTTPHandler):
"""Handler for HTTP requests and responses.
This class, when installed with an OpenerDirector, automatically adds
"""
def __init__(self, params, *args, **kwargs):
- compat_urllib_request.HTTPHandler.__init__(self, *args, **kwargs)
+ urllib.request.HTTPHandler.__init__(self, *args, **kwargs)
self._params = params
def http_open(self, req):
- conn_class = compat_http_client.HTTPConnection
+ conn_class = http.client.HTTPConnection
socks_proxy = req.headers.get('Ytdl-socks-proxy')
if socks_proxy:
req.headers = handle_youtubedl_headers(req.headers)
- return req
+ return super().do_request_(req)
def http_response(self, req, resp):
old_resp = resp
break
else:
raise original_ioerror
- resp = compat_urllib_request.addinfourl(uncompressed, old_resp.headers, old_resp.url, old_resp.code)
+ resp = urllib.request.addinfourl(uncompressed, old_resp.headers, old_resp.url, old_resp.code)
resp.msg = old_resp.msg
del resp.headers['Content-encoding']
# deflate
if resp.headers.get('Content-encoding', '') == 'deflate':
gz = io.BytesIO(self.deflate(resp.read()))
- resp = compat_urllib_request.addinfourl(gz, old_resp.headers, old_resp.url, old_resp.code)
+ resp = urllib.request.addinfourl(gz, old_resp.headers, old_resp.url, old_resp.code)
resp.msg = old_resp.msg
del resp.headers['Content-encoding']
# brotli
if resp.headers.get('Content-encoding', '') == 'br':
- resp = compat_urllib_request.addinfourl(
+ resp = urllib.request.addinfourl(
io.BytesIO(self.brotli(resp.read())), old_resp.headers, old_resp.url, old_resp.code)
resp.msg = old_resp.msg
del resp.headers['Content-encoding']
def make_socks_conn_class(base_class, socks_proxy):
assert issubclass(base_class, (
- compat_http_client.HTTPConnection, compat_http_client.HTTPSConnection))
+ http.client.HTTPConnection, http.client.HTTPSConnection))
- url_components = compat_urlparse.urlparse(socks_proxy)
+ url_components = urllib.parse.urlparse(socks_proxy)
if url_components.scheme.lower() == 'socks5':
socks_type = ProxyType.SOCKS5
elif url_components.scheme.lower() in ('socks', 'socks4'):
def unquote_if_non_empty(s):
if not s:
return s
- return compat_urllib_parse_unquote_plus(s)
+ return urllib.parse.unquote_plus(s)
proxy_args = (
socks_type,
self.sock.settimeout(self.timeout)
self.sock.connect((self.host, self.port))
- if isinstance(self, compat_http_client.HTTPSConnection):
+ if isinstance(self, http.client.HTTPSConnection):
if hasattr(self, '_context'): # Python > 2.6
self.sock = self._context.wrap_socket(
self.sock, server_hostname=self.host)
return SocksConnection
-class YoutubeDLHTTPSHandler(compat_urllib_request.HTTPSHandler):
+class YoutubeDLHTTPSHandler(urllib.request.HTTPSHandler):
def __init__(self, params, https_conn_class=None, *args, **kwargs):
- compat_urllib_request.HTTPSHandler.__init__(self, *args, **kwargs)
- self._https_conn_class = https_conn_class or compat_http_client.HTTPSConnection
+ urllib.request.HTTPSHandler.__init__(self, *args, **kwargs)
+ self._https_conn_class = https_conn_class or http.client.HTTPSConnection
self._params = params
def https_open(self, req):
raise
-class YoutubeDLCookieJar(compat_cookiejar.MozillaCookieJar):
+class YoutubeDLCookieJar(http.cookiejar.MozillaCookieJar):
"""
See [1] for cookie file format.
if self.filename is not None:
filename = self.filename
else:
- raise ValueError(compat_cookiejar.MISSING_FILENAME_TEXT)
+ raise ValueError(http.cookiejar.MISSING_FILENAME_TEXT)
# Store session cookies with `expires` set to 0 instead of an empty string
for cookie in self:
if self.filename is not None:
filename = self.filename
else:
- raise ValueError(compat_cookiejar.MISSING_FILENAME_TEXT)
+ raise ValueError(http.cookiejar.MISSING_FILENAME_TEXT)
def prepare_line(line):
if line.startswith(self._HTTPONLY_PREFIX):
return line
cookie_list = line.split('\t')
if len(cookie_list) != self._ENTRY_LEN:
- raise compat_cookiejar.LoadError('invalid length %d' % len(cookie_list))
+ raise http.cookiejar.LoadError('invalid length %d' % len(cookie_list))
cookie = self._CookieFileEntry(*cookie_list)
if cookie.expires_at and not cookie.expires_at.isdigit():
- raise compat_cookiejar.LoadError('invalid expires at %s' % cookie.expires_at)
+ raise http.cookiejar.LoadError('invalid expires at %s' % cookie.expires_at)
return line
cf = io.StringIO()
for line in f:
try:
cf.write(prepare_line(line))
- except compat_cookiejar.LoadError as e:
+ except http.cookiejar.LoadError as e:
if f'{line.strip()} '[0] in '[{"':
- raise compat_cookiejar.LoadError(
+ raise http.cookiejar.LoadError(
'Cookies file must be Netscape formatted, not JSON. See '
'https://github.com/ytdl-org/youtube-dl#how-do-i-pass-cookies-to-youtube-dl')
write_string(f'WARNING: skipping cookie file entry due to {e}: {line!r}\n')
cookie.discard = True
-class YoutubeDLCookieProcessor(compat_urllib_request.HTTPCookieProcessor):
+class YoutubeDLCookieProcessor(urllib.request.HTTPCookieProcessor):
def __init__(self, cookiejar=None):
- compat_urllib_request.HTTPCookieProcessor.__init__(self, cookiejar)
+ urllib.request.HTTPCookieProcessor.__init__(self, cookiejar)
def http_response(self, request, response):
- return compat_urllib_request.HTTPCookieProcessor.http_response(self, request, response)
+ return urllib.request.HTTPCookieProcessor.http_response(self, request, response)
- https_request = compat_urllib_request.HTTPCookieProcessor.http_request
+ https_request = urllib.request.HTTPCookieProcessor.http_request
https_response = http_response
-class YoutubeDLRedirectHandler(compat_urllib_request.HTTPRedirectHandler):
+class YoutubeDLRedirectHandler(urllib.request.HTTPRedirectHandler):
"""YoutubeDL redirect handler
The code is based on HTTPRedirectHandler implementation from CPython [1].
3. https://github.com/ytdl-org/youtube-dl/issues/28768
"""
- http_error_301 = http_error_303 = http_error_307 = http_error_308 = compat_urllib_request.HTTPRedirectHandler.http_error_302
+ http_error_301 = http_error_303 = http_error_307 = http_error_308 = urllib.request.HTTPRedirectHandler.http_error_302
def redirect_request(self, req, fp, code, msg, headers, newurl):
"""Return a Request or None in response to a redirect.
m = req.get_method()
if (not (code in (301, 302, 303, 307, 308) and m in ("GET", "HEAD")
or code in (301, 302, 303) and m == "POST")):
- raise compat_HTTPError(req.full_url, code, msg, headers, fp)
+ raise urllib.error.HTTPError(req.full_url, code, msg, headers, fp)
# Strictly (according to RFC 2616), 301 or 302 in response to
# a POST MUST NOT cause a redirection without confirmation
# from the user (of urllib.request, in this case). In practice,
if code in (301, 302) and m == 'POST':
m = 'GET'
- return compat_urllib_request.Request(
+ return urllib.request.Request(
newurl, headers=newheaders, origin_req_host=req.origin_req_host,
unverifiable=True, method=m)
with contextlib.suppress(ValueError):
upload_date = datetime.datetime(*timetuple[:6]).strftime('%Y%m%d')
if upload_date is not None:
- return compat_str(upload_date)
+ return str(upload_date)
def unified_timestamp(date_str, day_first=True):
def __str__(self):
return f'{self.start.isoformat()} - {self.end.isoformat()}'
+ def __eq__(self, other):
+ # Two ranges are equal when both endpoints match; any non-DateRange object compares unequal.
+ # NOTE(review): a class that defines __eq__ without __hash__ becomes unhashable - confirm that
+ # the class defines __hash__ elsewhere or that instances are never used as dict keys/set members
+ return (isinstance(other, DateRange)
+ and self.start == other.start and self.end == other.end)
+
def platform_name():
- """ Returns the platform name as a compat_str """
- res = platform.platform()
- if isinstance(res, bytes):
- res = res.decode(preferredencoding())
+ """
+ Returns the platform name as a str
+
+ Deprecated: writes a deprecation notice on every call; use platform.platform instead
+ """
+ # The other write_string callers in this file terminate their message with '\n';
+ # without it the notice runs into whatever is written next
+ write_string('DeprecationWarning: yt_dlp.utils.platform_name is deprecated, use platform.platform instead\n')
+ return platform.platform()
- assert isinstance(res, compat_str)
- return res
+
+@functools.cache
+def system_identifier():
+    """Describe the running Python interpreter and platform in a single line (result is cached)"""
+    impl_name = platform.python_implementation()
+    if impl_name == 'PyPy' and hasattr(sys, 'pypy_version_info'):
+        impl_name += ' version %d.%d.%d' % sys.pypy_version_info[:3]
+
+    # libc name and version, wrapped in parentheses by format_field
+    libc_info = format_field(join_nonempty(*platform.libc_ver(), delim=' '), None, '(%s)')
+    return 'Python %s (%s %s) - %s %s' % (
+        platform.python_version(),
+        impl_name,
+        platform.architecture()[0],
+        platform.platform(),
+        libc_info,
+    )
@functools.cache
def intlist_to_bytes(xs):
if not xs:
return b''
- return compat_struct_pack('%dB' % len(xs), *xs)
+ return struct.pack('%dB' % len(xs), *xs)
class LockingUnsupportedError(OSError):
url, idata = unsmuggle_url(url, {})
data.update(idata)
- sdata = compat_urllib_parse_urlencode(
+ sdata = urllib.parse.urlencode(
{'__youtubedl_smuggle': json.dumps(data)})
return url + '#' + sdata
if '#__youtubedl_smuggle' not in smug_url:
return smug_url, default
url, _, sdata = smug_url.rpartition('#')
- jsond = compat_parse_qs(sdata)['__youtubedl_smuggle'][0]
+ jsond = urllib.parse.parse_qs(sdata)['__youtubedl_smuggle'][0]
data = json.loads(jsond)
return url, data
def parse_bitrate(s):
- if not isinstance(s, compat_str):
+ if not isinstance(s, str):
return
mobj = re.search(r'\b(\d+)\s*kbps', s)
if mobj:
def setproctitle(title):
- assert isinstance(title, compat_str)
+ assert isinstance(title, str)
# ctypes in Jython is not complete
# http://bugs.jython.org/issue2148
def get_domain(url):
- domain = re.match(r'(?:https?:\/\/)?(?:www\.)?(?P<domain>[^\n\/]+\.[^\n\/]+)(?:\/(.*))?', url)
- return domain.group('domain') if domain else None
+ """
+ This implementation is inconsistent, but is kept for compatibility.
+ Use this only for "webpage_url_domain"
+ """
+ return remove_start(urllib.parse.urlparse(url).netloc, 'www.') or None
def url_basename(url):
- path = compat_urlparse.urlparse(url).path
+ path = urllib.parse.urlparse(url).path
return path.strip('/').split('/')[-1]
def urljoin(base, path):
if isinstance(path, bytes):
path = path.decode()
- if not isinstance(path, compat_str) or not path:
+ if not isinstance(path, str) or not path:
return None
if re.match(r'^(?:[a-zA-Z][a-zA-Z0-9+-.]*:)?//', path):
return path
if isinstance(base, bytes):
base = base.decode()
- if not isinstance(base, compat_str) or not re.match(
+ if not isinstance(base, str) or not re.match(
r'^(?:https?:)?//', base):
return None
- return compat_urlparse.urljoin(base, path)
+ return urllib.parse.urljoin(base, path)
-class HEADRequest(compat_urllib_request.Request):
+class HEADRequest(urllib.request.Request):
def get_method(self):
return 'HEAD'
-class PUTRequest(compat_urllib_request.Request):
+class PUTRequest(urllib.request.Request):
def get_method(self):
return 'PUT'
def str_or_none(v, default=None):
- return default if v is None else compat_str(v)
+ return default if v is None else str(v)
def str_to_int(int_str):
""" A more relaxed version of int_or_none """
if isinstance(int_str, int):
return int_str
- elif isinstance(int_str, compat_str):
+ elif isinstance(int_str, str):
int_str = re.sub(r'[,\.\+]', '', int_str)
return int_or_none(int_str)
def strip_or_none(v, default=None):
- return v.strip() if isinstance(v, compat_str) else default
+ return v.strip() if isinstance(v, str) else default
def url_or_none(url):
- if not url or not isinstance(url, compat_str):
+ if not url or not isinstance(url, str):
return None
url = url.strip()
return url if re.match(r'^(?:(?:https?|rt(?:m(?:pt?[es]?|fp)|sp[su]?)|mms|ftps?):)?//', url) else None
def request_to_url(req):
- if isinstance(req, compat_urllib_request.Request):
+ if isinstance(req, urllib.request.Request):
return req.get_full_url()
else:
return req
try:
if isinstance(timestamp, (int, float)): # unix timestamp
datetime_object = datetime.datetime.utcfromtimestamp(timestamp)
- elif isinstance(timestamp, compat_str): # assume YYYYMMDD
+ elif isinstance(timestamp, str): # assume YYYYMMDD
datetime_object = datetime.datetime.strptime(timestamp, '%Y%m%d')
return datetime_object.strftime(date_format)
except (ValueError, TypeError, AttributeError):
def detect_exe_version(output, version_re=None, unrecognized='present'):
- assert isinstance(output, compat_str)
+ assert isinstance(output, str)
if version_re is None:
version_re = r'version\s+([-0-9._a-zA-Z]+)'
m = re.search(version_re, output)
@staticmethod
def _reverse_index(x):
- return None if x is None else -(x + 1)
+ return None if x is None else ~x
def __getitem__(self, idx):
if isinstance(idx, slice):
for index in self.parse_playlist_items(playlist_items):
for i, entry in self[index]:
yield i, entry
+ if not entry:
+ continue
try:
# TODO: Add auto-generated fields
self.ydl._match_entry(entry, incomplete=True, silent=True)
def escape_url(url):
"""Escape URL as suggested by RFC 3986"""
- url_parsed = compat_urllib_parse_urlparse(url)
+ url_parsed = urllib.parse.urlparse(url)
return url_parsed._replace(
netloc=url_parsed.netloc.encode('idna').decode('ascii'),
path=escape_rfc3986(url_parsed.path),
def parse_qs(url):
- return compat_parse_qs(compat_urllib_parse_urlparse(url).query)
+ return urllib.parse.parse_qs(urllib.parse.urlparse(url).query)
def read_batch_urls(batch_fd):
def fixup(url):
- if not isinstance(url, compat_str):
+ if not isinstance(url, str):
url = url.decode('utf-8', 'replace')
BOM_UTF8 = ('\xef\xbb\xbf', '\ufeff')
for bom in BOM_UTF8:
if not url or url.startswith(('#', ';', ']')):
return False
# "#" cannot be stripped out since it is part of the URI
- # However, it can be safely stipped out if follwing a whitespace
+ # However, it can be safely stripped out if following a whitespace
return re.split(r'\s#', url, 1)[0].rstrip()
with contextlib.closing(batch_fd) as fd:
def urlencode_postdata(*args, **kargs):
- return compat_urllib_parse_urlencode(*args, **kargs).encode('ascii')
+ return urllib.parse.urlencode(*args, **kargs).encode('ascii')
def update_url_query(url, query):
if not query:
return url
- parsed_url = compat_urlparse.urlparse(url)
- qs = compat_parse_qs(parsed_url.query)
+ parsed_url = urllib.parse.urlparse(url)
+ qs = urllib.parse.parse_qs(parsed_url.query)
qs.update(query)
- return compat_urlparse.urlunparse(parsed_url._replace(
- query=compat_urllib_parse_urlencode(qs, True)))
+ return urllib.parse.urlunparse(parsed_url._replace(
+ query=urllib.parse.urlencode(qs, True)))
-def update_Request(req, url=None, data=None, headers={}, query={}):
+def update_Request(req, url=None, data=None, headers=None, query=None):
req_headers = req.headers.copy()
- req_headers.update(headers)
+ req_headers.update(headers or {})
req_data = data or req.data
req_url = update_url_query(url or req.get_full_url(), query)
req_get_method = req.get_method()
elif req_get_method == 'PUT':
req_type = PUTRequest
else:
- req_type = compat_urllib_request.Request
+ req_type = urllib.request.Request
new_req = req_type(
req_url, data=req_data, headers=req_headers,
origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
out = b''
for k, v in data.items():
out += b'--' + boundary.encode('ascii') + b'\r\n'
- if isinstance(k, compat_str):
+ if isinstance(k, str):
k = k.encode()
- if isinstance(v, compat_str):
+ if isinstance(v, str):
v = v.encode()
# RFC 2047 requires non-ASCII field names to be encoded, while RFC 7578
# suggests sending UTF-8 directly. Firefox sends UTF-8, too
def encode_compat_str(string, encoding=preferredencoding(), errors='strict'):
- return string if isinstance(string, compat_str) else compat_str(string, encoding, errors)
+ return string if isinstance(string, str) else str(string, encoding, errors)
US_RATINGS = {
str.strip, codecs_str.strip().strip(',').split(','))))
vcodec, acodec, scodec, hdr = None, None, None, None
for full_codec in split_codecs:
- parts = full_codec.split('.')
- codec = parts[0].replace('0', '')
- if codec in ('avc1', 'avc2', 'avc3', 'avc4', 'vp9', 'vp8', 'hev1', 'hev2',
- 'h263', 'h264', 'mp4v', 'hvc1', 'av1', 'theora', 'dvh1', 'dvhe'):
- if not vcodec:
- vcodec = '.'.join(parts[:4]) if codec in ('vp9', 'av1', 'hvc1') else full_codec
- if codec in ('dvh1', 'dvhe'):
- hdr = 'DV'
- elif codec == 'av1' and len(parts) > 3 and parts[3] == '10':
- hdr = 'HDR10'
- elif full_codec.replace('0', '').startswith('vp9.2'):
- hdr = 'HDR10'
- elif codec in ('flac', 'mp4a', 'opus', 'vorbis', 'mp3', 'aac', 'ac-3', 'ec-3', 'eac3', 'dtsc', 'dtse', 'dtsh', 'dtsl'):
- if not acodec:
- acodec = full_codec
- elif codec in ('stpp', 'wvtt',):
- if not scodec:
- scodec = full_codec
+ parts = re.sub(r'0+(?=\d)', '', full_codec).split('.')
+ if parts[0] in ('avc1', 'avc2', 'avc3', 'avc4', 'vp9', 'vp8', 'hev1', 'hev2',
+ 'h263', 'h264', 'mp4v', 'hvc1', 'av1', 'theora', 'dvh1', 'dvhe'):
+ if vcodec:
+ continue
+ vcodec = full_codec
+ if parts[0] in ('dvh1', 'dvhe'):
+ hdr = 'DV'
+ elif parts[0] == 'av1' and traverse_obj(parts, 3) == '10':
+ hdr = 'HDR10'
+ elif parts[:2] == ['vp9', '2']:
+ hdr = 'HDR10'
+ elif parts[0] in ('flac', 'mp4a', 'opus', 'vorbis', 'mp3', 'aac',
+ 'ac-3', 'ec-3', 'eac3', 'dtsc', 'dtse', 'dtsh', 'dtsl'):
+ acodec = acodec or full_codec
+ elif parts[0] in ('stpp', 'wvtt'):
+ scodec = scodec or full_codec
else:
write_string(f'WARNING: Unknown codec {full_codec}\n')
if vcodec or acodec or scodec:
return age_limit < content_limit
+# List of known byte-order-marks (BOM), as (mark bytes, codec name) pairs
+# Order matters: consumers test `startswith` entry by entry, and the UTF-32-LE mark begins
+# with the same two bytes as the UTF-16-LE mark, so the longer mark has to be listed first
+BOMS = [
+ (b'\xef\xbb\xbf', 'utf-8'),
+ (b'\x00\x00\xfe\xff', 'utf-32-be'),
+ (b'\xff\xfe\x00\x00', 'utf-32-le'),
+ (b'\xff\xfe', 'utf-16-le'),
+ (b'\xfe\xff', 'utf-16-be'),
+]
+
+
def is_html(first_bytes):
""" Detect whether a file contains HTML by examining its first bytes. """
- BOMS = [
- (b'\xef\xbb\xbf', 'utf-8'),
- (b'\x00\x00\xfe\xff', 'utf-32-be'),
- (b'\xff\xfe\x00\x00', 'utf-32-le'),
- (b'\xff\xfe', 'utf-16-le'),
- (b'\xfe\xff', 'utf-16-be'),
- ]
-
encoding = 'utf-8'
for bom, enc in BOMS:
while first_bytes.startswith(bom):
elif ext == 'f4m':
return 'f4m'
- return compat_urllib_parse_urlparse(url).scheme
+ return urllib.parse.urlparse(url).scheme
def render_table(header_row, data, delim=False, extra_gap=0, hide_empty=False):
if not filters or any(match_str(f, info_dict, incomplete) for f in filters):
return NO_DEFAULT if interactive and not incomplete else None
else:
- video_title = info_dict.get('title') or info_dict.get('id') or 'video'
+ video_title = info_dict.get('title') or info_dict.get('id') or 'entry'
filter_str = ') | ('.join(map(str.strip, filters))
return f'{video_title} does not pass filter ({filter_str}), skipping ..'
return _match_func
-def download_range_func(chapters, ranges):
- def inner(info_dict, ydl):
+class download_range_func:
+ """
+ Callable yielding, for an info_dict, every chapter whose title matches one of
+ the `chapters` regexes (with its 'index' added), followed by one
+ {'start_time', 'end_time'} dict for each (start, end) pair in `ranges`.
+ Instances compare equal when both `chapters` and `ranges` are equal
+ """
+ def __init__(self, chapters, ranges):
+ self.chapters, self.ranges = chapters, ranges
+
+ def __call__(self, info_dict, ydl):
warning = ('There are no chapters matching the regex' if info_dict.get('chapters')
else 'Cannot match chapters since chapter information is unavailable')
- for regex in chapters or []:
+ for regex in self.chapters or []:
for i, chapter in enumerate(info_dict.get('chapters') or []):
if re.search(regex, chapter['title']):
warning = None
yield {**chapter, 'index': i}
- if chapters and warning:
+ # Only report when regexes were given and none of them matched any chapter
+ if self.chapters and warning:
ydl.to_screen(f'[info] {info_dict["id"]}: {warning}')
- yield from ({'start_time': start, 'end_time': end} for start, end in ranges or [])
+ yield from ({'start_time': start, 'end_time': end} for start, end in self.ranges or [])
- return inner
+ def __eq__(self, other):
+ return (isinstance(other, download_range_func)
+ and self.chapters == other.chapters and self.ranges == other.ranges)
def parse_dfxp_time_expr(time_expr):
else:
block = code_or_block
addr, preflen = block.split('/')
- addr_min = compat_struct_unpack('!L', socket.inet_aton(addr))[0]
+ addr_min = struct.unpack('!L', socket.inet_aton(addr))[0]
addr_max = addr_min | (0xffffffff >> int(preflen))
- return compat_str(socket.inet_ntoa(
- compat_struct_pack('!L', random.randint(addr_min, addr_max))))
+ return str(socket.inet_ntoa(
+ struct.pack('!L', random.randint(addr_min, addr_max))))
-class PerRequestProxyHandler(compat_urllib_request.ProxyHandler):
+class PerRequestProxyHandler(urllib.request.ProxyHandler):
def __init__(self, proxies=None):
# Set default handlers
for type in ('http', 'https'):
setattr(self, '%s_open' % type,
lambda r, proxy='__noproxy__', type=type, meth=self.proxy_open:
meth(r, proxy, type))
- compat_urllib_request.ProxyHandler.__init__(self, proxies)
+ urllib.request.ProxyHandler.__init__(self, proxies)
def proxy_open(self, req, proxy, type):
req_proxy = req.headers.get('Ytdl-request-proxy')
if proxy == '__noproxy__':
return None # No Proxy
- if compat_urlparse.urlparse(proxy).scheme.lower() in ('socks', 'socks4', 'socks4a', 'socks5'):
+ if urllib.parse.urlparse(proxy).scheme.lower() in ('socks', 'socks4', 'socks4a', 'socks5'):
req.add_header('Ytdl-socks-proxy', proxy)
# yt-dlp's http/https handlers do wrapping the socket with socks
return None
- return compat_urllib_request.ProxyHandler.proxy_open(
+ return urllib.request.ProxyHandler.proxy_open(
self, req, proxy, type)
s = b''
n = int(n)
while n > 0:
- s = compat_struct_pack('>I', n & 0xffffffff) + s
+ s = struct.pack('>I', n & 0xffffffff) + s
n = n >> 32
# strip off leading zeros
for i in range(len(s)):
s = b'\000' * extra + s
length = length + extra
for i in range(0, length, 4):
- acc = (acc << 32) + compat_struct_unpack('>I', s[i:i + 4])[0]
+ acc = (acc << 32) + struct.unpack('>I', s[i:i + 4])[0]
return acc
raise ValueError('Either table or n must be specified')
table = (table or '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ')[:n]
- if n != len(table):
+ if n and n != len(table):
raise ValueError(f'base {n} exceeds table length {len(table)}')
return table
raise OSError('Not a valid PNG file.')
int_map = {1: '>B', 2: '>H', 4: '>I'}
- unpack_integer = lambda x: compat_struct_unpack(int_map[len(x)], x)[0]
+ unpack_integer = lambda x: struct.unpack(int_map[len(x)], x)[0]
chunks = []
return
# UNIX Method 1. Use xattrs/pyxattrs modules
- from .dependencies import xattr
setxattr = None
if getattr(xattr, '_yt_dlp__identifier', None) == 'pyxattr':
The function doesn't add an additional layer of escaping; e.g., it doesn't escape `%3C` as `%253C`. Instead, it percent-escapes characters with an underlying UTF-8 encoding *besides* those already escaped, leaving the URI intact.
"""
- iri_parts = compat_urllib_parse_urlparse(iri)
+ iri_parts = urllib.parse.urlparse(iri)
if '[' in iri_parts.netloc:
raise ValueError('IPv6 URIs are not, yet, supported.')
return sys.stdin
+def determine_file_encoding(data):
+    """
+    Work out the text encoding from the leading bytes of a file
+    @returns (encoding, bytes to skip)
+    """
+
+    # A byte-order-mark, when present, wins over any coding declaration
+    for mark, codec in BOMS:
+        if not data.startswith(mark):
+            continue
+        return codec, len(mark)
+
+    # Drop all null bytes so that the declaration is found even in UTF-16/UTF-32 data.
+    # Endianness is ignored on purpose; the match only has to be good enough
+    stripped = data.replace(b'\0', b'')
+    declaration = re.match(rb'(?m)^#\s*coding\s*:\s*(\S+)\s*$', stripped)
+    if declaration is None:
+        return None, 0
+    return declaration.group(1).decode(), 0
+
+
class Config:
own_args = None
parsed_args = None
def init(self, args=None, filename=None):
assert not self.__initialized
+ self.own_args, self.filename = args, filename
+ return self.load_configs()
+
+ def load_configs(self):
directory = ''
- if filename:
- location = os.path.realpath(filename)
+ if self.filename:
+ location = os.path.realpath(self.filename)
directory = os.path.dirname(location)
if location in self._loaded_paths:
return False
self._loaded_paths.add(location)
- self.own_args, self.__initialized = args, True
- opts, _ = self.parser.parse_known_args(args)
- self.parsed_args, self.filename = args, filename
-
+ self.__initialized = True
+ opts, _ = self.parser.parse_known_args(self.own_args)
+ self.parsed_args = self.own_args
for location in opts.config_locations or []:
if location == '-':
self.append_config(shlex.split(read_stdin('options'), comments=True), label='stdin')
@staticmethod
def read_file(filename, default=[]):
try:
- optionf = open(filename)
+ optionf = open(filename, 'rb')
except OSError:
return default # silently skip if file is not present
+ try:
+ enc, skip = determine_file_encoding(optionf.read(512))
+ optionf.seek(skip, io.SEEK_SET)
+ except OSError:
+ enc = None # silently skip read errors
try:
# FIXME: https://github.com/ytdl-org/youtube-dl/commit/dfe5fa49aed02cf36ba9f743b11b0903554b5e56
- contents = optionf.read()
+ contents = optionf.read().decode(enc or preferredencoding())
res = shlex.split(contents, comments=True)
except Exception as err:
raise ValueError(f'Unable to parse "{filename}": {err}')
return {k.title(): v for k, v in itertools.chain.from_iterable(map(dict.items, dicts))}
+def cached_method(f):
+    """
+    Cache the results of a method on the instance it is called on
+
+    Results are stored per instance and per method name, keyed by the bound
+    argument values (with defaults applied), so every argument other than
+    `self` must be hashable
+    """
+    signature = inspect.signature(f)
+
+    @functools.wraps(f)
+    def wrapper(self, *args, **kwargs):
+        # Binding normalizes positional/keyword/default spellings of the same call
+        bound_args = signature.bind(self, *args, **kwargs)
+        bound_args.apply_defaults()
+        # The cache lives on the instance itself, so `self` (always the first bound
+        # argument) is dropped from the key. Keeping it is redundant and raises
+        # TypeError for instances that are not hashable
+        key = tuple(bound_args.arguments.values())[1:]
+
+        if not hasattr(self, '__cached_method__cache'):
+            self.__cached_method__cache = {}
+        cache = self.__cached_method__cache.setdefault(f.__name__, {})
+        if key not in cache:
+            cache[key] = f(self, *args, **kwargs)
+        return cache[key]
+    return wrapper
+
+
class classproperty:
- """classmethod(property(func)) that works in py < 3.9"""
+ """property access for class methods"""
def __init__(self, func):
functools.update_wrapper(self, func)
return self.__dict__.items()
+# File extensions grouped by kind of media. The `common_*` groups are kept
+# separately and are also merged into `video`/`audio` right below
+MEDIA_EXTENSIONS = Namespace(
+ common_video=('avi', 'flv', 'mkv', 'mov', 'mp4', 'webm'),
+ video=('3g2', '3gp', 'f4v', 'mk3d', 'divx', 'mpg', 'ogv', 'm4v', 'wmv'),
+ common_audio=('aiff', 'alac', 'flac', 'm4a', 'mka', 'mp3', 'ogg', 'opus', 'wav'),
+ audio=('aac', 'ape', 'asf', 'f4a', 'f4b', 'm4b', 'm4p', 'm4r', 'oga', 'ogx', 'spx', 'vorbis', 'wma'),
+ thumbnails=('jpg', 'png', 'webp'),
+ storyboards=('mhtml', ),
+ subtitles=('srt', 'vtt', 'ass', 'lrc'),
+ manifests=('f4f', 'f4m', 'm3u8', 'smil', 'mpd'),
+)
+# After these two statements `video` and `audio` also contain the common extensions
+MEDIA_EXTENSIONS.video += MEDIA_EXTENSIONS.common_video
+MEDIA_EXTENSIONS.audio += MEDIA_EXTENSIONS.common_audio
+
+# Built from the video, audio and manifest groups only; thumbnails, storyboards and subtitles are not included
+KNOWN_EXTENSIONS = (*MEDIA_EXTENSIONS.video, *MEDIA_EXTENSIONS.audio, *MEDIA_EXTENSIONS.manifests)
+
+
# Deprecated
has_certifi = bool(certifi)
has_websockets = bool(websockets)