import http.client
import http.cookiejar
import http.cookies
+import inspect
import itertools
import json
import math
from ..compat import functools # isort: split
from ..compat import compat_etree_fromstring, compat_expanduser, compat_os_name
+from ..cookies import LenientSimpleCookie
from ..downloader import FileDownloader
from ..downloader.f4m import get_base_url, remove_encrypted_media
from ..utils import (
'password': f'Use {password_hint}',
'cookies': (
'Use --cookies-from-browser or --cookies for the authentication. '
- 'See https://github.com/ytdl-org/youtube-dl#how-do-i-pass-cookies-to-youtube-dl for how to manually pass cookies'),
+ 'See https://github.com/yt-dlp/yt-dlp/wiki/FAQ#how-do-i-pass-cookies-to-yt-dlp for how to manually pass cookies'),
}[method if method is not NO_DEFAULT else 'any' if self.supports_login() else 'cookies']
def __init__(self, downloader=None):
fatal, has_default = False, True
json_string = self._search_regex(
- rf'{start_pattern}\s*(?P<json>{{\s*{contains_pattern}\s*}})\s*{end_pattern}',
+ rf'(?:{start_pattern})\s*(?P<json>{{\s*(?:{contains_pattern})\s*}})\s*(?:{end_pattern})',
string, name, group='json', fatal=fatal, default=None if has_default else NO_DEFAULT)
if not json_string:
return default
info['chapters'] = chapters
def extract_video_object(e):
- assert is_type(e, 'VideoObject')
author = e.get('author')
info.update({
'url': url_or_none(e.get('contentUrl')),
+ 'ext': mimetype2ext(e.get('encodingFormat')),
'title': unescapeHTML(e.get('name')),
'description': unescapeHTML(e.get('description')),
'thumbnails': [{'url': unescapeHTML(url)}
# however some websites are using 'Text' type instead.
# 1. https://schema.org/VideoObject
'uploader': author.get('name') if isinstance(author, dict) else author if isinstance(author, str) else None,
+ 'artist': traverse_obj(e, ('byArtist', 'name'), expected_type=str),
'filesize': int_or_none(float_or_none(e.get('contentSize'))),
'tbr': int_or_none(e.get('bitrate')),
'width': int_or_none(e.get('width')),
'height': int_or_none(e.get('height')),
'view_count': int_or_none(e.get('interactionCount')),
+ 'tags': try_call(lambda: e.get('keywords').split(',')),
})
+ if is_type(e, 'AudioObject'):
+ info.update({
+ 'vcodec': 'none',
+ 'abr': int_or_none(e.get('bitrate')),
+ })
extract_interaction_statistic(e)
extract_chapter_information(e)
extract_video_object(e['video'][0])
elif is_type(traverse_obj(e, ('subjectOf', 0)), 'VideoObject'):
extract_video_object(e['subjectOf'][0])
- elif is_type(e, 'VideoObject'):
+ elif is_type(e, 'VideoObject', 'AudioObject'):
extract_video_object(e)
if expected_type is None:
continue
def prepare_template(template_name, identifiers):
tmpl = representation_ms_info[template_name]
+ if representation_id is not None:
+ tmpl = tmpl.replace('$RepresentationID$', representation_id)
# First of, % characters outside $...$ templates
# must be escaped by doubling for proper processing
# by % operator string formatting used further (see
t += c
# Next, $...$ templates are translated to their
# %(...) counterparts to be used with % operator
- if representation_id is not None:
- t = t.replace('$RepresentationID$', representation_id)
t = re.sub(r'\$(%s)\$' % '|'.join(identifiers), r'%(\1)d', t)
t = re.sub(r'\$(%s)%%([^$]+)\$' % '|'.join(identifiers), r'%(\1)\2', t)
t.replace('$$', '$')
def _get_cookies(self, url):
""" Return a http.cookies.SimpleCookie with the cookies for the url """
- return http.cookies.SimpleCookie(self._downloader._calc_cookies(url))
+ return LenientSimpleCookie(self._downloader._calc_cookies(url))
def _apply_first_set_cookie_header(self, url_handle, cookie):
"""
return True
def _error_or_warning(self, err, _count=None, _retries=0, *, fatal=True):
- RetryManager.report_retry(err, _count or int(fatal), _retries, info=self.to_screen, warn=self.report_warning,
- sleep_func=self.get_param('retry_sleep_functions', {}).get('extractor'))
+ RetryManager.report_retry(
+ err, _count or int(fatal), _retries,
+ info=self.to_screen, warn=self.report_warning, error=None if fatal else self.report_warning,
+ sleep_func=self.get_param('retry_sleep_functions', {}).get('extractor'))
def RetryManager(self, **kwargs):
return RetryManager(self.get_param('extractor_retries', 3), self._error_or_warning, **kwargs)
"""Only for compatibility with some older extractors"""
return next(iter(cls._extract_embed_urls(None, webpage) or []), None)
+ @classmethod
+ def __init_subclass__(cls, *, plugin_name=None, **kwargs):
+ if plugin_name:
+ mro = inspect.getmro(cls)
+ super_class = cls.__wrapped__ = mro[mro.index(cls) + 1]
+ cls.IE_NAME, cls.ie_key = f'{super_class.IE_NAME}+{plugin_name}', super_class.ie_key
+ while getattr(super_class, '__wrapped__', None):
+ super_class = super_class.__wrapped__
+ setattr(sys.modules[super_class.__module__], super_class.__name__, cls)
+
+ return super().__init_subclass__(**kwargs)
+
class SearchInfoExtractor(InfoExtractor):
"""