+import base64
import calendar
import copy
import datetime
-import functools
import hashlib
import itertools
import json
import traceback
from .common import InfoExtractor, SearchInfoExtractor
+from ..compat import functools # isort: split
from ..compat import (
compat_chr,
compat_HTTPError,
NO_DEFAULT,
ExtractorError,
bug_reports_message,
+ classproperty,
clean_html,
datetime_from_str,
dict_get,
# invidious-redirect websites
r'(?:www\.)?redirect\.invidious\.io',
r'(?:(?:www|dev)\.)?invidio\.us',
- # Invidious instances taken from https://github.com/iv-org/documentation/blob/master/Invidious-Instances.md
+ # Invidious instances taken from https://github.com/iv-org/documentation/blob/master/docs/instances.md
r'(?:www\.)?invidious\.pussthecat\.org',
r'(?:www\.)?invidious\.zee\.li',
r'(?:www\.)?invidious\.ethibox\.fr',
r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',
r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',
r'(?:www\.)?hpniueoejy4opn7bc4ftgazyqjoeqwlvh2uiku2xqku6zpoa4bf5ruid\.onion',
+ # piped instances from https://github.com/TeamPiped/Piped/wiki/Instances
+ r'(?:www\.)?piped\.kavin\.rocks',
+ r'(?:www\.)?piped\.silkky\.cloud',
+ r'(?:www\.)?piped\.tokhmi\.xyz',
+ r'(?:www\.)?piped\.moomoo\.me',
+ r'(?:www\.)?il\.ax',
+ r'(?:www\.)?piped\.syncpundit\.com',
+ r'(?:www\.)?piped\.mha\.fi',
+ r'(?:www\.)?piped\.mint\.lgbt',
+ r'(?:www\.)?piped\.privacy\.com\.de',
)
def _initialize_consent(self):
self._check_login_required()
def _check_login_required(self):
- if (self._LOGIN_REQUIRED
- and self.get_param('cookiefile') is None
- and self.get_param('cookiesfrombrowser') is None):
+ if self._LOGIN_REQUIRED and not self._cookies_passed:
self.raise_login_required('Login details are needed to download this content', method='cookies')
- _YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+?})\s*;'
- _YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*=\s*({.+?})\s*;'
- _YT_INITIAL_BOUNDARY_RE = r'(?:var\s+meta|</script|\n)'
+ _YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*='
+ _YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*='
def _get_default_ytcfg(self, client='web'):
return copy.deepcopy(INNERTUBE_CLIENTS[client])
ytcfg, (lambda x: x['INNERTUBE_CLIENT_VERSION'],
lambda x: x['INNERTUBE_CONTEXT']['client']['clientVersion']), compat_str, default_client)
+ def _select_api_hostname(self, req_api_hostname, default_client=None):
+ return (self._configuration_arg('innertube_host', [''], ie_key=YoutubeIE.ie_key())[0]
+ or req_api_hostname or self._get_innertube_host(default_client or 'web'))
+
def _extract_api_key(self, ytcfg=None, default_client='web'):
return self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_API_KEY'], compat_str, default_client)
real_headers.update({'content-type': 'application/json'})
if headers:
real_headers.update(headers)
+ api_key = (self._configuration_arg('innertube_key', [''], ie_key=YoutubeIE.ie_key(), casesense=True)[0]
+ or api_key or self._extract_api_key(default_client=default_client))
return self._download_json(
- f'https://{api_hostname or self._get_innertube_host(default_client)}/youtubei/v1/{ep}',
+ f'https://{self._select_api_hostname(api_hostname, default_client)}/youtubei/v1/{ep}',
video_id=video_id, fatal=fatal, note=note, errnote=errnote,
data=json.dumps(data).encode('utf8'), headers=real_headers,
- query={'key': api_key or self._extract_api_key(), 'prettyPrint': 'false'})
+ query={'key': api_key, 'prettyPrint': 'false'})
def extract_yt_initial_data(self, item_id, webpage, fatal=True):
- data = self._search_regex(
- (fr'{self._YT_INITIAL_DATA_RE}\s*{self._YT_INITIAL_BOUNDARY_RE}',
- self._YT_INITIAL_DATA_RE), webpage, 'yt initial data', fatal=fatal)
- if data:
- return self._parse_json(data, item_id, fatal=fatal)
+ return self._search_json(self._YT_INITIAL_DATA_RE, webpage, 'yt initial data', item_id, fatal=fatal)
@staticmethod
def _extract_session_index(*data):
args, [('VISITOR_DATA', ('INNERTUBE_CONTEXT', 'client', 'visitorData'), ('responseContext', 'visitorData'))],
expected_type=str)
- @property
+ @functools.cached_property
def is_authenticated(self):
return bool(self._generate_sapisidhash_header())
self, *, ytcfg=None, account_syncid=None, session_index=None,
visitor_data=None, identity_token=None, api_hostname=None, default_client='web'):
- origin = 'https://' + (api_hostname if api_hostname else self._get_innertube_host(default_client))
+ origin = 'https://' + (self._select_api_hostname(api_hostname, default_client))
headers = {
'X-YouTube-Client-Name': compat_str(
self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_CONTEXT_CLIENT_NAME'], default_client=default_client)),
'description': 'md5:2ef1d002cad520f65825346e2084e49d',
},
'params': {'skip_download': True}
- },
+ }, {
+ # Story. Requires specific player params to work.
+ # Note: stories get removed after some period of time
+ 'url': 'https://www.youtube.com/watch?v=vv8qTUWmulI',
+ 'info_dict': {
+ 'id': 'vv8qTUWmulI',
+ 'ext': 'mp4',
+ 'availability': 'unlisted',
+ 'view_count': int,
+ 'channel_id': 'UCzIZ8HrzDgc-pNQDUG6avBA',
+ 'upload_date': '20220526',
+ 'categories': ['Education'],
+ 'title': 'Story',
+ 'channel': 'IT\'S HISTORY',
+ 'description': '',
+ 'uploader_id': 'BlastfromthePast',
+ 'duration': 12,
+ 'uploader': 'IT\'S HISTORY',
+ 'playable_in_embed': True,
+ 'age_limit': 0,
+ 'live_status': 'not_live',
+ 'tags': [],
+ 'thumbnail': 'https://i.ytimg.com/vi_webp/vv8qTUWmulI/maxresdefault.webp',
+ 'uploader_url': 'http://www.youtube.com/user/BlastfromthePast',
+ 'channel_url': 'https://www.youtube.com/channel/UCzIZ8HrzDgc-pNQDUG6avBA',
+ }
+ }, {
+ 'url': 'https://www.youtube.com/watch?v=tjjjtzRLHvA',
+ 'info_dict': {
+ 'id': 'tjjjtzRLHvA',
+ 'ext': 'mp4',
+ 'title': 'ハッシュタグ無し };if window.ytcsi',
+ 'upload_date': '20220323',
+ 'like_count': int,
+ 'availability': 'unlisted',
+ 'channel': 'nao20010128nao',
+ 'thumbnail': 'https://i.ytimg.com/vi_webp/tjjjtzRLHvA/maxresdefault.webp',
+ 'age_limit': 0,
+ 'uploader': 'nao20010128nao',
+ 'uploader_id': 'nao20010128nao',
+ 'categories': ['Music'],
+ 'view_count': int,
+ 'description': '',
+ 'channel_url': 'https://www.youtube.com/channel/UCdqltm_7iv1Vs6kp6Syke5A',
+ 'channel_id': 'UCdqltm_7iv1Vs6kp6Syke5A',
+ 'live_status': 'not_live',
+ 'playable_in_embed': True,
+ 'channel_follower_count': int,
+ 'duration': 6,
+ 'tags': [],
+ 'uploader_url': 'http://www.youtube.com/user/nao20010128nao',
+ }
+ }
]
@classmethod
last_segment_url = urljoin(fragment_base_url, 'sq/%d' % idx)
yield {
'url': last_segment_url,
+ 'fragment_count': last_seq,
}
if known_idx == last_seq:
no_fragment_score += 5
def _decrypt_signature(self, s, video_id, player_url):
"""Turn the encrypted s field into a working signature"""
-
- if player_url is None:
- raise ExtractorError('Cannot decrypt signature without player_url')
-
try:
player_id = (player_url, self._signature_cache_id(s))
if player_id not in self._player_cache:
- func = self._extract_signature_function(
- video_id, player_url, s
- )
+ func = self._extract_signature_function(video_id, player_url, s)
self._player_cache[player_id] = func
func = self._player_cache[player_id]
self._print_sig_code(func, s)
return func(s)
except Exception as e:
- raise ExtractorError('Signature extraction failed: ' + traceback.format_exc(), cause=e)
+ raise ExtractorError(traceback.format_exc(), cause=e, video_id=video_id)
def _decrypt_nsig(self, s, video_id, player_url):
"""Turn the encrypted n field into a working signature"""
chapter_time = lambda chapter: parse_duration(self._get_text(chapter, 'timeDescription'))
chapter_title = lambda chapter: self._get_text(chapter, 'title')
- return next((
- filter(None, (
- self._extract_chapters(
- traverse_obj(contents, (..., 'macroMarkersListItemRenderer')),
- chapter_time, chapter_title, duration)
- for contents in content_list
- ))), [])
-
- def _extract_chapters(self, chapter_list, chapter_time, chapter_title, duration):
- chapters = []
- last_chapter = {'start_time': 0}
- for idx, chapter in enumerate(chapter_list or []):
- title = chapter_title(chapter)
- start_time = chapter_time(chapter)
- if start_time is None:
- continue
- last_chapter['end_time'] = start_time
- if start_time < last_chapter['start_time']:
- if idx == 1:
- chapters.pop()
- self.report_warning('Invalid start time for chapter "%s"' % last_chapter['title'])
- else:
- self.report_warning(f'Invalid start time for chapter "{title}"')
- continue
- last_chapter = {'start_time': start_time, 'title': title}
- chapters.append(last_chapter)
- last_chapter['end_time'] = duration
- return chapters
+ return next(filter(None, (
+ self._extract_chapters(traverse_obj(contents, (..., 'macroMarkersListItemRenderer')),
+ chapter_time, chapter_title, duration)
+ for contents in content_list)), [])
+
+ def _extract_chapters_from_description(self, description, duration):
+ return self._extract_chapters(
+ re.findall(r'(?m)^((?:\d+:)?\d{1,2}:\d{2})\b\W*\s(.+?)\s*$', description or ''),
+ chapter_time=lambda x: parse_duration(x[0]), chapter_title=lambda x: x[1],
+ duration=duration, strict=False)
- def _extract_yt_initial_variable(self, webpage, regex, video_id, name):
- return self._parse_json(self._search_regex(
- (fr'{regex}\s*{self._YT_INITIAL_BOUNDARY_RE}',
- regex), webpage, name, default='{}'), video_id, fatal=False)
+ def _extract_chapters(self, chapter_list, chapter_time, chapter_title, duration, strict=True):
+ if not duration:
+ return
+ chapter_list = [{
+ 'start_time': chapter_time(chapter),
+ 'title': chapter_title(chapter),
+ } for chapter in chapter_list or []]
+ if not strict:
+ chapter_list.sort(key=lambda c: c['start_time'] or 0)
+
+ chapters = [{'start_time': 0, 'title': '<Untitled>'}]
+ for idx, chapter in enumerate(chapter_list):
+ if chapter['start_time'] is None or not chapter['title']:
+ self.report_warning(f'Incomplete chapter {idx}')
+ elif chapters[-1]['start_time'] <= chapter['start_time'] <= duration:
+ chapters[-1]['end_time'] = chapter['start_time']
+ chapters.append(chapter)
+ else:
+ self.report_warning(f'Invalid start time for chapter "{chapter["title"]}"')
+ chapters[-1]['end_time'] = duration
+ return chapters if len(chapters) > 1 and chapters[1]['start_time'] else chapters[1:]
def _extract_comment(self, comment_renderer, parent=None):
comment_id = comment_renderer.get('commentId')
lambda p: int_or_none(p, default=sys.maxsize), self._configuration_arg('max_comments', ) + [''] * 4)
continuation = self._extract_continuation(root_continuation_data)
- message = self._get_text(root_continuation_data, ('contents', ..., 'messageRenderer', 'text'), max_runs=1)
- if message and not parent:
- self.report_warning(message, video_id=video_id)
response = None
+ is_forced_continuation = False
is_first_continuation = parent is None
+ if is_first_continuation and not continuation:
+ # Sometimes you can get comments by generating the continuation yourself,
+ # even if YouTube initially reports them being disabled - e.g. stories comments.
+ # Note: if the comment section is actually disabled, YouTube may return a response with
+ # required check_get_keys missing. So we will disable that check initially in this case.
+ continuation = self._build_api_continuation_query(self._generate_comment_continuation(video_id))
+ is_forced_continuation = True
for page_num in itertools.count(0):
if not continuation:
response = self._extract_response(
item_id=None, query=continuation,
ep='next', ytcfg=ytcfg, headers=headers, note=note_prefix,
- check_get_keys='onResponseReceivedEndpoints')
-
+ check_get_keys='onResponseReceivedEndpoints' if not is_forced_continuation else None)
+ is_forced_continuation = False
continuation_contents = traverse_obj(
response, 'onResponseReceivedEndpoints', expected_type=list, default=[])
if continuation:
break
+ message = self._get_text(root_continuation_data, ('contents', ..., 'messageRenderer', 'text'), max_runs=1)
+ if message and not parent and tracker['running_total'] == 0:
+ self.report_warning(f'Youtube said: {message}', video_id=video_id, only_once=True)
+
+ @staticmethod
+ def _generate_comment_continuation(video_id):
+ """
+ Generates initial comment section continuation token from given video id
+ """
+ token = f'\x12\r\x12\x0b{video_id}\x18\x062\'"\x11"\x0b{video_id}0\x00x\x020\x00B\x10comments-section'
+ return base64.b64encode(token.encode()).decode()
+
def _get_comments(self, ytcfg, video_id, contents, webpage):
"""Entry for comment extraction"""
def _real_comment_extract(contents):
headers = self.generate_api_headers(
ytcfg=player_ytcfg, account_syncid=syncid, session_index=session_index, default_client=client)
- yt_query = {'videoId': video_id}
+ yt_query = {
+ 'videoId': video_id,
+ 'params': '8AEB' # enable stories
+ }
yt_query.update(self._generate_player_context(sts))
return self._extract_response(
item_id=video_id, ep='player', query=yt_query,
def _extract_player_responses(self, clients, video_id, webpage, master_ytcfg):
initial_pr = None
if webpage:
- initial_pr = self._extract_yt_initial_variable(
- webpage, self._YT_INITIAL_PLAYER_RESPONSE_RE,
- video_id, 'initial player response')
+ initial_pr = self._search_json(
+ self._YT_INITIAL_PLAYER_RESPONSE_RE, webpage, 'initial player response', video_id, fatal=False)
all_clients = set(clients)
clients = clients[::-1]
sc = compat_parse_qs(fmt.get('signatureCipher'))
fmt_url = url_or_none(try_get(sc, lambda x: x['url'][0]))
encrypted_sig = try_get(sc, lambda x: x['s'][0])
- if not (sc and fmt_url and encrypted_sig):
+ if not all((sc, fmt_url, player_url, encrypted_sig)):
continue
- if not player_url:
+ try:
+ fmt_url += '&%s=%s' % (
+ traverse_obj(sc, ('sp', -1)) or 'signature',
+ self._decrypt_signature(encrypted_sig, video_id, player_url)
+ )
+ except ExtractorError as e:
+ self.report_warning('Signature extraction failed: Some formats may be missing', only_once=True)
+ self.write_debug(e, only_once=True)
continue
- signature = self._decrypt_signature(sc['s'][0], video_id, player_url)
- sp = try_get(sc, lambda x: x['sp'][0]) or 'signature'
- fmt_url += '&' + sp + '=' + signature
query = parse_qs(fmt_url)
throttled = False
'n': self._decrypt_nsig(query['n'][0], video_id, player_url)})
except ExtractorError as e:
self.report_warning(
- f'nsig extraction failed: You may experience throttling for some formats\n'
- f'n = {query["n"][0]} ; player = {player_url}\n{e}', only_once=True)
+ 'nsig extraction failed: You may experience throttling for some formats\n'
+ f'n = {query["n"][0]} ; player = {player_url}', only_once=True)
+ self.write_debug(e, only_once=True)
throttled = True
if itag:
# Eg: __2ABJjxzNo, ySuUZEjARPY
is_damaged = try_get(fmt, lambda x: float(x['approxDurationMs']) / duration < 500)
if is_damaged:
- self.report_warning(f'{video_id}: Some formats are possibly damaged. They will be deprioritized', only_once=True)
+ self.report_warning(
+ f'{video_id}: Some formats are possibly damaged. They will be deprioritized', only_once=True)
dct = {
'asr': int_or_none(fmt.get('audioSampleRate')),
'filesize': int_or_none(fmt.get('contentLength')),
' (default)' if language_preference > 0 else ''),
fmt.get('qualityLabel') or quality.replace('audio_quality_', ''),
throttled and 'THROTTLED', is_damaged and 'DAMAGED', delim=', '),
- 'source_preference': -10 if throttled else -1,
+ # Format 22 is likely to be damaged. See https://github.com/yt-dlp/yt-dlp/issues/3372
+ 'source_preference': -10 if throttled else -5 if itag == '22' else -1,
'fps': int_or_none(fmt.get('fps')) or None,
'height': height,
'quality': q(quality),
skip_manifests = self._configuration_arg('skip')
if not self.get_param('youtube_include_hls_manifest', True):
skip_manifests.append('hls')
+ if not self.get_param('youtube_include_dash_manifest', True):
+ skip_manifests.append('dash')
get_dash = 'dash' not in skip_manifests and (
not is_live or live_from_start or self._configuration_arg('include_live_dash'))
get_hls = not live_from_start and 'hls' not in skip_manifests
webpage = None
if 'webpage' not in self._configuration_arg('player_skip'):
webpage = self._download_webpage(
- webpage_url + '&bpctr=9999999999&has_verified=1', video_id, fatal=False)
+ webpage_url + '&bpctr=9999999999&has_verified=1&pp=8AEB', video_id, fatal=False)
master_ytcfg = self.extract_ytcfg(video_id, webpage) or self._get_default_ytcfg()
or get_first(microformats, 'lengthSeconds')
or parse_duration(search_meta('duration'))) or None
+ if get_first(video_details, 'isPostLiveDvr'):
+ self.write_debug('Video is in Post-Live Manifestless mode')
+ if duration or 0 > 4 * 3600:
+ self.report_warning(
+ 'The livestream has not finished processing. Only 4 hours of the video can be currently downloaded. '
+ 'This is a known issue and patches are welcome')
+
live_broadcast_details, is_live, streaming_data, formats = self._list_formats(
video_id, microformats, video_details, player_responses, player_url, duration)
original_thumbnails = thumbnails.copy()
# The best resolution thumbnails sometimes does not appear in the webpage
- # See: https://github.com/ytdl-org/youtube-dl/issues/29049, https://github.com/yt-dlp/yt-dlp/issues/340
+ # See: https://github.com/yt-dlp/yt-dlp/issues/340
# List of possible thumbnails - Ref: <https://stackoverflow.com/a/20542029>
thumbnail_names = [
- 'maxresdefault', 'hq720', 'sddefault', 'sd1', 'sd2', 'sd3',
- 'hqdefault', 'hq1', 'hq2', 'hq3', '0',
- 'mqdefault', 'mq1', 'mq2', 'mq3',
- 'default', '1', '2', '3'
+ # While the *1,*2,*3 thumbnails are just below their correspnding "*default" variants
+ # in resolution, these are not the custom thumbnail. So de-prioritize them
+ 'maxresdefault', 'hq720', 'sddefault', 'hqdefault', '0', 'mqdefault', 'default',
+ 'sd1', 'sd2', 'sd3', 'hq1', 'hq2', 'hq3', 'mq1', 'mq2', 'mq3', '1', '2', '3'
]
n_thumbnail_names = len(thumbnail_names)
thumbnails.extend({
# Youtube Music Auto-generated description
if video_description:
- mobj = re.search(r'(?s)(?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?.+\nAuto-generated by YouTube\.\s*$', video_description)
+ mobj = re.search(
+ r'''(?xs)
+ (?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+
+ (?P<album>[^\n]+)
+ (?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?
+ (?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?
+ (.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?
+ .+\nAuto-generated\ by\ YouTube\.\s*$
+ ''', video_description)
if mobj:
release_year = mobj.group('release_year')
release_date = mobj.group('release_date')
initial_data = None
if webpage:
- initial_data = self._extract_yt_initial_variable(
- webpage, self._YT_INITIAL_DATA_RE, video_id,
- 'yt initial data')
+ initial_data = self.extract_yt_initial_data(video_id, webpage, fatal=False)
if not initial_data:
query = {'videoId': video_id}
query.update(self._get_checkok_params())
headers=self.generate_api_headers(ytcfg=master_ytcfg),
note='Downloading initial data API JSON')
- try:
- # This will error if there is no livechat
+ try: # This will error if there is no livechat
initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
+ except (KeyError, IndexError, TypeError):
+ pass
+ else:
info.setdefault('subtitles', {})['live_chat'] = [{
- 'url': 'https://www.youtube.com/watch?v=%s' % video_id, # url is needed to set cookies
+ 'url': f'https://www.youtube.com/watch?v={video_id}', # url is needed to set cookies
'video_id': video_id,
'ext': 'json',
'protocol': 'youtube_live_chat' if is_live or is_upcoming else 'youtube_live_chat_replay',
}]
- except (KeyError, IndexError, TypeError):
- pass
if initial_data:
info['chapters'] = (
self._extract_chapters_from_json(initial_data, duration)
or self._extract_chapters_from_engagement_panel(initial_data, duration)
+ or self._extract_chapters_from_description(video_description, duration)
or None)
contents = traverse_obj(
unified_strdate(get_first(microformats, 'uploadDate'))
or unified_strdate(search_meta('uploadDate')))
if not upload_date or (not info.get('is_live') and not info.get('was_live') and info.get('live_status') != 'is_upcoming'):
- upload_date = strftime_or_none(self._extract_time_text(vpir, 'dateText')[0], '%Y%m%d')
+ upload_date = strftime_or_none(self._extract_time_text(vpir, 'dateText')[0], '%Y%m%d') or upload_date
info['upload_date'] = upload_date
for to, frm in fallbacks.items():
return
for content in contents:
renderer = content.get('backstagePostThreadRenderer')
- if not isinstance(renderer, dict):
+ if isinstance(renderer, dict):
+ yield from self._post_thread_entries(renderer)
continue
- yield from self._post_thread_entries(renderer)
+ renderer = content.get('videoRenderer')
+ if isinstance(renderer, dict):
+ yield self._video_entry(renderer)
r''' # unused
def _rich_grid_entries(self, contents):
self._extract_visitor_data(data, ytcfg)),
**metadata)
- def _extract_mix_playlist(self, playlist, playlist_id, data, ytcfg):
+ def _extract_inline_playlist(self, playlist, playlist_id, data, ytcfg):
first_id = last_id = response = None
for page_num in itertools.count(1):
videos = list(self._playlist_entries(playlist))
start = next((i for i, v in enumerate(videos) if v['id'] == last_id), -1) + 1
if start >= len(videos):
return
- for video in videos[start:]:
- if video['id'] == first_id:
- self.to_screen('First video %s found again; Assuming end of Mix' % first_id)
- return
- yield video
+ yield from videos[start:]
first_id = first_id or videos[0]['id']
last_id = videos[-1]['id']
watch_endpoint = try_get(
playlist_url = urljoin(url, try_get(
playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
compat_str))
- if playlist_url and playlist_url != url:
+
+ # Some playlists are unviewable but YouTube still provides a link to the (broken) playlist page [1]
+ # [1] MLCT, RLTDwFCb4jeqaKWnciAYM-ZVHg
+ is_known_unviewable = re.fullmatch(r'MLCT|RLTD[\w-]{22}', playlist_id)
+
+ if playlist_url and playlist_url != url and not is_known_unviewable:
return self.url_result(
playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
video_title=title)
return self.playlist_result(
- self._extract_mix_playlist(playlist, playlist_id, data, ytcfg),
+ self._extract_inline_playlist(playlist, playlist_id, data, ytcfg),
playlist_id=playlist_id, playlist_title=title)
def _extract_availability(self, data):
check_get_keys='contents', fatal=False, ytcfg=ytcfg,
note='Downloading API JSON with unavailable videos')
- @property
+ @functools.cached_property
def skip_webpage(self):
return 'webpage' in self._configuration_arg('skip', ie_key=YoutubeTabIE.ie_key())
channel = traverse_obj(
notification, ('contextualMenu', 'menuRenderer', 'items', 1, 'menuServiceItemRenderer', 'text', 'runs', 1, 'text'),
expected_type=str)
+ notification_title = self._get_text(notification, 'shortMessage')
+ if notification_title:
+ notification_title = notification_title.replace('\xad', '') # remove soft hyphens
+ # TODO: handle recommended videos
title = self._search_regex(
- rf'{re.escape(channel)} [^:]+: (.+)', self._get_text(notification, 'shortMessage'),
+ rf'{re.escape(channel or "")}[^:]+: (.+)', notification_title,
'video title', default=None)
- if title:
- title = title.replace('\xad', '') # remove soft hyphens
upload_date = (strftime_or_none(self._extract_time_text(notification, 'sentTimeText')[0], '%Y%m%d')
if self._configuration_arg('approximate_date', ie_key=YoutubeTabIE.ie_key())
else None)
class YoutubeFeedsInfoExtractor(InfoExtractor):
"""
Base class for feed extractors
- Subclasses must define the _FEED_NAME property.
+ Subclasses must re-define the _FEED_NAME property.
"""
_LOGIN_REQUIRED = True
+ _FEED_NAME = 'feeds'
def _real_initialize(self):
YoutubeBaseInfoExtractor._check_login_required(self)
- @property
+ @classproperty
def IE_NAME(self):
- return 'youtube:%s' % self._FEED_NAME
+ return f'youtube:{self._FEED_NAME}'
def _real_extract(self, url):
return self.url_result(
}]
+class YoutubeStoriesIE(InfoExtractor):
+ IE_DESC = 'YouTube channel stories; "ytstories:" prefix'
+ IE_NAME = 'youtube:stories'
+ _VALID_URL = r'ytstories:UC(?P<id>[A-Za-z0-9_-]{21}[AQgw])$'
+ _TESTS = [{
+ 'url': 'ytstories:UCwFCb4jeqaKWnciAYM-ZVHg',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ playlist_id = f'RLTD{self._match_id(url)}'
+ return self.url_result(
+ f'https://www.youtube.com/playlist?list={playlist_id}&playnext=1',
+ ie=YoutubeTabIE, video_id=playlist_id)
+
+
class YoutubeTruncatedURLIE(InfoExtractor):
IE_NAME = 'youtube:truncated_url'
IE_DESC = False # Do not list