import threading
import time
import traceback
-import urllib.error
import urllib.parse
from .common import InfoExtractor, SearchInfoExtractor
from .openload import PhantomJSwrapper
from ..compat import functools
from ..jsinterp import JSInterpreter
+from ..networking.exceptions import HTTPError, network_exceptions
from ..utils import (
NO_DEFAULT,
ExtractorError,
join_nonempty,
js_to_json,
mimetype2ext,
- network_exceptions,
orderedSet,
parse_codecs,
parse_count,
THIRD_PARTY = {
'embedUrl': 'https://www.youtube.com/', # Can be any valid URL
}
- BASE_CLIENTS = ('android', 'web', 'tv', 'ios', 'mweb')
+ BASE_CLIENTS = ('ios', 'android', 'web', 'tv', 'mweb')
priority = qualities(BASE_CLIENTS[::-1])
for client, ytcfg in tuple(INNERTUBE_CLIENTS.items()):
AVAILABILITY_PREMIUM = enum.auto()
AVAILABILITY_SUBSCRIPTION = enum.auto()
LIVE_NOW = enum.auto()
+ VERIFIED = enum.auto()
class YoutubeBaseInfoExtractor(InfoExtractor):
cookies = self._get_cookies('https://www.youtube.com/')
if cookies.get('__Secure-3PSID'):
return
- consent_id = None
- consent = cookies.get('CONSENT')
- if consent:
- if 'YES' in consent.value:
- return
- consent_id = self._search_regex(
- r'PENDING\+(\d+)', consent.value, 'consent', default=None)
- if not consent_id:
- consent_id = random.randint(100, 999)
- self._set_cookie('.youtube.com', 'CONSENT', 'YES+cb.20210328-17-p0.en+FX+%s' % consent_id)
+ socs = cookies.get('SOCS')
+ if socs and not socs.value.startswith('CAA'): # not consented
+ return
+ self._set_cookie('.youtube.com', 'SOCS', 'CAI', secure=True) # accept all (required for mixes)
def _initialize_pref(self):
cookies = self._get_cookies('https://www.youtube.com/')
def _extract_and_report_alerts(self, data, *args, **kwargs):
return self._report_alerts(self._extract_alerts(data), *args, **kwargs)
- def _extract_badges(self, renderer: dict):
- privacy_icon_map = {
+ def _extract_badges(self, badge_list: list):
+ """
+ Extract known BadgeType's from a list of badge renderers.
+ @returns [{'type': BadgeType}]
+ """
+ icon_type_map = {
'PRIVACY_UNLISTED': BadgeType.AVAILABILITY_UNLISTED,
'PRIVACY_PRIVATE': BadgeType.AVAILABILITY_PRIVATE,
- 'PRIVACY_PUBLIC': BadgeType.AVAILABILITY_PUBLIC
+ 'PRIVACY_PUBLIC': BadgeType.AVAILABILITY_PUBLIC,
+ 'CHECK_CIRCLE_THICK': BadgeType.VERIFIED,
+ 'OFFICIAL_ARTIST_BADGE': BadgeType.VERIFIED,
+ 'CHECK': BadgeType.VERIFIED,
}
badge_style_map = {
'BADGE_STYLE_TYPE_MEMBERS_ONLY': BadgeType.AVAILABILITY_SUBSCRIPTION,
'BADGE_STYLE_TYPE_PREMIUM': BadgeType.AVAILABILITY_PREMIUM,
- 'BADGE_STYLE_TYPE_LIVE_NOW': BadgeType.LIVE_NOW
+ 'BADGE_STYLE_TYPE_LIVE_NOW': BadgeType.LIVE_NOW,
+ 'BADGE_STYLE_TYPE_VERIFIED': BadgeType.VERIFIED,
+ 'BADGE_STYLE_TYPE_VERIFIED_ARTIST': BadgeType.VERIFIED,
}
label_map = {
'private': BadgeType.AVAILABILITY_PRIVATE,
'members only': BadgeType.AVAILABILITY_SUBSCRIPTION,
'live': BadgeType.LIVE_NOW,
- 'premium': BadgeType.AVAILABILITY_PREMIUM
+ 'premium': BadgeType.AVAILABILITY_PREMIUM,
+ 'verified': BadgeType.VERIFIED,
+ 'official artist channel': BadgeType.VERIFIED,
}
badges = []
- for badge in traverse_obj(renderer, ('badges', ..., 'metadataBadgeRenderer')):
+ for badge in traverse_obj(badge_list, (..., lambda key, _: re.search(r'[bB]adgeRenderer$', key))):
badge_type = (
- privacy_icon_map.get(traverse_obj(badge, ('icon', 'iconType'), expected_type=str))
+ icon_type_map.get(traverse_obj(badge, ('icon', 'iconType'), expected_type=str))
or badge_style_map.get(traverse_obj(badge, 'style'))
)
if badge_type:
continue
# fallback, won't work in some languages
- label = traverse_obj(badge, 'label', expected_type=str, default='')
+ label = traverse_obj(
+ badge, 'label', ('accessibilityData', 'label'), 'tooltip', 'iconTooltip', get_all=False, expected_type=str, default='')
for match, label_badge_type in label_map.items():
if match in label.lower():
- badges.append({'type': badge_type})
- continue
+ badges.append({'type': label_badge_type})
+ break
return badges
def extract_relative_time(relative_time_text):
"""
Extracts a relative time from string and converts to dt object
- e.g. 'streamed 6 days ago', '5 seconds ago (edited)', 'updated today'
+ e.g. 'streamed 6 days ago', '5 seconds ago (edited)', 'updated today', '8 yr ago'
"""
- mobj = re.search(r'(?P<start>today|yesterday|now)|(?P<time>\d+)\s*(?P<unit>microsecond|second|minute|hour|day|week|month|year)s?\s*ago', relative_time_text)
+
+ # XXX: this could be moved to a general function in utils/_utils.py
+ # The relative time text strings are roughly the same as what
+ # Javascript's Intl.RelativeTimeFormat function generates.
+ # See: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Intl/RelativeTimeFormat
+ mobj = re.search(
+ r'(?P<start>today|yesterday|now)|(?P<time>\d+)\s*(?P<unit>sec(?:ond)?|s|min(?:ute)?|h(?:our|r)?|d(?:ay)?|w(?:eek|k)?|mo(?:nth)?|y(?:ear|r)?)s?\s*ago',
+ relative_time_text)
if mobj:
start = mobj.group('start')
if start:
def _extract_response(self, item_id, query, note='Downloading API JSON', headers=None,
ytcfg=None, check_get_keys=None, ep='browse', fatal=True, api_hostname=None,
default_client='web'):
- for retry in self.RetryManager():
+ raise_for_incomplete = bool(self._configuration_arg('raise_incomplete_data', ie_key=YoutubeIE))
+ # Incomplete Data should be a warning by default when retries are exhausted, while other errors should be fatal.
+ icd_retries = iter(self.RetryManager(fatal=raise_for_incomplete))
+ icd_rm = next(icd_retries)
+ main_retries = iter(self.RetryManager())
+ main_rm = next(main_retries)
+ # Manual retry loop for multiple RetryManagers
+ # The proper RetryManager MUST be advanced after an error
+ # and its result MUST be checked if the manager is non fatal
+ while True:
try:
response = self._call_api(
ep=ep, fatal=True, headers=headers,
except ExtractorError as e:
if not isinstance(e.cause, network_exceptions):
return self._error_or_warning(e, fatal=fatal)
- elif not isinstance(e.cause, urllib.error.HTTPError):
- retry.error = e
+ elif not isinstance(e.cause, HTTPError):
+ main_rm.error = e
+ next(main_retries)
continue
- first_bytes = e.cause.read(512)
+ first_bytes = e.cause.response.read(512)
if not is_html(first_bytes):
yt_error = try_get(
self._parse_json(
- self._webpage_read_content(e.cause, None, item_id, prefix=first_bytes) or '{}', item_id, fatal=False),
+ self._webpage_read_content(e.cause.response, None, item_id, prefix=first_bytes) or '{}', item_id, fatal=False),
lambda x: x['error']['message'], str)
if yt_error:
self._report_alerts([('ERROR', yt_error)], fatal=False)
# Downloading page may result in intermittent 5xx HTTP error
- # Sometimes a 404 is also recieved. See: https://github.com/ytdl-org/youtube-dl/issues/28289
+ # Sometimes a 404 is also received. See: https://github.com/ytdl-org/youtube-dl/issues/28289
# We also want to catch all other network exceptions since errors in later pages can be troublesome
# See https://github.com/yt-dlp/yt-dlp/issues/507#issuecomment-880188210
- if e.cause.code not in (403, 429):
- retry.error = e
+ if e.cause.status not in (403, 429):
+ main_rm.error = e
+ next(main_retries)
continue
return self._error_or_warning(e, fatal=fatal)
try:
self._extract_and_report_alerts(response, only_once=True)
except ExtractorError as e:
- # YouTube servers may return errors we want to retry on in a 200 OK response
+ # YouTube's servers may return errors we want to retry on in a 200 OK response
# See: https://github.com/yt-dlp/yt-dlp/issues/839
if 'unknown error' in e.msg.lower():
- retry.error = e
+ main_rm.error = e
+ next(main_retries)
continue
return self._error_or_warning(e, fatal=fatal)
# Youtube sometimes sends incomplete data
# See: https://github.com/ytdl-org/youtube-dl/issues/28194
if not traverse_obj(response, *variadic(check_get_keys)):
- retry.error = ExtractorError('Incomplete data received', expected=True)
+ icd_rm.error = ExtractorError('Incomplete data received', expected=True)
+ should_retry = next(icd_retries, None)
+ if not should_retry:
+ return None
continue
return response
overlay_style = traverse_obj(
renderer, ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'style'),
get_all=False, expected_type=str)
- badges = self._extract_badges(renderer)
-
+ badges = self._extract_badges(traverse_obj(renderer, 'badges'))
+ owner_badges = self._extract_badges(traverse_obj(renderer, 'ownerBadges'))
navigation_url = urljoin('https://www.youtube.com/', traverse_obj(
renderer, ('navigationEndpoint', 'commandMetadata', 'webCommandMetadata', 'url'),
expected_type=str)) or ''
else self._get_count({'simpleText': view_count_text}))
view_count_field = 'concurrent_view_count' if live_status in ('is_live', 'is_upcoming') else 'view_count'
+ channel = (self._get_text(renderer, 'ownerText', 'shortBylineText')
+ or self._get_text(reel_header_renderer, 'channelTitleText'))
+
+ channel_handle = traverse_obj(renderer, (
+ 'shortBylineText', 'runs', ..., 'navigationEndpoint',
+ (('commandMetadata', 'webCommandMetadata', 'url'), ('browseEndpoint', 'canonicalBaseUrl'))),
+ expected_type=self.handle_from_url, get_all=False)
return {
'_type': 'url',
'ie_key': YoutubeIE.ie_key(),
'description': description,
'duration': duration,
'channel_id': channel_id,
- 'channel': (self._get_text(renderer, 'ownerText', 'shortBylineText')
- or self._get_text(reel_header_renderer, 'channelTitleText')),
+ 'channel': channel,
'channel_url': f'https://www.youtube.com/channel/{channel_id}' if channel_id else None,
+ 'uploader': channel,
+ 'uploader_id': channel_handle,
+ 'uploader_url': format_field(channel_handle, None, 'https://www.youtube.com/%s', default=None),
'thumbnails': self._extract_thumbnails(renderer, 'thumbnail'),
'timestamp': (self._parse_time_text(time_text)
if self._configuration_arg('approximate_date', ie_key=YoutubeTabIE)
needs_subscription=self._has_badge(badges, BadgeType.AVAILABILITY_SUBSCRIPTION) or None,
is_unlisted=self._has_badge(badges, BadgeType.AVAILABILITY_UNLISTED) or None),
view_count_field: view_count,
- 'live_status': live_status
+ 'live_status': live_status,
+ 'channel_is_verified': True if self._has_badge(owner_badges, BadgeType.VERIFIED) else None
}
'uploader': 'Philipp Hagemeister',
'uploader_url': 'https://www.youtube.com/@PhilippHagemeister',
'uploader_id': '@PhilippHagemeister',
+ 'heatmap': 'count:100',
}
},
{
'uploader': 'Philipp Hagemeister',
'uploader_url': 'https://www.youtube.com/@PhilippHagemeister',
'uploader_id': '@PhilippHagemeister',
+ 'heatmap': 'count:100',
},
'params': {
'skip_download': True,
'uploader': 'The Witcher',
'uploader_url': 'https://www.youtube.com/@thewitcher',
'uploader_id': '@thewitcher',
+ 'comment_count': int,
+ 'channel_is_verified': True,
+ 'heatmap': 'count:100',
},
},
{
'uploader': 'FlyingKitty',
'uploader_url': 'https://www.youtube.com/@FlyingKitty900',
'uploader_id': '@FlyingKitty900',
+ 'comment_count': int,
+ 'channel_is_verified': True,
},
},
{
'uploader': 'Olympics',
'uploader_url': 'https://www.youtube.com/@Olympics',
'uploader_id': '@Olympics',
+ 'channel_is_verified': True,
},
'params': {
'skip_download': 'requires avconv',
'uploader': 'Bernie Sanders',
'uploader_url': 'https://www.youtube.com/@BernieSanders',
'uploader_id': '@BernieSanders',
+ 'channel_is_verified': True,
+ 'heatmap': 'count:100',
},
'params': {
'skip_download': True,
'uploader': 'Vsauce',
'uploader_url': 'https://www.youtube.com/@Vsauce',
'uploader_id': '@Vsauce',
+ 'comment_count': int,
+ 'channel_is_verified': True,
},
'params': {
'skip_download': True,
'uploader': 'kudvenkat',
'uploader_url': 'https://www.youtube.com/@Csharp-video-tutorialsBlogspot',
'uploader_id': '@Csharp-video-tutorialsBlogspot',
+ 'channel_is_verified': True,
+ 'heatmap': 'count:100',
},
'params': {
'skip_download': True,
'uploader': 'CBS Mornings',
'uploader_url': 'https://www.youtube.com/@CBSMornings',
'uploader_id': '@CBSMornings',
+ 'comment_count': int,
+ 'channel_is_verified': True,
}
},
{
'uploader': 'colinfurze',
'uploader_url': 'https://www.youtube.com/@colinfurze',
'uploader_id': '@colinfurze',
+ 'comment_count': int,
+ 'channel_is_verified': True,
+ 'heatmap': 'count:100',
},
'params': {
'format': '17', # 3gp format available on android
'uploader': 'SciShow',
'uploader_url': 'https://www.youtube.com/@SciShow',
'uploader_id': '@SciShow',
+ 'comment_count': int,
+ 'channel_is_verified': True,
+ 'heatmap': 'count:100',
}, 'params': {'format': 'mhtml', 'skip_download': True}
}, {
# Ensure video upload_date is in UTC timezone (video was uploaded 1641170939)
'uploader': 'Leon Nguyen',
'uploader_url': 'https://www.youtube.com/@LeonNguyen',
'uploader_id': '@LeonNguyen',
+ 'heatmap': 'count:100',
}
}, {
# Same video as above, but with --compat-opt no-youtube-prefer-utc-upload-date
'uploader': 'Leon Nguyen',
'uploader_url': 'https://www.youtube.com/@LeonNguyen',
'uploader_id': '@LeonNguyen',
+ 'heatmap': 'count:100',
},
'params': {'compat_opts': ['no-youtube-prefer-utc-upload-date']}
}, {
'uploader': 'Quackity',
'uploader_id': '@Quackity',
'uploader_url': 'https://www.youtube.com/@Quackity',
+ 'comment_count': int,
+ 'channel_is_verified': True,
+ 'heatmap': 'count:100',
}
},
{ # continuous livestream. Microformat upload date should be preferred.
'uploader_id': '@abaointokyo',
},
'params': {'skip_download': True}
- }, {
- # Story. Requires specific player params to work.
- 'url': 'https://www.youtube.com/watch?v=vv8qTUWmulI',
- 'info_dict': {
- 'id': 'vv8qTUWmulI',
- 'ext': 'mp4',
- 'availability': 'unlisted',
- 'view_count': int,
- 'channel_id': 'UCzIZ8HrzDgc-pNQDUG6avBA',
- 'upload_date': '20220526',
- 'categories': ['Education'],
- 'title': 'Story',
- 'channel': 'IT\'S HISTORY',
- 'description': '',
- 'duration': 12,
- 'playable_in_embed': True,
- 'age_limit': 0,
- 'live_status': 'not_live',
- 'tags': [],
- 'thumbnail': 'https://i.ytimg.com/vi_webp/vv8qTUWmulI/maxresdefault.webp',
- 'channel_url': 'https://www.youtube.com/channel/UCzIZ8HrzDgc-pNQDUG6avBA',
- },
- 'skip': 'stories get removed after some period of time',
}, {
'url': 'https://www.youtube.com/watch?v=tjjjtzRLHvA',
'info_dict': {
'uploader': 'MrBeast',
'uploader_url': 'https://www.youtube.com/@MrBeast',
'uploader_id': '@MrBeast',
+ 'comment_count': int,
+ 'channel_is_verified': True,
+ 'heatmap': 'count:100',
},
'params': {'extractor_args': {'youtube': {'player_client': ['ios']}}, 'format': '233-1'},
}, {
'uploader': 'さなちゃんねる',
'uploader_url': 'https://www.youtube.com/@sana_natori',
'uploader_id': '@sana_natori',
+ 'channel_is_verified': True,
+ 'heatmap': 'count:100',
},
},
{
'thumbnail': r're:^https?://.*\.webp',
'channel_url': 'https://www.youtube.com/channel/UCxzC4EngIsMrPmbm6Nxvb-A',
'playable_in_embed': True,
+ 'comment_count': int,
+ 'channel_is_verified': True,
+ 'heatmap': 'count:100',
},
'params': {
'extractor_args': {'youtube': {'player_client': ['android'], 'player_skip': ['webpage']}},
'uploader': 'Christopher Sykes',
'uploader_url': 'https://www.youtube.com/@ChristopherSykesDocumentaries',
'uploader_id': '@ChristopherSykesDocumentaries',
+ 'heatmap': 'count:100',
},
'params': {
'skip_download': True,
# Obtain from MPD's maximum seq value
old_mpd_url = mpd_url
last_error = ctx.pop('last_error', None)
- expire_fast = immediate or last_error and isinstance(last_error, urllib.error.HTTPError) and last_error.code == 403
+ expire_fast = immediate or last_error and isinstance(last_error, HTTPError) and last_error.status == 403
mpd_url, stream_number, is_live = (mpd_feed(format_id, 5 if expire_fast else 18000)
or (mpd_url, stream_number, False))
if not refresh_sequence:
return funcname
return json.loads(js_to_json(self._search_regex(
- rf'var {re.escape(funcname)}\s*=\s*(\[.+?\]);', jscode,
+ rf'var {re.escape(funcname)}\s*=\s*(\[.+?\])\s*[,;]', jscode,
f'Initial JS player n function list ({funcname}.{idx})')))[int(idx)]
def _extract_n_function_code(self, video_id, player_url):
chapter_time, chapter_title, duration)
for contents in content_list)), [])
+ def _extract_heatmap(self, data):
+ return traverse_obj(data, (
+ 'frameworkUpdates', 'entityBatchUpdate', 'mutations',
+ lambda _, v: v['payload']['macroMarkersListEntity']['markersList']['markerType'] == 'MARKER_TYPE_HEATMAP',
+ 'payload', 'macroMarkersListEntity', 'markersList', 'markers', ..., {
+ 'start_time': ('startMillis', {functools.partial(float_or_none, scale=1000)}),
+ 'end_time': {lambda x: (int(x['startMillis']) + int(x['durationMillis'])) / 1000},
+ 'value': ('intensityScoreNormalized', {float_or_none}),
+ })) or None
+
def _extract_comment(self, comment_renderer, parent=None):
comment_id = comment_renderer.get('commentId')
if not comment_id:
return
- text = self._get_text(comment_renderer, 'contentText')
+ info = {
+ 'id': comment_id,
+ 'text': self._get_text(comment_renderer, 'contentText'),
+ 'like_count': self._get_count(comment_renderer, 'voteCount'),
+ 'author_id': traverse_obj(comment_renderer, ('authorEndpoint', 'browseEndpoint', 'browseId', {self.ucid_or_none})),
+ 'author': self._get_text(comment_renderer, 'authorText'),
+ 'author_thumbnail': traverse_obj(comment_renderer, ('authorThumbnail', 'thumbnails', -1, 'url', {url_or_none})),
+ 'parent': parent or 'root',
+ }
# Timestamp is an estimate calculated from the current time and time_text
time_text = self._get_text(comment_renderer, 'publishedTimeText') or ''
timestamp = self._parse_time_text(time_text)
- author = self._get_text(comment_renderer, 'authorText')
- author_id = try_get(comment_renderer,
- lambda x: x['authorEndpoint']['browseEndpoint']['browseId'], str)
+ info.update({
+ # FIXME: non-standard, but we need a way of showing that it is an estimate.
+ '_time_text': time_text,
+ 'timestamp': timestamp,
+ })
- votes = parse_count(try_get(comment_renderer, (lambda x: x['voteCount']['simpleText'],
- lambda x: x['likeCount']), str)) or 0
- author_thumbnail = try_get(comment_renderer,
- lambda x: x['authorThumbnail']['thumbnails'][-1]['url'], str)
+ info['author_url'] = urljoin(
+ 'https://www.youtube.com', traverse_obj(comment_renderer, ('authorEndpoint', (
+ ('browseEndpoint', 'canonicalBaseUrl'), ('commandMetadata', 'webCommandMetadata', 'url'))),
+ expected_type=str, get_all=False))
- author_is_uploader = try_get(comment_renderer, lambda x: x['authorIsChannelOwner'], bool)
- is_favorited = 'creatorHeart' in (try_get(
- comment_renderer, lambda x: x['actionButtons']['commentActionButtonsRenderer'], dict) or {})
- return {
- 'id': comment_id,
- 'text': text,
- 'timestamp': timestamp,
- 'time_text': time_text,
- 'like_count': votes,
- 'is_favorited': is_favorited,
- 'author': author,
- 'author_id': author_id,
- 'author_thumbnail': author_thumbnail,
- 'author_is_uploader': author_is_uploader,
- 'parent': parent or 'root'
- }
+ author_is_uploader = traverse_obj(comment_renderer, 'authorIsChannelOwner')
+ if author_is_uploader is not None:
+ info['author_is_uploader'] = author_is_uploader
+
+ comment_abr = traverse_obj(
+ comment_renderer, ('actionButtons', 'commentActionButtonsRenderer'), expected_type=dict)
+ if comment_abr is not None:
+ info['is_favorited'] = 'creatorHeart' in comment_abr
+
+ badges = self._extract_badges([traverse_obj(comment_renderer, 'authorCommentBadge')])
+ if self._has_badge(badges, BadgeType.VERIFIED):
+ info['author_is_verified'] = True
+
+ is_pinned = traverse_obj(comment_renderer, 'pinnedCommentBadge')
+ if is_pinned:
+ info['is_pinned'] = True
+
+ return info
def _comment_entries(self, root_continuation_data, ytcfg, video_id, parent=None, tracker=None):
expected_comment_count = self._get_count(
comments_header_renderer, 'countText', 'commentsCount')
- if expected_comment_count:
+ if expected_comment_count is not None:
tracker['est_total'] = expected_comment_count
self.to_screen(f'Downloading ~{expected_comment_count} comments')
comment_sort_index = int(get_single_config_arg('comment_sort') != 'top') # 1 = new, 0 = top
comment = self._extract_comment(comment_renderer, parent)
if not comment:
continue
- is_pinned = bool(traverse_obj(comment_renderer, 'pinnedCommentBadge'))
comment_id = comment['id']
- if is_pinned:
+ if comment.get('is_pinned'):
tracker['pinned_comment_ids'].add(comment_id)
# Sometimes YouTube may break and give us infinite looping comments.
# See: https://github.com/yt-dlp/yt-dlp/issues/6290
if comment_id in tracker['seen_comment_ids']:
- if comment_id in tracker['pinned_comment_ids'] and not is_pinned:
+ if comment_id in tracker['pinned_comment_ids'] and not comment.get('is_pinned'):
# Pinned comments may appear a second time in newest first sort
# See: https://github.com/yt-dlp/yt-dlp/issues/6712
continue
- self.report_warning('Detected YouTube comments looping. Stopping comment extraction as we probably cannot get any more.')
+ self.report_warning(
+ 'Detected YouTube comments looping. Stopping comment extraction '
+ f'{"for this thread" if parent else ""} as we probably cannot get any more.')
yield
else:
tracker['seen_comment_ids'].add(comment['id'])
if not tracker:
tracker = dict(
running_total=0,
- est_total=0,
+ est_total=None,
current_page_thread=0,
total_parent_comments=0,
total_reply_comments=0,
continuation = self._build_api_continuation_query(self._generate_comment_continuation(video_id))
is_forced_continuation = True
+ continuation_items_path = (
+ 'onResponseReceivedEndpoints', ..., ('reloadContinuationItemsCommand', 'appendContinuationItemsAction'), 'continuationItems')
for page_num in itertools.count(0):
if not continuation:
break
headers = self.generate_api_headers(ytcfg=ytcfg, visitor_data=self._extract_visitor_data(response))
- comment_prog_str = f"({tracker['running_total']}/{tracker['est_total']})"
+ comment_prog_str = f"({tracker['running_total']}/~{tracker['est_total']})"
if page_num == 0:
if is_first_continuation:
note_prefix = 'Downloading comment section API JSON'
note_prefix = '%sDownloading comment%s API JSON page %d %s' % (
' ' if parent else '', ' replies' if parent else '',
page_num, comment_prog_str)
+
+ # Do a deep check for incomplete data as sometimes YouTube may return no comments for a continuation
+ # Ignore check if YouTube says the comment count is 0.
+ check_get_keys = None
+ if not is_forced_continuation and not (tracker['est_total'] == 0 and tracker['running_total'] == 0):
+ check_get_keys = [[*continuation_items_path, ..., (
+ 'commentsHeaderRenderer' if is_first_continuation else ('commentThreadRenderer', 'commentRenderer'))]]
try:
response = self._extract_response(
item_id=None, query=continuation,
ep='next', ytcfg=ytcfg, headers=headers, note=note_prefix,
- check_get_keys='onResponseReceivedEndpoints' if not is_forced_continuation else None)
+ check_get_keys=check_get_keys)
except ExtractorError as e:
# Ignore incomplete data error for replies if retries didn't work.
# This is to allow any other parent comments and comment threads to be downloaded.
# See: https://github.com/yt-dlp/yt-dlp/issues/4669
- if 'incomplete data' in str(e).lower() and parent and self.get_param('ignoreerrors') is True:
- self.report_warning(
- 'Received incomplete data for a comment reply thread and retrying did not help. '
- 'Ignoring to let other comments be downloaded.')
- else:
- raise
+ if 'incomplete data' in str(e).lower() and parent:
+ if self.get_param('ignoreerrors') in (True, 'only_download'):
+ self.report_warning(
+ 'Received incomplete data for a comment reply thread and retrying did not help. '
+ 'Ignoring to let other comments be downloaded. Pass --no-ignore-errors to not ignore.')
+ return
+ else:
+ raise ExtractorError(
+ 'Incomplete data received for comment reply thread. '
+ 'Pass --ignore-errors to ignore and allow rest of comments to download.',
+ expected=True)
+ raise
is_forced_continuation = False
- continuation_contents = traverse_obj(
- response, 'onResponseReceivedEndpoints', expected_type=list, default=[])
-
continuation = None
- for continuation_section in continuation_contents:
- continuation_items = traverse_obj(
- continuation_section,
- (('reloadContinuationItemsCommand', 'appendContinuationItemsAction'), 'continuationItems'),
- get_all=False, expected_type=list) or []
+ for continuation_items in traverse_obj(response, continuation_items_path, expected_type=list, default=[]):
if is_first_continuation:
continuation = extract_header(continuation_items)
is_first_continuation = False
def _is_unplayable(player_response):
return traverse_obj(player_response, ('playabilityStatus', 'status')) == 'UNPLAYABLE'
- _STORY_PLAYER_PARAMS = '8AEB'
-
def _extract_player_response(self, client, video_id, master_ytcfg, player_ytcfg, player_url, initial_pr, smuggled_data):
session_index = self._extract_session_index(player_ytcfg, master_ytcfg)
yt_query = {
'videoId': video_id,
}
- if smuggled_data.get('is_story') or _split_innertube_client(client)[0] == 'android':
- yt_query['params'] = self._STORY_PLAYER_PARAMS
+ if _split_innertube_client(client)[0] == 'android':
+ yt_query['params'] = 'CgIQBg=='
+
+ pp_arg = self._configuration_arg('player_params', [None], casesense=True)[0]
+ if pp_arg:
+ yt_query['params'] = pp_arg
yt_query.update(self._generate_player_context(sts))
return self._extract_response(
def _get_requested_clients(self, url, smuggled_data):
requested_clients = []
- default = ['android', 'web']
+ default = ['ios', 'android', 'web']
allowed_clients = sorted(
(client for client in INNERTUBE_CLIENTS.keys() if client[:1] != '_'),
key=lambda client: INNERTUBE_CLIENTS[client]['priority'], reverse=True)
def _needs_live_processing(self, live_status, duration):
if (live_status == 'is_live' and self.get_param('live_from_start')
- or live_status == 'post_live' and (duration or 0) > 4 * 3600):
+ or live_status == 'post_live' and (duration or 0) > 2 * 3600):
return live_status
def _extract_formats_and_subtitles(self, streaming_data, video_id, player_url, live_status, duration):
'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres'
])
streaming_formats = traverse_obj(streaming_data, (..., ('formats', 'adaptiveFormats'), ...))
- all_formats = self._configuration_arg('include_duplicate_formats')
+ format_types = self._configuration_arg('formats')
+ all_formats = 'duplicate' in format_types
+ if self._configuration_arg('include_duplicate_formats'):
+ all_formats = True
+ self._downloader.deprecated_feature('[youtube] include_duplicate_formats extractor argument is deprecated. '
+ 'Use formats=duplicate extractor argument instead')
def build_fragments(f):
return LazyList({
f'{video_id}: Some formats are possibly damaged. They will be deprioritized', only_once=True)
client_name = fmt.get(STREAMING_DATA_CLIENT_NAME)
+ name = fmt.get('qualityLabel') or quality.replace('audio_quality_', '') or ''
+ fps = int_or_none(fmt.get('fps')) or 0
dct = {
'asr': int_or_none(fmt.get('audioSampleRate')),
'filesize': int_or_none(fmt.get('contentLength')),
'format_note': join_nonempty(
join_nonempty(audio_track.get('displayName'),
language_preference > 0 and ' (default)', delim=''),
- fmt.get('qualityLabel') or quality.replace('audio_quality_', ''),
- fmt.get('isDrc') and 'DRC',
+ name, fmt.get('isDrc') and 'DRC',
try_get(fmt, lambda x: x['projectionType'].replace('RECTANGULAR', '').lower()),
try_get(fmt, lambda x: x['spatialAudioType'].replace('SPATIAL_AUDIO_TYPE_', '').lower()),
throttled and 'THROTTLED', is_damaged and 'DAMAGED',
(self.get_param('verbose') or all_formats) and client_name,
delim=', '),
# Format 22 is likely to be damaged. See https://github.com/yt-dlp/yt-dlp/issues/3372
- 'source_preference': -10 if throttled else -5 if itag == '22' else -1,
- 'fps': int_or_none(fmt.get('fps')) or None,
+ 'source_preference': ((-10 if throttled else -5 if itag == '22' else -1)
+ + (100 if 'Premium' in name else 0)),
+ 'fps': fps if fps > 1 else None, # For some formats, fps is wrongly returned as 1
'audio_channels': fmt.get('audioChannels'),
'height': height,
'quality': q(quality) - bool(fmt.get('isDrc')) / 2,
if single_stream and dct.get('ext'):
dct['container'] = dct['ext'] + '_dash'
- if all_formats and dct['filesize']:
+ if (all_formats or 'dashy' in format_types) and dct['filesize']:
yield {
**dct,
'format_id': f'{dct["format_id"]}-dashy' if all_formats else dct['format_id'],
'protocol': 'http_dash_segments',
'fragments': build_fragments(dct),
}
- dct['downloader_options'] = {'http_chunk_size': CHUNK_SIZE}
- yield dct
+ if all_formats or 'dashy' not in format_types:
+ dct['downloader_options'] = {'http_chunk_size': CHUNK_SIZE}
+ yield dct
needs_live_processing = self._needs_live_processing(live_status, duration)
- skip_bad_formats = not self._configuration_arg('include_incomplete_formats')
+ skip_bad_formats = 'incomplete' not in format_types
+ if self._configuration_arg('include_incomplete_formats'):
+ skip_bad_formats = False
+ self._downloader.deprecated_feature('[youtube] include_incomplete_formats extractor argument is deprecated. '
+ 'Use formats=incomplete extractor argument instead')
skip_manifests = set(self._configuration_arg('skip'))
if (not self.get_param('youtube_include_hls_manifest', True)
skip_manifests.add('dash')
if self._configuration_arg('include_live_dash'):
self._downloader.deprecated_feature('[youtube] include_live_dash extractor argument is deprecated. '
- 'Use include_incomplete_formats extractor argument instead')
+ 'Use formats=incomplete extractor argument instead')
elif skip_bad_formats and live_status == 'is_live' and needs_live_processing != 'is_live':
skip_manifests.add('dash')
elif itag:
f['format_id'] = itag
+ if f.get('source_preference') is None:
+ f['source_preference'] = -1
+
+ if itag in ('616', '235'):
+ f['format_note'] = join_nonempty(f.get('format_note'), 'Premium', delim=' ')
+ f['source_preference'] += 100
+
f['quality'] = q(itag_qualities.get(try_get(f, lambda f: f['format_id'].split('-')[0]), -1))
if f['quality'] == -1 and f.get('height'):
f['quality'] = q(res_qualities[min(res_qualities, key=lambda x: abs(x - f['height']))])
- if self.get_param('verbose'):
+ if self.get_param('verbose') or all_formats:
f['format_note'] = join_nonempty(f.get('format_note'), client_name, delim=', ')
+ if f.get('fps') and f['fps'] <= 1:
+ del f['fps']
+
+ if proto == 'hls' and f.get('has_drm'):
+ f['has_drm'] = 'maybe'
+ f['source_preference'] -= 5
return True
subtitles = {}
webpage = None
if 'webpage' not in self._configuration_arg('player_skip'):
query = {'bpctr': '9999999999', 'has_verified': '1'}
- if smuggled_data.get('is_story'):
- query['pp'] = self._STORY_PLAYER_PARAMS
+ pp = self._configuration_arg('player_params', [None], casesense=True)[0]
+ if pp:
+ query['pp'] = pp
webpage = self._download_webpage(
webpage_url, video_id, fatal=False, query=query)
else None)
streaming_data = traverse_obj(player_responses, (..., 'streamingData'))
*formats, subtitles = self._extract_formats_and_subtitles(streaming_data, video_id, player_url, live_status, duration)
+ if all(f.get('has_drm') for f in formats):
+ # If there are no formats that definitely don't have DRM, all have DRM
+ for f in formats:
+ f['has_drm'] = True
return live_broadcast_details, live_status, streaming_data, formats, subtitles
for fmt in filter(is_bad_format, formats):
fmt['preference'] = (fmt.get('preference') or -1) - 10
- fmt['format_note'] = join_nonempty(fmt.get('format_note'), '(Last 4 hours)', delim=' ')
+ fmt['format_note'] = join_nonempty(fmt.get('format_note'), '(Last 2 hours)', delim=' ')
if needs_live_processing:
self._prepare_live_from_start_formats(
continue
trans_code += f'-{lang_code}'
trans_name += format_field(lang_name, None, ' from %s')
- # Add an "-orig" label to the original language so that it can be distinguished.
- # The subs are returned without "-orig" as well for compatibility
if lang_code == f'a-{orig_trans_code}':
+ # Set audio language based on original subtitles
+ for f in formats:
+ if f.get('acodec') != 'none' and not f.get('language'):
+ f['language'] = orig_trans_code
+ # Add an "-orig" label to the original language so that it can be distinguished.
+ # The subs are returned without "-orig" as well for compatibility
process_language(
automatic_captions, base_url, f'{trans_code}-orig', f'{trans_name} (Original)', {})
# Setting tlang=lang returns damaged subtitles.
info[d_k] = parse_duration(query[k][0])
# Youtube Music Auto-generated description
- if video_description:
+ if (video_description or '').strip().endswith('\nAuto-generated by YouTube.'):
+ # XXX: Causes catastrophic backtracking if description has "·"
+ # E.g. https://www.youtube.com/watch?v=DoPaAxMQoiI
+ # Simulating atomic groups: (?P<a>[^xy]+)x => (?=(?P<a>[^xy]+))(?P=a)x
+ # reduces it, but does not fully fix it. https://regex101.com/r/8Ssf2h/2
mobj = re.search(
r'''(?xs)
- (?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+
- (?P<album>[^\n]+)
+ (?=(?P<track>[^\n·]+))(?P=track)·
+ (?=(?P<artist>[^\n]+))(?P=artist)\n+
+ (?=(?P<album>[^\n]+))(?P=album)\n
(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?
(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?
- (.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?
- .+\nAuto-generated\ by\ YouTube\.\s*$
+ (.+?\nArtist\s*:\s*
+ (?=(?P<clean_artist>[^\n]+))(?P=clean_artist)\n
+ )?.+\nAuto-generated\ by\ YouTube\.\s*$
''', video_description)
if mobj:
release_year = mobj.group('release_year')
or self._extract_chapters_from_description(video_description, duration)
or None)
+ info['heatmap'] = self._extract_heatmap(initial_data)
+
contents = traverse_obj(
initial_data, ('contents', 'twoColumnWatchNextResults', 'results', 'results', 'contents'),
expected_type=list, default=[])
info['artist'] = mrr_contents_text
elif mrr_title == 'Song':
info['track'] = mrr_contents_text
+ owner_badges = self._extract_badges(traverse_obj(vsir, ('owner', 'videoOwnerRenderer', 'badges')))
+ if self._has_badge(owner_badges, BadgeType.VERIFIED):
+ info['channel_is_verified'] = True
info.update({
'uploader': info.get('channel'),
and 'no-youtube-prefer-utc-upload-date' not in self.get_param('compat_opts', [])
):
upload_date = strftime_or_none(
- self._parse_time_text(self._get_text(vpir, 'dateText')), '%Y%m%d') or upload_date
+ self._parse_time_text(self._get_text(vpir, 'dateText'))) or upload_date
info['upload_date'] = upload_date
for s_k, d_k in [('artist', 'creator'), ('track', 'alt_title')]:
if v:
info[d_k] = v
- badges = self._extract_badges(traverse_obj(contents, (..., 'videoPrimaryInfoRenderer'), get_all=False))
+ badges = self._extract_badges(traverse_obj(vpir, 'badges'))
is_private = (self._has_badge(badges, BadgeType.AVAILABILITY_PRIVATE)
or get_first(video_details, 'isPrivate', expected_type=bool))
channel_id = self.ucid_or_none(renderer['channelId'])
title = self._get_text(renderer, 'title')
channel_url = format_field(channel_id, None, 'https://www.youtube.com/channel/%s', default=None)
- # As of 2023-03-01 YouTube doesn't use the channel handles on these renderers yet.
- # However we can expect them to change that in the future.
channel_handle = self.handle_from_url(
traverse_obj(renderer, (
'navigationEndpoint', (('commandMetadata', 'webCommandMetadata', 'url'),
('browseEndpoint', 'canonicalBaseUrl')),
{str}), get_all=False))
+ if not channel_handle:
+ # As of 2023-06-01, YouTube sets subscriberCountText to the handle in search
+ channel_handle = self.handle_or_none(self._get_text(renderer, 'subscriberCountText'))
return {
'_type': 'url',
'url': channel_url,
'title': title,
'uploader_id': channel_handle,
'uploader_url': format_field(channel_handle, None, 'https://www.youtube.com/%s', default=None),
- 'channel_follower_count': self._get_count(renderer, 'subscriberCountText'),
+ # See above. YouTube sets videoCountText to the subscriber text in search channel renderers.
+ # However, in feed/channels this is set correctly to the subscriber count
+ 'channel_follower_count': traverse_obj(
+ renderer, 'subscriberCountText', 'videoCountText', expected_type=self._get_count),
'thumbnails': self._extract_thumbnails(renderer, 'thumbnail'),
- 'playlist_count': self._get_count(renderer, 'videoCountText'),
+ 'playlist_count': (
+ # videoCountText may be the subscriber count
+ self._get_count(renderer, 'videoCountText')
+ if self._get_count(renderer, 'subscriberCountText') is not None else None),
'description': self._get_text(renderer, 'descriptionSnippet'),
+ 'channel_is_verified': True if self._has_badge(
+ self._extract_badges(traverse_obj(renderer, 'ownerBadges')), BadgeType.VERIFIED) else None,
}
def _grid_entries(self, grid_renderer):
'videoRenderer': lambda x: [self._video_entry(x)],
'playlistRenderer': lambda x: self._grid_entries({'items': [{'playlistRenderer': x}]}),
'channelRenderer': lambda x: self._grid_entries({'items': [{'channelRenderer': x}]}),
- 'hashtagTileRenderer': lambda x: [self._hashtag_tile_entry(x)]
+ 'hashtagTileRenderer': lambda x: [self._hashtag_tile_entry(x)],
+ 'richGridRenderer': lambda x: self._extract_entries(x, continuation_list),
}
for key, renderer in isr_content.items():
if key not in known_renderers:
or try_get(tab_content, lambda x: x['richGridRenderer'], dict) or {})
yield from extract_entries(parent_renderer)
continuation = continuation_list[0]
-
+ seen_continuations = set()
for page_num in itertools.count(1):
if not continuation:
break
+ continuation_token = continuation.get('continuation')
+ if continuation_token is not None and continuation_token in seen_continuations:
+ self.write_debug('Detected YouTube feed looping - assuming end of feed.')
+ break
+ seen_continuations.add(continuation_token)
headers = self.generate_api_headers(
ytcfg=ytcfg, account_syncid=account_syncid, visitor_data=visitor_data)
response = self._extract_response(
'uploader_id': channel_handle,
'uploader_url': format_field(channel_handle, None, 'https://www.youtube.com/%s', default=None),
})
+
+ channel_badges = self._extract_badges(traverse_obj(data, ('header', ..., 'badges'), get_all=False))
+ if self._has_badge(channel_badges, BadgeType.VERIFIED):
+ info['channel_is_verified'] = True
# Playlist stats is a text runs array containing [video count, view count, last updated].
# last updated or (view count and last updated) may be missing.
playlist_stats = get_first(
last_updated_unix = self._parse_time_text(
self._get_text(playlist_stats, 2) # deprecated, remove when old layout discontinued
or self._get_text(playlist_header_renderer, ('byline', 1, 'playlistBylineRenderer', 'text')))
- info['modified_date'] = strftime_or_none(last_updated_unix, '%Y%m%d')
+ info['modified_date'] = strftime_or_none(last_updated_unix)
info['view_count'] = self._get_count(playlist_stats, 1)
if info['view_count'] is None: # 0 is allowed
playlist_header_renderer = traverse_obj(data, ('header', 'playlistHeaderRenderer')) or {}
player_header_privacy = playlist_header_renderer.get('privacy')
- badges = self._extract_badges(sidebar_renderer)
+ badges = self._extract_badges(traverse_obj(sidebar_renderer, 'badges'))
# Personal playlists, when authenticated, have a dropdown visibility selector instead of a badge
privacy_setting_icon = get_first(
data = self.extract_yt_initial_data(item_id, webpage or '', fatal=fatal) or {}
except ExtractorError as e:
if isinstance(e.cause, network_exceptions):
- if not isinstance(e.cause, urllib.error.HTTPError) or e.cause.code not in (403, 429):
+ if not isinstance(e.cause, HTTPError) or e.cause.status not in (403, 429):
retry.error = e
continue
self._error_or_warning(e, fatal=fatal)
'uploader_url': 'https://www.youtube.com/@3blue1brown',
'uploader': '3Blue1Brown',
'tags': ['Mathematics'],
- 'channel_follower_count': int
+ 'channel_follower_count': int,
+ 'channel_is_verified': True,
},
}, {
'note': 'playlists, singlepage',
'uploader_url': 'https://www.youtube.com/@3blue1brown',
'uploader_id': '@3blue1brown',
'uploader': '3Blue1Brown',
+ 'channel_is_verified': True,
},
}, {
'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',
}, {
'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',
'info_dict': {
- 'id': 'AlTsmyW4auo', # This will keep changing
+ 'id': 'hGkQjiJLjWQ', # This will keep changing
'ext': 'mp4',
'title': str,
'upload_date': r're:\d{8}',
'uploader_url': 'https://www.youtube.com/@SkyNews',
'uploader_id': '@SkyNews',
'uploader': 'Sky News',
+ 'channel_is_verified': True,
},
'params': {
'skip_download': True,
'uploader_id': '@colethedj1894',
'uploader': 'colethedj',
},
+ 'playlist': [{
+ 'info_dict': {
+ 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
+ 'id': 'BaW_jenozKc',
+ '_type': 'url',
+ 'ie_key': 'Youtube',
+ 'duration': 10,
+ 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
+ 'channel_url': 'https://www.youtube.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
+ 'view_count': int,
+ 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc',
+ 'channel': 'Philipp Hagemeister',
+ 'uploader_id': '@PhilippHagemeister',
+ 'uploader_url': 'https://www.youtube.com/@PhilippHagemeister',
+ 'uploader': 'Philipp Hagemeister',
+ }
+ }],
'playlist_count': 1,
+ 'params': {'extract_flat': True},
}, {
'note': 'API Fallback: Recommended - redirects to home page. Requires visitorData',
'url': 'https://www.youtube.com/feed/recommended',
'channel_url': str,
'concurrent_view_count': int,
'channel': str,
+ 'uploader': str,
+ 'uploader_url': str,
+ 'uploader_id': str,
+ 'channel_is_verified': bool, # this will keep changing
}
}],
'params': {'extract_flat': True, 'playlist_items': '1'},
'uploader': 'PewDiePie',
'uploader_url': 'https://www.youtube.com/@PewDiePie',
'uploader_id': '@PewDiePie',
+ 'channel_is_verified': True,
}
}],
'params': {'extract_flat': True},
'uploader_url': 'https://www.youtube.com/@3blue1brown',
'uploader_id': '@3blue1brown',
'uploader': '3Blue1Brown',
+ 'channel_is_verified': True,
},
'playlist_count': 0,
}, {
'description': 'I make music',
'channel_url': 'https://www.youtube.com/channel/UCgFwu-j5-xNJml2FtTrrB3A',
'channel_follower_count': int,
+ 'channel_is_verified': True,
},
'playlist_mincount': 10,
+ }, {
+ # Playlist with only shorts, shown as reel renderers
+ # FIXME: future: YouTube currently doesn't give continuation for this,
+ # may do in future.
+ 'url': 'https://www.youtube.com/playlist?list=UUxqPAgubo4coVn9Lx1FuKcg',
+ 'info_dict': {
+ 'id': 'UUxqPAgubo4coVn9Lx1FuKcg',
+ 'channel_url': 'https://www.youtube.com/channel/UCxqPAgubo4coVn9Lx1FuKcg',
+ 'view_count': int,
+ 'uploader_id': '@BangyShorts',
+ 'description': '',
+ 'uploader_url': 'https://www.youtube.com/@BangyShorts',
+ 'channel_id': 'UCxqPAgubo4coVn9Lx1FuKcg',
+ 'channel': 'Bangy Shorts',
+ 'uploader': 'Bangy Shorts',
+ 'tags': [],
+ 'availability': 'public',
+ 'modified_date': '20230626',
+ 'title': 'Uploads from Bangy Shorts',
+ },
+ 'playlist_mincount': 100,
+ 'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
}]
@classmethod
'description': 'md5:4ae48dfa9505ffc307dad26342d06bfc',
'title': 'Kurzgesagt – In a Nutshell',
'channel_id': 'UCsXVk37bltHxD1rDPwtNM8Q',
- 'playlist_count': int, # XXX: should have a way of saying > 1
+ # No longer available for search as it is set to the handle.
+ # 'playlist_count': int,
'channel_url': 'https://www.youtube.com/channel/UCsXVk37bltHxD1rDPwtNM8Q',
'thumbnails': list,
'uploader_id': '@kurzgesagt',
'uploader_url': 'https://www.youtube.com/@kurzgesagt',
'uploader': 'Kurzgesagt – In a Nutshell',
+ 'channel_is_verified': True,
+ 'channel_follower_count': int,
}
}],
'params': {'extract_flat': True, 'playlist_items': '1'},
}]
-class YoutubeStoriesIE(InfoExtractor):
- IE_DESC = 'YouTube channel stories; "ytstories:" prefix'
- IE_NAME = 'youtube:stories'
- _VALID_URL = r'ytstories:UC(?P<id>[A-Za-z0-9_-]{21}[AQgw])$'
- _TESTS = [{
- 'url': 'ytstories:UCwFCb4jeqaKWnciAYM-ZVHg',
- 'only_matching': True,
- }]
-
- def _real_extract(self, url):
- playlist_id = f'RLTD{self._match_id(url)}'
- return self.url_result(
- smuggle_url(f'https://www.youtube.com/playlist?list={playlist_id}&playnext=1', {'is_story': True}),
- ie=YoutubeTabIE, video_id=playlist_id)
-
-
class YoutubeShortsAudioPivotIE(InfoExtractor):
IE_DESC = 'YouTube Shorts audio pivot (Shorts using audio of a given video)'
IE_NAME = 'youtube:shorts:pivot:audio'
'live_status': 'not_live',
'channel_follower_count': int,
'chapters': 'count:20',
+ 'comment_count': int,
+ 'heatmap': 'count:100',
}
}]
'channel': 'さなちゃんねる',
'description': 'md5:6aebf95cc4a1d731aebc01ad6cc9806d',
'uploader': 'さなちゃんねる',
+ 'channel_is_verified': True,
+ 'heatmap': 'count:100',
},
'add_ie': ['Youtube'],
'params': {'skip_download': 'Youtube'},