X-Git-Url: https://jfr.im/git/yt-dlp.git/blobdiff_plain/0ca0f88121db5a1e9c223077af1b78c62d5ead6d..e63faa101cf7b9bf9f899cabb74ce03c7f893572:/yt_dlp/extractor/youtube.py diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index 83be162c9..719a151c4 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -24,12 +24,14 @@ from ..utils import ( NO_DEFAULT, ExtractorError, + LazyList, UserNotLive, bug_reports_message, classproperty, clean_html, datetime_from_str, dict_get, + filter_dict, float_or_none, format_field, get_first, @@ -291,8 +293,8 @@ class YoutubeBaseInfoExtractor(InfoExtractor): _RESERVED_NAMES = ( r'channel|c|user|playlist|watch|w|v|embed|e|watch_popup|clip|' r'shorts|movies|results|search|shared|hashtag|trending|explore|feed|feeds|' - r'browse|oembed|get_video_info|iframe_api|s/player|' - r'storefront|oops|index|account|reporthistory|t/terms|about|upload|signin|logout') + r'browse|oembed|get_video_info|iframe_api|s/player|source|' + r'storefront|oops|index|account|t/terms|about|upload|signin|logout') _PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)' @@ -389,6 +391,8 @@ class YoutubeBaseInfoExtractor(InfoExtractor): 'si', 'th', 'lo', 'my', 'ka', 'am', 'km', 'zh-CN', 'zh-TW', 'zh-HK', 'ja', 'ko' ] + _IGNORED_WARNINGS = {'Unavailable videos will be hidden during playback'} + @functools.cached_property def _preferred_lang(self): """ @@ -617,7 +621,7 @@ def generate_api_headers( if auth is not None: headers['Authorization'] = auth headers['X-Origin'] = origin - return {h: v for h, v in headers.items() if v is not None} + return filter_dict(headers) def _download_ytcfg(self, client, video_id): url = { @@ -672,20 +676,10 @@ def _extract_continuation(cls, renderer): if next_continuation: return next_continuation - contents = [] - for key in ('contents', 'items'): - contents.extend(try_get(renderer, lambda x: x[key], list) or []) - - for content in contents: - if not isinstance(content, dict): - continue - continuation_ep = try_get( - content, (lambda x: x['continuationItemRenderer']['continuationEndpoint'], - lambda x: x['continuationItemRenderer']['button']['buttonRenderer']['command']), - dict) - continuation = cls._extract_continuation_ep_data(continuation_ep) - if continuation: - return continuation + return traverse_obj(renderer, ( + ('contents', 'items', 'rows'), ..., 'continuationItemRenderer', + ('continuationEndpoint', ('button', 'buttonRenderer', 'command')) + ), get_all=False, expected_type=cls._extract_continuation_ep_data) @classmethod def _extract_alerts(cls, data): @@ -701,12 +695,11 @@ def _extract_alerts(cls, data): yield alert_type, message def _report_alerts(self, alerts, expected=True, fatal=True, only_once=False): - errors = [] - warnings = [] + errors, warnings = [], [] for alert_type, alert_message in alerts: if alert_type.lower() == 'error' and fatal: errors.append([alert_type, alert_message]) - else: + elif alert_message not in self._IGNORED_WARNINGS: warnings.append([alert_type, alert_message]) for alert_type, alert_message in (warnings + errors[:-1]): @@ -919,8 +912,7 @@ def _extract_video(self, renderer): traverse_obj(renderer, ('title', 'accessibility', 'accessibilityData', 'label'), default='', expected_type=str), video_id, default=None, group='duration')) - view_count = self._get_count(renderer, 'viewCountText') - + view_count = self._get_count(renderer, 'viewCountText', 'shortViewCountText') uploader = self._get_text(renderer, 'ownerText', 'shortBylineText') channel_id = traverse_obj( renderer, ('shortBylineText', 'runs', ..., 'navigationEndpoint', 'browseEndpoint', 'browseId'), @@ -939,6 +931,12 @@ def _extract_video(self, renderer): if overlay_style == 'SHORTS' or '/shorts/' in navigation_url: url = f'https://www.youtube.com/shorts/{video_id}' + live_status = ( + 'is_upcoming' if scheduled_timestamp is not None + else 'was_live' if 'streamed' in time_text.lower() + else 'is_live' if overlay_style == 'LIVE' or self._has_badge(badges, BadgeType.LIVE_NOW) + else None) + return { '_type': 'url', 'ie_key': YoutubeIE.ie_key(), @@ -947,17 +945,12 @@ def _extract_video(self, renderer): 'title': title, 'description': description, 'duration': duration, - 'view_count': view_count, 'uploader': uploader, 'channel_id': channel_id, 'thumbnails': thumbnails, - 'upload_date': (strftime_or_none(self._parse_time_text(time_text), '%Y%m%d') - if self._configuration_arg('approximate_date', ie_key='youtubetab') - else None), - 'live_status': ('is_upcoming' if scheduled_timestamp is not None - else 'was_live' if 'streamed' in time_text.lower() - else 'is_live' if overlay_style == 'LIVE' or self._has_badge(badges, BadgeType.LIVE_NOW) - else None), + 'timestamp': (self._parse_time_text(time_text) + if self._configuration_arg('approximate_date', ie_key=YoutubeTabIE) + else None), 'release_timestamp': scheduled_timestamp, 'availability': 'public' if self._has_badge(badges, BadgeType.AVAILABILITY_PUBLIC) @@ -965,7 +958,9 @@ def _extract_video(self, renderer): is_private=self._has_badge(badges, BadgeType.AVAILABILITY_PRIVATE) or None, needs_premium=self._has_badge(badges, BadgeType.AVAILABILITY_PREMIUM) or None, needs_subscription=self._has_badge(badges, BadgeType.AVAILABILITY_SUBSCRIPTION) or None, - is_unlisted=self._has_badge(badges, BadgeType.AVAILABILITY_UNLISTED) or None) + is_unlisted=self._has_badge(badges, BadgeType.AVAILABILITY_UNLISTED) or None), + 'concurrent_view_count' if live_status in ('is_live', 'is_upcoming') else 'view_count': view_count, + 'live_status': live_status } @@ -1727,7 +1722,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'live_status': 'not_live', 'playable_in_embed': True, 'comment_count': int, - 'channel_follower_count': int + 'channel_follower_count': int, + 'chapters': list, }, 'params': { 'skip_download': True, @@ -1760,7 +1756,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'live_status': 'not_live', 'channel_url': 'https://www.youtube.com/channel/UCH1dpzjCEiGAt8CXkryhkZg', 'comment_count': int, - 'channel_follower_count': int + 'channel_follower_count': int, + 'chapters': list, }, 'params': { 'skip_download': True, @@ -2025,7 +2022,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'duration': 522, 'channel': 'kudvenkat', 'comment_count': int, - 'channel_follower_count': int + 'channel_follower_count': int, + 'chapters': list, }, 'params': { 'skip_download': True, @@ -2175,7 +2173,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'like_count': int, 'live_status': 'not_live', 'playable_in_embed': True, - 'channel_follower_count': int + 'channel_follower_count': int, + 'chapters': list, }, 'params': { 'format': '17', # 3gp format available on android @@ -2219,7 +2218,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'duration': 248, 'categories': ['Education'], 'age_limit': 0, - 'channel_follower_count': int + 'channel_follower_count': int, + 'chapters': list, }, 'params': {'format': 'mhtml', 'skip_download': True} }, { # Ensure video upload_date is in UTC timezone (video was uploaded 1641170939) @@ -2335,6 +2335,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'view_count': int, 'playable_in_embed': True, 'description': 'md5:2ef1d002cad520f65825346e2084e49d', + 'concurrent_view_count': int, }, 'params': {'skip_download': True} }, { @@ -2501,10 +2502,8 @@ def __init__(self, *args, **kwargs): self._code_cache = {} self._player_cache = {} - def _prepare_live_from_start_formats(self, formats, video_id, live_start_time, url, webpage_url, smuggled_data): + def _prepare_live_from_start_formats(self, formats, video_id, live_start_time, url, webpage_url, smuggled_data, is_live): lock = threading.Lock() - - is_live = True start_time = time.time() formats = [f for f in formats if f.get('is_from_start')] @@ -2519,7 +2518,8 @@ def refetch_manifest(format_id, delay): microformats = traverse_obj( prs, (..., 'microformat', 'playerMicroformatRenderer'), expected_type=dict, default=[]) - _, is_live, _, formats, _ = self._list_formats(video_id, microformats, video_details, prs, player_url) + _, live_status, _, formats, _ = self._list_formats(video_id, microformats, video_details, prs, player_url) + is_live = live_status == 'is_live' start_time = time.time() def mpd_feed(format_id, delay): @@ -2540,12 +2540,17 @@ def mpd_feed(format_id, delay): return f['manifest_url'], f['manifest_stream_number'], is_live for f in formats: - f['is_live'] = True - f['protocol'] = 'http_dash_segments_generator' - f['fragments'] = functools.partial( - self._live_dash_fragments, f['format_id'], live_start_time, mpd_feed) + f['is_live'] = is_live + gen = functools.partial(self._live_dash_fragments, video_id, f['format_id'], + live_start_time, mpd_feed, not is_live and f.copy()) + if is_live: + f['fragments'] = gen + f['protocol'] = 'http_dash_segments_generator' + else: + f['fragments'] = LazyList(gen({})) + del f['is_from_start'] - def _live_dash_fragments(self, format_id, live_start_time, mpd_feed, ctx): + def _live_dash_fragments(self, video_id, format_id, live_start_time, mpd_feed, manifestless_orig_fmt, ctx): FETCH_SPAN, MAX_DURATION = 5, 432000 mpd_url, stream_number, is_live = None, None, True @@ -2576,15 +2581,18 @@ def _extract_sequence_from_mpd(refresh_sequence, immediate): return False, last_seq elif old_mpd_url == mpd_url: return True, last_seq - try: - fmts, _ = self._extract_mpd_formats_and_subtitles( - mpd_url, None, note=False, errnote=False, fatal=False) - except ExtractorError: - fmts = None - if not fmts: - no_fragment_score += 2 - return False, last_seq - fmt_info = next(x for x in fmts if x['manifest_stream_number'] == stream_number) + if manifestless_orig_fmt: + fmt_info = manifestless_orig_fmt + else: + try: + fmts, _ = self._extract_mpd_formats_and_subtitles( + mpd_url, None, note=False, errnote=False, fatal=False) + except ExtractorError: + fmts = None + if not fmts: + no_fragment_score += 2 + return False, last_seq + fmt_info = next(x for x in fmts if x['manifest_stream_number'] == stream_number) fragments = fmt_info['fragments'] fragment_base_url = fmt_info['fragment_base_url'] assert fragment_base_url @@ -2592,6 +2600,7 @@ def _extract_sequence_from_mpd(refresh_sequence, immediate): _last_seq = int(re.search(r'(?:/|^)sq/(\d+)', fragments[-1]['path']).group(1)) return True, _last_seq + self.write_debug(f'[{video_id}] Generating fragments for format {format_id}') while is_live: fetch_time = time.time() if no_fragment_score > 30: @@ -2645,6 +2654,11 @@ def _extract_sequence_from_mpd(refresh_sequence, immediate): except ExtractorError: continue + if manifestless_orig_fmt: + # Stop at the first iteration if running for post-live manifestless; + # fragment count no longer increase since it starts + break + time.sleep(max(0, FETCH_SPAN + fetch_time - time.time())) def _extract_player_url(self, *ytcfgs, webpage=None): @@ -2824,7 +2838,7 @@ def _decrypt_nsig(self, s, video_id, player_url): self.report_warning( f'Native nsig extraction failed: Trying with PhantomJS\n' f' n = {s} ; player = {player_url}', video_id) - self.write_debug(e) + self.write_debug(e, only_once=True) args, func_body = func_code ret = jsi.execute( @@ -2942,7 +2956,7 @@ def _mark_watched(self, video_id, player_responses): # these seem to mark watchtime "history" in the real world # they're required, so send in a single value qs.update({ - 'st': video_length, + 'st': 0, 'et': video_length, }) @@ -3034,8 +3048,9 @@ def _extract_chapters(self, chapter_list, chapter_time, chapter_title, duration, self.report_warning(f'Incomplete chapter {idx}') elif chapters[-1]['start_time'] <= chapter['start_time'] <= duration: chapters.append(chapter) - else: - self.report_warning(f'Invalid start time for chapter "{chapter["title"]}"') + elif chapter not in chapters: + self.report_warning( + f'Invalid start time ({chapter["start_time"]} < {chapters[-1]["start_time"]}) for chapter "{chapter["title"]}"') return chapters[1:] def _extract_comment(self, comment_renderer, parent=None): @@ -3404,7 +3419,12 @@ def append_client(*client_names): self.report_warning(last_error) return prs, player_url - def _extract_formats_and_subtitles(self, streaming_data, video_id, player_url, is_live, duration): + def _needs_live_processing(self, live_status, duration): + if (live_status == 'is_live' and self.get_param('live_from_start') + or live_status == 'post_live' and (duration or 0) > 4 * 3600): + return live_status + + def _extract_formats_and_subtitles(self, streaming_data, video_id, player_url, live_status, duration): itags, stream_ids = {}, [] itag_qualities, res_qualities = {}, {0: None} q = qualities([ @@ -3551,15 +3571,22 @@ def _extract_formats_and_subtitles(self, streaming_data, video_id, player_url, i dct['container'] = dct['ext'] + '_dash' yield dct - live_from_start = is_live and self.get_param('live_from_start') - skip_manifests = self._configuration_arg('skip') - if not self.get_param('youtube_include_hls_manifest', True): - skip_manifests.append('hls') + needs_live_processing = self._needs_live_processing(live_status, duration) + skip_bad_formats = not self._configuration_arg('include_incomplete_formats') + + skip_manifests = set(self._configuration_arg('skip')) + if (not self.get_param('youtube_include_hls_manifest', True) + or needs_live_processing == 'is_live' # These will be filtered out by YoutubeDL anyway + or needs_live_processing and skip_bad_formats): + skip_manifests.add('hls') + if not self.get_param('youtube_include_dash_manifest', True): - skip_manifests.append('dash') - get_dash = 'dash' not in skip_manifests and ( - not is_live or live_from_start or self._configuration_arg('include_live_dash')) - get_hls = not live_from_start and 'hls' not in skip_manifests + skip_manifests.add('dash') + if self._configuration_arg('include_live_dash'): + self._downloader.deprecated_feature('[youtube] include_live_dash extractor argument is deprecated. ' + 'Use include_incomplete_formats extractor argument instead') + elif skip_bad_formats and live_status == 'is_live' and needs_live_processing != 'is_live': + skip_manifests.add('dash') def process_manifest_format(f, proto, itag): if itag in itags: @@ -3577,16 +3604,17 @@ def process_manifest_format(f, proto, itag): subtitles = {} for sd in streaming_data: - hls_manifest_url = get_hls and sd.get('hlsManifestUrl') + hls_manifest_url = 'hls' not in skip_manifests and sd.get('hlsManifestUrl') if hls_manifest_url: - fmts, subs = self._extract_m3u8_formats_and_subtitles(hls_manifest_url, video_id, 'mp4', fatal=False, live=is_live) + fmts, subs = self._extract_m3u8_formats_and_subtitles( + hls_manifest_url, video_id, 'mp4', fatal=False, live=live_status == 'is_live') subtitles = self._merge_subtitles(subs, subtitles) for f in fmts: if process_manifest_format(f, 'hls', self._search_regex( r'/itag/(\d+)', f['url'], 'itag', default=None)): yield f - dash_manifest_url = get_dash and sd.get('dashManifestUrl') + dash_manifest_url = 'dash' not in skip_manifests and sd.get('dashManifestUrl') if dash_manifest_url: formats, subs = self._extract_mpd_formats_and_subtitles(dash_manifest_url, video_id, fatal=False) subtitles = self._merge_subtitles(subs, subtitles) # Prioritize HLS subs over DASH @@ -3594,7 +3622,7 @@ def process_manifest_format(f, proto, itag): if process_manifest_format(f, 'dash', f['format_id']): f['filesize'] = int_or_none(self._search_regex( r'/clen/(\d+)', f.get('fragment_base_url') or f['url'], 'file size', default=None)) - if live_from_start: + if needs_live_processing: f['is_from_start'] = True yield f @@ -3660,11 +3688,19 @@ def _list_formats(self, video_id, microformats, video_details, player_responses, is_live = get_first(video_details, 'isLive') if is_live is None: is_live = get_first(live_broadcast_details, 'isLiveNow') - + live_content = get_first(video_details, 'isLiveContent') + is_upcoming = get_first(video_details, 'isUpcoming') + post_live = get_first(video_details, 'isPostLiveDvr') + live_status = ('post_live' if post_live + else 'is_live' if is_live + else 'is_upcoming' if is_upcoming + else 'was_live' if live_content + else 'not_live' if False in (is_live, live_content) + else None) streaming_data = traverse_obj(player_responses, (..., 'streamingData'), default=[]) - *formats, subtitles = self._extract_formats_and_subtitles(streaming_data, video_id, player_url, is_live, duration) + *formats, subtitles = self._extract_formats_and_subtitles(streaming_data, video_id, player_url, live_status, duration) - return live_broadcast_details, is_live, streaming_data, formats, subtitles + return live_broadcast_details, live_status, streaming_data, formats, subtitles def _real_extract(self, url): url, smuggled_data = unsmuggle_url(url, {}) @@ -3756,8 +3792,10 @@ def feed_entry(name): or get_first(microformats, 'lengthSeconds') or parse_duration(search_meta('duration'))) or None - live_broadcast_details, is_live, streaming_data, formats, automatic_captions = \ - self._list_formats(video_id, microformats, video_details, player_responses, player_url) + live_broadcast_details, live_status, streaming_data, formats, automatic_captions = \ + self._list_formats(video_id, microformats, video_details, player_responses, player_url, duration) + if live_status == 'post_live': + self.write_debug(f'{video_id}: Video is in Post-Live Manifestless mode') if not formats: if not self.get_param('allow_unplayable_formats') and traverse_obj(streaming_data, (..., 'licenseInfos')): @@ -3816,7 +3854,7 @@ def feed_entry(name): thumbnails.extend({ 'url': 'https://i.ytimg.com/vi{webp}/{video_id}/{name}{live}.{ext}'.format( video_id=video_id, name=name, ext=ext, - webp='_webp' if ext == 'webp' else '', live='_live' if is_live else ''), + webp='_webp' if ext == 'webp' else '', live='_live' if live_status == 'is_live' else ''), } for name in thumbnail_names for ext in ('webp', 'jpg')) for thumb in thumbnails: i = next((i for i, t in enumerate(thumbnail_names) if f'/{video_id}/{t}' in thumb['url']), n_thumbnail_names) @@ -3831,20 +3869,27 @@ def feed_entry(name): or search_meta('channelId')) owner_profile_url = get_first(microformats, 'ownerProfileUrl') - live_content = get_first(video_details, 'isLiveContent') - is_upcoming = get_first(video_details, 'isUpcoming') - if is_live is None: - if is_upcoming or live_content is False: - is_live = False - if is_upcoming is None and (live_content or is_live): - is_upcoming = False live_start_time = parse_iso8601(get_first(live_broadcast_details, 'startTimestamp')) live_end_time = parse_iso8601(get_first(live_broadcast_details, 'endTimestamp')) if not duration and live_end_time and live_start_time: duration = live_end_time - live_start_time - if is_live and self.get_param('live_from_start'): - self._prepare_live_from_start_formats(formats, video_id, live_start_time, url, webpage_url, smuggled_data) + needs_live_processing = self._needs_live_processing(live_status, duration) + + def is_bad_format(fmt): + if needs_live_processing and not fmt.get('is_from_start'): + return True + elif (live_status == 'is_live' and needs_live_processing != 'is_live' + and fmt.get('protocol') == 'http_dash_segments'): + return True + + for fmt in filter(is_bad_format, formats): + fmt['preference'] = (fmt.get('preference') or -1) - 10 + fmt['format_note'] = join_nonempty(fmt.get('format_note'), '(Last 4 hours)', delim=' ') + + if needs_live_processing: + self._prepare_live_from_start_formats( + formats, video_id, live_start_time, url, webpage_url, smuggled_data, live_status == 'is_live') formats.extend(self._extract_storyboard(player_responses, duration)) @@ -3879,22 +3924,10 @@ def feed_entry(name): 'categories': [category] if category else None, 'tags': keywords, 'playable_in_embed': get_first(playability_statuses, 'playableInEmbed'), - 'is_live': is_live, - 'was_live': (False if is_live or is_upcoming or live_content is False - else None if is_live is None or is_upcoming is None - else live_content), - 'live_status': 'is_upcoming' if is_upcoming else None, # rest will be set by YoutubeDL + 'live_status': live_status, 'release_timestamp': live_start_time, } - if get_first(video_details, 'isPostLiveDvr'): - self.write_debug('Video is in Post-Live Manifestless mode') - info['live_status'] = 'post_live' - if (duration or 0) > 4 * 3600: - self.report_warning( - 'The livestream has not finished processing. Only 4 hours of the video can be currently downloaded. ' - 'This is a known issue and patches are welcome') - subtitles = {} pctr = traverse_obj(player_responses, (..., 'captions', 'playerCaptionsTracklistRenderer'), expected_type=dict) if pctr: @@ -4024,7 +4057,8 @@ def process_language(container, base_url, lang_code, sub_name, query): 'url': f'https://www.youtube.com/watch?v={video_id}&bpctr=9999999999&has_verified=1', 'video_id': video_id, 'ext': 'json', - 'protocol': 'youtube_live_chat' if is_live or is_upcoming else 'youtube_live_chat_replay', + 'protocol': ('youtube_live_chat' if live_status in ('is_live', 'is_upcoming') + else 'youtube_live_chat_replay'), }] if initial_data: @@ -4085,6 +4119,15 @@ def process_language(container, base_url, lang_code, sub_name, query): 'like_count': str_to_int(like_count), 'dislike_count': str_to_int(dislike_count), }) + vcr = traverse_obj(vpir, ('viewCount', 'videoViewCountRenderer')) + if vcr: + vc = self._get_count(vcr, 'viewCount') + # Upcoming premieres with waiting count are treated as live here + if vcr.get('isLive'): + info['concurrent_view_count'] = vc + elif info.get('view_count') is None: + info['view_count'] = vc + vsir = get_first(contents, 'videoSecondaryInfoRenderer') if vsir: vor = traverse_obj(vsir, ('owner', 'videoOwnerRenderer')) @@ -4131,9 +4174,7 @@ def process_language(container, base_url, lang_code, sub_name, query): unified_strdate(get_first(microformats, 'uploadDate')) or unified_strdate(search_meta('uploadDate'))) if not upload_date or ( - not info.get('is_live') - and not info.get('was_live') - and info.get('live_status') != 'is_upcoming' + live_status in ('not_live', None) and 'no-youtube-prefer-utc-upload-date' not in self.get_param('compat_opts', []) ): upload_date = strftime_or_none( @@ -4405,6 +4446,13 @@ def _rich_grid_entries(self, contents): yield entry ''' + def _report_history_entries(self, renderer): + for url in traverse_obj(renderer, ( + 'rows', ..., 'reportHistoryTableRowRenderer', 'cells', ..., + 'reportHistoryTableCellRenderer', 'cell', 'reportHistoryTableTextCellRenderer', 'text', 'runs', ..., + 'navigationEndpoint', 'commandMetadata', 'webCommandMetadata', 'url')): + yield self.url_result(urljoin('https://www.youtube.com', url), YoutubeIE) + def _extract_entries(self, parent_renderer, continuation_list): # continuation_list is modified in-place with continuation_list = [continuation_token] continuation_list[:] = [None] @@ -4416,12 +4464,16 @@ def _extract_entries(self, parent_renderer, continuation_list): content, 'itemSectionRenderer', 'musicShelfRenderer', 'musicShelfContinuation', expected_type=dict) if not is_renderer: - renderer = content.get('richItemRenderer') - if renderer: - for entry in self._rich_entries(renderer): + if content.get('richItemRenderer'): + for entry in self._rich_entries(content['richItemRenderer']): yield entry continuation_list[0] = self._extract_continuation(parent_renderer) + elif content.get('reportHistorySectionRenderer'): # https://www.youtube.com/reporthistory + table = traverse_obj(content, ('reportHistorySectionRenderer', 'table', 'tableRenderer')) + yield from self._report_history_entries(table) + continuation_list[0] = self._extract_continuation(table) continue + isr_contents = try_get(is_renderer, lambda x: x['contents'], list) or [] for isr_content in isr_contents: if not isinstance(isr_content, dict): @@ -4482,26 +4534,6 @@ def _entries(self, tab, item_id, ytcfg, account_syncid, visitor_data): # See: https://github.com/ytdl-org/youtube-dl/issues/28702 visitor_data = self._extract_visitor_data(response) or visitor_data - known_continuation_renderers = { - 'playlistVideoListContinuation': self._playlist_entries, - 'gridContinuation': self._grid_entries, - 'itemSectionContinuation': self._post_thread_continuation_entries, - 'sectionListContinuation': extract_entries, # for feeds - } - continuation_contents = try_get( - response, lambda x: x['continuationContents'], dict) or {} - continuation_renderer = None - for key, value in continuation_contents.items(): - if key not in known_continuation_renderers: - continue - continuation_renderer = value - continuation_list = [None] - yield from known_continuation_renderers[key](continuation_renderer) - continuation = continuation_list[0] or self._extract_continuation(continuation_renderer) - break - if continuation_renderer: - continue - known_renderers = { 'videoRenderer': (self._grid_entries, 'items'), # for membership tab 'gridPlaylistRenderer': (self._grid_entries, 'items'), @@ -4510,24 +4542,32 @@ def _entries(self, tab, item_id, ytcfg, account_syncid, visitor_data): 'playlistVideoRenderer': (self._playlist_entries, 'contents'), 'itemSectionRenderer': (extract_entries, 'contents'), # for feeds 'richItemRenderer': (extract_entries, 'contents'), # for hashtag - 'backstagePostThreadRenderer': (self._post_thread_continuation_entries, 'contents') + 'backstagePostThreadRenderer': (self._post_thread_continuation_entries, 'contents'), + 'reportHistoryTableRowRenderer': (self._report_history_entries, 'rows'), + 'playlistVideoListContinuation': (self._playlist_entries, None), + 'gridContinuation': (self._grid_entries, None), + 'itemSectionContinuation': (self._post_thread_continuation_entries, None), + 'sectionListContinuation': (extract_entries, None), # for feeds } - on_response_received = dict_get(response, ('onResponseReceivedActions', 'onResponseReceivedEndpoints')) - continuation_items = try_get( - on_response_received, lambda x: x[0]['appendContinuationItemsAction']['continuationItems'], list) - continuation_item = try_get(continuation_items, lambda x: x[0], dict) or {} + + continuation_items = traverse_obj(response, ( + ('onResponseReceivedActions', 'onResponseReceivedEndpoints'), ..., + 'appendContinuationItemsAction', 'continuationItems' + ), 'continuationContents', get_all=False) + continuation_item = traverse_obj(continuation_items, 0, None, expected_type=dict, default={}) + video_items_renderer = None - for key, value in continuation_item.items(): + for key in continuation_item.keys(): if key not in known_renderers: continue - video_items_renderer = {known_renderers[key][1]: continuation_items} + func, parent_key = known_renderers[key] + video_items_renderer = {parent_key: continuation_items} if parent_key else continuation_items continuation_list = [None] - yield from known_renderers[key][0](video_items_renderer) + yield from func(video_items_renderer) continuation = continuation_list[0] or self._extract_continuation(video_items_renderer) + + if not video_items_renderer: break - if video_items_renderer: - continue - break @staticmethod def _extract_selected_tab(tabs, fatal=True): @@ -4553,7 +4593,7 @@ def _extract_uploader(self, data): uploader['uploader_url'] = urljoin( 'https://www.youtube.com/', try_get(owner, lambda x: x['navigationEndpoint']['browseEndpoint']['canonicalBaseUrl'], str)) - return {k: v for k, v in uploader.items() if v is not None} + return filter_dict(uploader) def _extract_from_tabs(self, item_id, ytcfg, data, tabs): playlist_id = title = description = channel_url = channel_name = channel_id = None @@ -6067,9 +6107,9 @@ def _extract_notification_renderer(self, notification): title = self._search_regex( rf'{re.escape(channel or "")}[^:]+: (.+)', notification_title, 'video title', default=None) - upload_date = (strftime_or_none(self._parse_time_text(self._get_text(notification, 'sentTimeText')), '%Y%m%d') - if self._configuration_arg('approximate_date', ie_key=YoutubeTabIE.ie_key()) - else None) + timestamp = (self._parse_time_text(self._get_text(notification, 'sentTimeText')) + if self._configuration_arg('approximate_date', ie_key=YoutubeTabIE) + else None) return { '_type': 'url', 'url': url, @@ -6079,7 +6119,7 @@ def _extract_notification_renderer(self, notification): 'channel_id': channel_id, 'channel': channel, 'thumbnails': self._extract_thumbnails(notification, 'videoThumbnail'), - 'upload_date': upload_date, + 'timestamp': timestamp, } def _notification_menu_entries(self, ytcfg): @@ -6318,14 +6358,11 @@ def _real_extract(self, url): class YoutubeShortsAudioPivotIE(InfoExtractor): - IE_DESC = 'YouTube Shorts audio pivot (Shorts using audio of a given video); "ytshortsap:" prefix' + IE_DESC = 'YouTube Shorts audio pivot (Shorts using audio of a given video)' IE_NAME = 'youtube:shorts:pivot:audio' - _VALID_URL = f'(?x)^ytshortsap:{YoutubeIE._VALID_URL[5:]}' + _VALID_URL = r'https?://(?:www\.)?youtube\.com/source/(?P[\w-]{11})/shorts' _TESTS = [{ - 'url': 'ytshortsap:https://www.youtube.com/shorts/Lyj-MZSAA9o?feature=share', - 'only_matching': True, - }, { - 'url': 'ytshortsap:Lyj-MZSAA9o', + 'url': 'https://www.youtube.com/source/Lyj-MZSAA9o/shorts', 'only_matching': True, }]