X-Git-Url: https://jfr.im/git/yt-dlp.git/blobdiff_plain/962ffcf89c8d935410391fbea3580688aafe76d7..45e8a04e48fc83fb25c2b13f1c0e668b99838ad4:/yt_dlp/extractor/youtube.py diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index 1a9c88f35..09e2127e3 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -1074,6 +1074,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'age_limit': 0, 'start_time': 1, 'end_time': 9, + 'comment_count': int, 'channel_follower_count': int } }, @@ -1118,6 +1119,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'thumbnail': 'https://i.ytimg.com/vi/BaW_jenozKc/maxresdefault.jpg', 'live_status': 'not_live', 'age_limit': 0, + 'comment_count': int, 'channel_follower_count': int }, 'params': { @@ -1260,6 +1262,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'categories': ['Entertainment'], 'duration': 106, 'channel_url': 'https://www.youtube.com/channel/UC1yoRdFoFJaCY-AGfD9W0wQ', + 'comment_count': int, 'channel_follower_count': int }, }, @@ -1347,7 +1350,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'upload_date': '20150827', 'uploader_id': 'olympic', 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic', - 'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games', + 'description': 'md5:04bbbf3ccceb6795947572ca36f45904', 'uploader': 'Olympics', 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games', 'like_count': int, @@ -1396,6 +1399,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'like_count': int, 'live_status': 'not_live', 'availability': 'unlisted', + 'comment_count': int, 'channel_follower_count': int }, }, @@ -1624,6 +1628,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'thumbnail': 'https://i.ytimg.com/vi_webp/M4gD1WSo5mA/maxresdefault.webp', 'live_status': 'not_live', 'playable_in_embed': True, + 'comment_count': int, 'channel_follower_count': int }, 'params': { @@ -1656,6 +1661,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'view_count': int, 'live_status': 'not_live', 'channel_url': 'https://www.youtube.com/channel/UCH1dpzjCEiGAt8CXkryhkZg', + 'comment_count': int, 'channel_follower_count': int }, 'params': { @@ -1920,6 +1926,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'view_count': int, 'duration': 522, 'channel': 'kudvenkat', + 'comment_count': int, 'channel_follower_count': int }, 'params': { @@ -2141,6 +2148,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'availability': 'public', 'channel': 'Leon Nguyen', 'thumbnail': 'https://i.ytimg.com/vi_webp/2NUZ8W2llS4/maxresdefault.webp', + 'comment_count': int, 'channel_follower_count': int } }, { @@ -2204,7 +2212,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'params': {'skip_download': True} }, { # Story. Requires specific player params to work. - # Note: stories get removed after some period of time 'url': 'https://www.youtube.com/watch?v=vv8qTUWmulI', 'info_dict': { 'id': 'vv8qTUWmulI', @@ -2227,7 +2234,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'thumbnail': 'https://i.ytimg.com/vi_webp/vv8qTUWmulI/maxresdefault.webp', 'uploader_url': 'http://www.youtube.com/user/BlastfromthePast', 'channel_url': 'https://www.youtube.com/channel/UCzIZ8HrzDgc-pNQDUG6avBA', - } + }, + 'skip': 'stories get removed after some period of time', }, { 'url': 'https://www.youtube.com/watch?v=tjjjtzRLHvA', 'info_dict': { @@ -2467,6 +2475,7 @@ def _extract_signature_function(self, video_id, player_url, example_sig): func_id = f'js_{player_id}_{self._signature_cache_id(example_sig)}' assert os.path.basename(func_id) == func_id + self.write_debug(f'Extracting signature function {func_id}') cache_spec = self.cache.load('youtube-sigfuncs', func_id) if cache_spec is not None: return lambda s: ''.join(s[i] for i in cache_spec) @@ -2714,10 +2723,10 @@ def _extract_url(webpage): @classmethod def extract_id(cls, url): - mobj = re.match(cls._VALID_URL, url, re.VERBOSE) - if mobj is None: - raise ExtractorError('Invalid URL: %s' % url) - return mobj.group('id') + video_id = cls.get_temp_id(url) + if not video_id: + raise ExtractorError(f'Invalid URL: {url}') + return video_id def _extract_chapters_from_json(self, data, duration): chapter_list = traverse_obj( @@ -2763,17 +2772,15 @@ def _extract_chapters(self, chapter_list, chapter_time, chapter_title, duration, if not strict: chapter_list.sort(key=lambda c: c['start_time'] or 0) - chapters = [{'start_time': 0, 'title': ''}] + chapters = [{'start_time': 0}] for idx, chapter in enumerate(chapter_list): - if chapter['start_time'] is None or not chapter['title']: + if chapter['start_time'] is None: self.report_warning(f'Incomplete chapter {idx}') elif chapters[-1]['start_time'] <= chapter['start_time'] <= duration: - chapters[-1]['end_time'] = chapter['start_time'] chapters.append(chapter) else: self.report_warning(f'Invalid start time for chapter "{chapter["title"]}"') - chapters[-1]['end_time'] = duration - return chapters if len(chapters) > 1 and chapters[1]['start_time'] else chapters[1:] + return chapters[1:] def _extract_comment(self, comment_renderer, parent=None): comment_id = comment_renderer.get('commentId') @@ -3333,6 +3340,9 @@ def _extract_storyboard(self, player_responses, duration): 'url': url, 'width': width, 'height': height, + 'fps': frame_count / duration, + 'rows': rows, + 'columns': cols, 'fragments': [{ 'url': url.replace('$M', str(j)), 'duration': min(fragment_duration, duration - (j * fragment_duration)), @@ -3448,7 +3458,7 @@ def feed_entry(name): if get_first(video_details, 'isPostLiveDvr'): self.write_debug('Video is in Post-Live Manifestless mode') - if duration or 0 > 4 * 3600: + if (duration or 0) > 4 * 3600: self.report_warning( 'The livestream has not finished processing. Only 4 hours of the video can be currently downloaded. ' 'This is a known issue and patches are welcome') @@ -5003,7 +5013,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor): }, { 'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live', 'info_dict': { - 'id': 'GgL890LIznQ', # This will keep changing + 'id': 'Wq15eF5vCbI', # This will keep changing 'ext': 'mp4', 'title': str, 'uploader': 'Sky News', @@ -5123,7 +5133,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor): 'uploader': 'NoCopyrightSounds', 'description': 'Providing you with copyright free / safe music for gaming, live streaming, studying and more!', 'uploader_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg', - 'title': 'NCS Releases', + 'title': 'NCS : All Releases 💿', 'uploader_url': 'https://www.youtube.com/c/NoCopyrightSounds', 'channel_url': 'https://www.youtube.com/c/NoCopyrightSounds', 'modified_date': r're:\d{8}', @@ -5192,7 +5202,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor): 'title': 'yt-dlp unlisted playlist test', 'availability': 'unlisted', 'tags': [], - 'modified_date': '20211208', + 'modified_date': '20220418', 'channel': 'colethedj', 'view_count': int, 'description': '', @@ -5280,6 +5290,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor): 'channel': 'pukkandan', 'description': 'Test for collaborative playlist', 'title': 'yt-dlp test - collaborative playlist', + 'view_count': int, 'uploader_url': 'https://www.youtube.com/channel/UCKcqXmCcyqnhgpA5P0oHH_Q', }, 'playlist_mincount': 2 @@ -5487,7 +5498,7 @@ class YoutubePlaylistIE(InfoExtractor): 'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'], }, { 'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl', - 'playlist_mincount': 654, + 'playlist_mincount': 455, 'info_dict': { 'title': '2018 Chinese New Singles (11/6 updated)', 'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl', @@ -5560,6 +5571,8 @@ class YoutubeYtBeIE(InfoExtractor): 'channel_url': 'https://www.youtube.com/channel/UCEfMCQ9bs3tjvjy1s451zaw', 'availability': 'public', 'duration': 59, + 'comment_count': int, + 'channel_follower_count': int }, 'params': { 'noplaylist': True, @@ -5777,10 +5790,11 @@ class YoutubeSearchURLIE(YoutubeTabBaseInfoExtractor): 'info_dict': { 'id': '#cats', 'title': '#cats', - 'entries': [{ - 'url': r're:https://(www\.)?youtube\.com/hashtag/cats', - 'title': '#cats', - }], + # The test suite does not have support for nested playlists + # 'entries': [{ + # 'url': r're:https://(www\.)?youtube\.com/hashtag/cats', + # 'title': '#cats', + # }], }, }, { 'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB', @@ -5997,6 +6011,25 @@ class YoutubeClipIE(YoutubeTabBaseInfoExtractor): 'section_start': 29.0, 'section_end': 39.7, 'duration': 10.7, + 'age_limit': 0, + 'availability': 'public', + 'categories': ['Gaming'], + 'channel': 'Scott The Woz', + 'channel_id': 'UC4rqhyiTs7XyuODcECvuiiQ', + 'channel_url': 'https://www.youtube.com/channel/UC4rqhyiTs7XyuODcECvuiiQ', + 'description': 'md5:7a4517a17ea9b4bd98996399d8bb36e7', + 'like_count': int, + 'playable_in_embed': True, + 'tags': 'count:17', + 'thumbnail': 'https://i.ytimg.com/vi_webp/ScPX26pdQik/maxresdefault.webp', + 'title': 'Mobile Games on Console - Scott The Woz', + 'upload_date': '20210920', + 'uploader': 'Scott The Woz', + 'uploader_id': 'scottthewoz', + 'uploader_url': 'http://www.youtube.com/user/scottthewoz', + 'view_count': int, + 'live_status': 'not_live', + 'channel_follower_count': int } }]