from ..utils import (
NO_DEFAULT,
ExtractorError,
+ UserNotLive,
bug_reports_message,
classproperty,
clean_html,
'age_limit': 0,
'start_time': 1,
'end_time': 9,
+ 'comment_count': int,
'channel_follower_count': int
}
},
'thumbnail': 'https://i.ytimg.com/vi/BaW_jenozKc/maxresdefault.jpg',
'live_status': 'not_live',
'age_limit': 0,
+ 'comment_count': int,
'channel_follower_count': int
},
'params': {
'categories': ['Entertainment'],
'duration': 106,
'channel_url': 'https://www.youtube.com/channel/UC1yoRdFoFJaCY-AGfD9W0wQ',
+ 'comment_count': int,
'channel_follower_count': int
},
},
'upload_date': '20150827',
'uploader_id': 'olympic',
'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
- 'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
+ 'description': 'md5:04bbbf3ccceb6795947572ca36f45904',
'uploader': 'Olympics',
'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
'like_count': int,
'like_count': int,
'live_status': 'not_live',
'availability': 'unlisted',
+ 'comment_count': int,
'channel_follower_count': int
},
},
'thumbnail': 'https://i.ytimg.com/vi_webp/M4gD1WSo5mA/maxresdefault.webp',
'live_status': 'not_live',
'playable_in_embed': True,
+ 'comment_count': int,
'channel_follower_count': int
},
'params': {
'view_count': int,
'live_status': 'not_live',
'channel_url': 'https://www.youtube.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
+ 'comment_count': int,
'channel_follower_count': int
},
'params': {
'view_count': int,
'duration': 522,
'channel': 'kudvenkat',
+ 'comment_count': int,
'channel_follower_count': int
},
'params': {
'availability': 'public',
'channel': 'Leon Nguyen',
'thumbnail': 'https://i.ytimg.com/vi_webp/2NUZ8W2llS4/maxresdefault.webp',
+ 'comment_count': int,
'channel_follower_count': int
}
}, {
'params': {'skip_download': True}
}, {
# Story. Requires specific player params to work.
- # Note: stories get removed after some period of time
'url': 'https://www.youtube.com/watch?v=vv8qTUWmulI',
'info_dict': {
'id': 'vv8qTUWmulI',
'thumbnail': 'https://i.ytimg.com/vi_webp/vv8qTUWmulI/maxresdefault.webp',
'uploader_url': 'http://www.youtube.com/user/BlastfromthePast',
'channel_url': 'https://www.youtube.com/channel/UCzIZ8HrzDgc-pNQDUG6avBA',
- }
+ },
+ 'skip': 'stories get removed after some period of time',
}, {
'url': 'https://www.youtube.com/watch?v=tjjjtzRLHvA',
'info_dict': {
}
]
+ _WEBPAGE_TESTS = [
+ # YouTube <object> embed
+ {
+ 'url': 'http://www.improbable.com/2017/04/03/untrained-modern-youths-and-ancient-masters-in-selfie-portraits/',
+ 'md5': '873c81d308b979f0e23ee7e620b312a3',
+ 'info_dict': {
+ 'id': 'msN87y-iEx0',
+ 'ext': 'mp4',
+ 'title': 'Feynman: Mirrors FUN TO IMAGINE 6',
+ 'upload_date': '20080526',
+ 'description': 'md5:873c81d308b979f0e23ee7e620b312a3',
+ 'uploader': 'Christopher Sykes',
+ 'uploader_id': 'ChristopherJSykes',
+ 'age_limit': 0,
+ 'tags': ['feynman', 'mirror', 'science', 'physics', 'imagination', 'fun', 'cool', 'puzzle'],
+ 'channel_id': 'UCCeo--lls1vna5YJABWAcVA',
+ 'playable_in_embed': True,
+ 'thumbnail': 'https://i.ytimg.com/vi/msN87y-iEx0/hqdefault.jpg',
+ 'like_count': int,
+ 'comment_count': int,
+ 'channel': 'Christopher Sykes',
+ 'live_status': 'not_live',
+ 'channel_url': 'https://www.youtube.com/channel/UCCeo--lls1vna5YJABWAcVA',
+ 'availability': 'public',
+ 'duration': 195,
+ 'view_count': int,
+ 'categories': ['Science & Technology'],
+ 'channel_follower_count': int,
+ 'uploader_url': 'http://www.youtube.com/user/ChristopherJSykes',
+ },
+ 'params': {
+ 'skip_download': True,
+ }
+ },
+ ]
+
@classmethod
def suitable(cls, url):
from ..utils import parse_qs
microformats = traverse_obj(
prs, (..., 'microformat', 'playerMicroformatRenderer'),
expected_type=dict, default=[])
- _, is_live, _, formats = self._list_formats(video_id, microformats, video_details, prs, player_url)
+ _, is_live, _, formats, _ = self._list_formats(video_id, microformats, video_details, prs, player_url)
start_time = time.time()
def mpd_feed(format_id, delay):
func_id = f'js_{player_id}_{self._signature_cache_id(example_sig)}'
assert os.path.basename(func_id) == func_id
+ self.write_debug(f'Extracting signature function {func_id}')
cache_spec = self.cache.load('youtube-sigfuncs', func_id)
if cache_spec is not None:
return lambda s: ''.join(s[i] for i in cache_spec)
@classmethod
def extract_id(cls, url):
- mobj = re.match(cls._VALID_URL, url, re.VERBOSE)
- if mobj is None:
- raise ExtractorError('Invalid URL: %s' % url)
- return mobj.group('id')
+ video_id = cls.get_temp_id(url)
+ if not video_id:
+ raise ExtractorError(f'Invalid URL: {url}')
+ return video_id
def _extract_chapters_from_json(self, data, duration):
chapter_list = traverse_obj(
if not strict:
chapter_list.sort(key=lambda c: c['start_time'] or 0)
- chapters = [{'start_time': 0, 'title': '<Untitled>'}]
+ chapters = [{'start_time': 0}]
for idx, chapter in enumerate(chapter_list):
- if chapter['start_time'] is None or not chapter['title']:
+ if chapter['start_time'] is None:
self.report_warning(f'Incomplete chapter {idx}')
elif chapters[-1]['start_time'] <= chapter['start_time'] <= duration:
- chapters[-1]['end_time'] = chapter['start_time']
chapters.append(chapter)
else:
self.report_warning(f'Invalid start time for chapter "{chapter["title"]}"')
- chapters[-1]['end_time'] = duration
- return chapters if len(chapters) > 1 and chapters[1]['start_time'] else chapters[1:]
+ return chapters[1:]
def _extract_comment(self, comment_renderer, parent=None):
comment_id = comment_renderer.get('commentId')
self.report_warning(last_error)
return prs, player_url
- def _extract_formats(self, streaming_data, video_id, player_url, is_live, duration):
+ def _extract_formats_and_subtitles(self, streaming_data, video_id, player_url, is_live, duration):
itags, stream_ids = {}, []
itag_qualities, res_qualities = {}, {}
q = qualities([
if val in qdict), -1)
return True
+ subtitles = {}
for sd in streaming_data:
hls_manifest_url = get_hls and sd.get('hlsManifestUrl')
if hls_manifest_url:
- for f in self._extract_m3u8_formats(hls_manifest_url, video_id, 'mp4', fatal=False):
+ fmts, subs = self._extract_m3u8_formats_and_subtitles(hls_manifest_url, video_id, 'mp4', fatal=False, live=is_live)
+ subtitles = self._merge_subtitles(subs, subtitles)
+ for f in fmts:
if process_manifest_format(f, 'hls', self._search_regex(
r'/itag/(\d+)', f['url'], 'itag', default=None)):
yield f
dash_manifest_url = get_dash and sd.get('dashManifestUrl')
if dash_manifest_url:
- for f in self._extract_mpd_formats(dash_manifest_url, video_id, fatal=False):
+ formats, subs = self._extract_mpd_formats_and_subtitles(dash_manifest_url, video_id, fatal=False)
+ subtitles = self._merge_subtitles(subs, subtitles) # Prioritize HLS subs over DASH
+ for f in formats:
if process_manifest_format(f, 'dash', f['format_id']):
f['filesize'] = int_or_none(self._search_regex(
r'/clen/(\d+)', f.get('fragment_base_url') or f['url'], 'file size', default=None))
f['is_from_start'] = True
yield f
+ yield subtitles
def _extract_storyboard(self, player_responses, duration):
spec = get_first(
'url': url,
'width': width,
'height': height,
+ 'fps': frame_count / duration,
+ 'rows': rows,
+ 'columns': cols,
'fragments': [{
'url': url.replace('$M', str(j)),
'duration': min(fragment_duration, duration - (j * fragment_duration)),
is_live = get_first(live_broadcast_details, 'isLiveNow')
streaming_data = traverse_obj(player_responses, (..., 'streamingData'), default=[])
- formats = list(self._extract_formats(streaming_data, video_id, player_url, is_live, duration))
+ *formats, subtitles = self._extract_formats_and_subtitles(streaming_data, video_id, player_url, is_live, duration)
- return live_broadcast_details, is_live, streaming_data, formats
+ return live_broadcast_details, is_live, streaming_data, formats, subtitles
def _real_extract(self, url):
url, smuggled_data = unsmuggle_url(url, {})
or get_first(microformats, 'lengthSeconds')
or parse_duration(search_meta('duration'))) or None
- if get_first(video_details, 'isPostLiveDvr'):
- self.write_debug('Video is in Post-Live Manifestless mode')
- if duration or 0 > 4 * 3600:
- self.report_warning(
- 'The livestream has not finished processing. Only 4 hours of the video can be currently downloaded. '
- 'This is a known issue and patches are welcome')
-
- live_broadcast_details, is_live, streaming_data, formats = self._list_formats(
- video_id, microformats, video_details, player_responses, player_url, duration)
+ live_broadcast_details, is_live, streaming_data, formats, automatic_captions = \
+ self._list_formats(video_id, microformats, video_details, player_responses, player_url)
if not formats:
if not self.get_param('allow_unplayable_formats') and traverse_obj(streaming_data, (..., 'licenseInfos')):
formats.extend(self._extract_storyboard(player_responses, duration))
- # Source is given priority since formats that throttle are given lower source_preference
- # When throttling issue is fully fixed, remove this
+ # source_preference is lower for throttled/potentially damaged formats
self._sort_formats(formats, ('quality', 'res', 'fps', 'hdr:12', 'source', 'codec:vp9.2', 'lang', 'proto'))
info = {
'release_timestamp': live_start_time,
}
+ if get_first(video_details, 'isPostLiveDvr'):
+ self.write_debug('Video is in Post-Live Manifestless mode')
+ info['live_status'] = 'post_live'
+ if (duration or 0) > 4 * 3600:
+ self.report_warning(
+ 'The livestream has not finished processing. Only 4 hours of the video can be currently downloaded. '
+ 'This is a known issue and patches are welcome')
+
+ subtitles = {}
pctr = traverse_obj(player_responses, (..., 'captions', 'playerCaptionsTracklistRenderer'), expected_type=dict)
if pctr:
def get_lang_code(track):
'name': sub_name,
})
- subtitles, automatic_captions = {}, {}
+ # NB: Constructing the full subtitle dictionary is slow
+ get_translated_subs = 'translated_subs' not in self._configuration_arg('skip') and (
+ self.get_param('writeautomaticsub', False) or self.get_param('listsubtitles'))
for lang_code, caption_track in captions.items():
base_url = caption_track.get('baseUrl')
orig_lang = parse_qs(base_url).get('lang', [None])[-1]
continue
orig_trans_code = trans_code
if caption_track.get('kind') != 'asr':
- if 'translated_subs' in self._configuration_arg('skip'):
+ if not get_translated_subs:
continue
trans_code += f'-{lang_code}'
trans_name += format_field(lang_name, None, ' from %s')
# Setting tlang=lang returns damaged subtitles.
process_language(automatic_captions, base_url, trans_code, trans_name,
{} if orig_lang == orig_trans_code else {'tlang': trans_code})
- info['automatic_captions'] = automatic_captions
- info['subtitles'] = subtitles
+
+ info['automatic_captions'] = automatic_captions
+ info['subtitles'] = subtitles
parsed_url = urllib.parse.urlparse(url)
for component in [parsed_url.fragment, parsed_url.query]:
}, {
'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',
'info_dict': {
- 'id': 'GgL890LIznQ', # This will keep changing
+ 'id': 'Wq15eF5vCbI', # This will keep changing
'ext': 'mp4',
'title': str,
'uploader': 'Sky News',
'uploader': 'NoCopyrightSounds',
'description': 'Providing you with copyright free / safe music for gaming, live streaming, studying and more!',
'uploader_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',
- 'title': 'NCS Releases',
+ 'title': 'NCS : All Releases 💿',
'uploader_url': 'https://www.youtube.com/c/NoCopyrightSounds',
'channel_url': 'https://www.youtube.com/c/NoCopyrightSounds',
'modified_date': r're:\d{8}',
'title': 'yt-dlp unlisted playlist test',
'availability': 'unlisted',
'tags': [],
- 'modified_date': '20211208',
+ 'modified_date': '20220418',
'channel': 'colethedj',
'view_count': int,
'description': '',
'channel': 'pukkandan',
'description': 'Test for collaborative playlist',
'title': 'yt-dlp test - collaborative playlist',
+ 'view_count': int,
'uploader_url': 'https://www.youtube.com/channel/UCKcqXmCcyqnhgpA5P0oHH_Q',
},
'playlist_mincount': 2
selected_tab_name = 'featured'
requested_tab_name = mobj['tab'][1:]
if 'no-youtube-channel-redirect' not in compat_opts:
- if requested_tab_name == 'live':
- # Live tab should have redirected to the video
- raise ExtractorError('The channel is not currently live', expected=True)
+ if requested_tab_name == 'live': # Live tab should have redirected to the video
+ raise UserNotLive(video_id=mobj['id'])
if requested_tab_name not in ('', selected_tab_name):
redirect_warning = f'The channel does not have a {requested_tab_name} tab'
if not original_tab_name:
'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
}, {
'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
- 'playlist_mincount': 654,
+ 'playlist_mincount': 455,
'info_dict': {
'title': '2018 Chinese New Singles (11/6 updated)',
'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
'channel_url': 'https://www.youtube.com/channel/UCEfMCQ9bs3tjvjy1s451zaw',
'availability': 'public',
'duration': 59,
+ 'comment_count': int,
+ 'channel_follower_count': int
},
'params': {
'noplaylist': True,
'info_dict': {
'id': '#cats',
'title': '#cats',
- 'entries': [{
- 'url': r're:https://(www\.)?youtube\.com/hashtag/cats',
- 'title': '#cats',
- }],
+ # The test suite does not have support for nested playlists
+ # 'entries': [{
+ # 'url': r're:https://(www\.)?youtube\.com/hashtag/cats',
+ # 'title': '#cats',
+ # }],
},
}, {
'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
'section_start': 29.0,
'section_end': 39.7,
'duration': 10.7,
+ 'age_limit': 0,
+ 'availability': 'public',
+ 'categories': ['Gaming'],
+ 'channel': 'Scott The Woz',
+ 'channel_id': 'UC4rqhyiTs7XyuODcECvuiiQ',
+ 'channel_url': 'https://www.youtube.com/channel/UC4rqhyiTs7XyuODcECvuiiQ',
+ 'description': 'md5:7a4517a17ea9b4bd98996399d8bb36e7',
+ 'like_count': int,
+ 'playable_in_embed': True,
+ 'tags': 'count:17',
+ 'thumbnail': 'https://i.ytimg.com/vi_webp/ScPX26pdQik/maxresdefault.webp',
+ 'title': 'Mobile Games on Console - Scott The Woz',
+ 'upload_date': '20210920',
+ 'uploader': 'Scott The Woz',
+ 'uploader_id': 'scottthewoz',
+ 'uploader_url': 'http://www.youtube.com/user/scottthewoz',
+ 'view_count': int,
+ 'live_status': 'not_live',
+ 'channel_follower_count': int
}
}]