from test.helper import FakeYDL, md5
-from youtube_dl.extractor import (
+from youtube_dlc.extractor import (
YoutubeIE,
DailymotionIE,
TEDIE,
self.DL.params['allsubtitles'] = True
subtitles = self.getSubtitles()
self.assertEqual(len(subtitles.keys()), 13)
- self.assertEqual(md5(subtitles['en']), '3cb210999d3e021bd6c7f0ea751eab06')
- self.assertEqual(md5(subtitles['it']), '6d752b98c31f1cf8d597050c7a2cb4b5')
+ self.assertEqual(md5(subtitles['en']), '688dd1ce0981683867e7fe6fde2a224b')
+ self.assertEqual(md5(subtitles['it']), '31324d30b8430b309f7f5979a504a769')
for lang in ['fr', 'de']:
self.assertTrue(subtitles.get(lang) is not None, 'Subtitles for \'%s\' not extracted' % lang)
self.DL.params['writesubtitles'] = True
self.DL.params['subtitlesformat'] = 'ttml'
subtitles = self.getSubtitles()
- self.assertEqual(md5(subtitles['en']), 'e306f8c42842f723447d9f63ad65df54')
+ self.assertEqual(md5(subtitles['en']), 'c97ddf1217390906fa9fbd34901f3da2')
def test_youtube_subtitles_vtt_format(self):
self.DL.params['writesubtitles'] = True
self.DL.params['subtitlesformat'] = 'vtt'
subtitles = self.getSubtitles()
- self.assertEqual(md5(subtitles['en']), '3cb210999d3e021bd6c7f0ea751eab06')
+ self.assertEqual(md5(subtitles['en']), 'ae1bd34126571a77aabd4d276b28044d')
def test_youtube_automatic_captions(self):
self.url = '8YoUxe5ncPo'
subtitles = self.getSubtitles()
self.assertTrue(subtitles['it'] is not None)
+ def test_youtube_no_automatic_captions(self):
+ self.url = 'QRS8MkLhQmM'
+ self.DL.params['writeautomaticsub'] = True
+ subtitles = self.getSubtitles()
+ self.assertTrue(not subtitles)
+
def test_youtube_translated_subtitles(self):
# This video has a subtitles track, which can be translated
- self.url = 'Ky9eprVWzlI'
+ self.url = 'i0ZabxXmH4Y'
self.DL.params['writeautomaticsub'] = True
self.DL.params['subtitleslangs'] = ['it']
subtitles = self.getSubtitles()
'396': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
'397': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
}
- _SUBTITLE_FORMATS = ('srv1', 'srv2', 'srv3', 'ttml', 'vtt')
+ _SUBTITLE_FORMATS = ('srv1', 'srv2', 'srv3', 'ttml', 'vtt', 'json3')
_GEO_BYPASS = False
raise ExtractorError(
'Signature extraction failed: ' + tb, cause=e)
- def _get_subtitles(self, video_id, webpage):
+ def _get_subtitles(self, video_id, webpage, has_live_chat_replay):
try:
subs_doc = self._download_xml(
'https://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id,
'ext': ext,
})
sub_lang_list[lang] = sub_formats
+ if has_live_chat_replay:
+ sub_lang_list['live_chat'] = [
+ {
+ 'video_id': video_id,
+ 'ext': 'json',
+ 'protocol': 'youtube_live_chat_replay',
+ },
+ ]
if not sub_lang_list:
self._downloader.report_warning('video doesn\'t have subtitles')
return {}
return self._parse_json(
uppercase_escape(config), video_id, fatal=False)
+ def _get_yt_initial_data(self, video_id, webpage):
+ config = self._search_regex(
+ (r'window\["ytInitialData"\]\s*=\s*(.*?)(?<=});',
+ r'var\s+ytInitialData\s*=\s*(.*?)(?<=});'),
+ webpage, 'ytInitialData', default=None)
+ if config:
+ return self._parse_json(
+ uppercase_escape(config), video_id, fatal=False)
+
def _get_automatic_captions(self, video_id, webpage):
"""We need the webpage for getting the captions url, pass it as an
argument to speed up the process."""
player_response, video_id, fatal=False)
if player_response:
renderer = player_response['captions']['playerCaptionsTracklistRenderer']
- base_url = renderer['captionTracks'][0]['baseUrl']
- sub_lang_list = []
- for lang in renderer['translationLanguages']:
- lang_code = lang.get('languageCode')
- if lang_code:
- sub_lang_list.append(lang_code)
- return make_captions(base_url, sub_lang_list)
-
+ caption_tracks = renderer['captionTracks']
+ for caption_track in caption_tracks:
+ if 'kind' not in caption_track:
+ # not an automatic transcription
+ continue
+ base_url = caption_track['baseUrl']
+ sub_lang_list = []
+ for lang in renderer['translationLanguages']:
+ lang_code = lang.get('languageCode')
+ if lang_code:
+ sub_lang_list.append(lang_code)
+ return make_captions(base_url, sub_lang_list)
+
+ self._downloader.report_warning("Couldn't find automatic captions for %s" % video_id)
+ return {}
# Some videos don't provide ttsurl but rather caption_tracks and
# caption_translation_languages (e.g. 20LmZk1hakA)
# Does not used anymore as of 22.06.2017
def _extract_chapters_from_json(self, webpage, video_id, duration):
if not webpage:
return
- player = self._parse_json(
+ initial_data = self._parse_json(
self._search_regex(
- r'RELATED_PLAYER_ARGS["\']\s*:\s*({.+})\s*,?\s*\n', webpage,
+ r'window\["ytInitialData"\] = (.+);\n', webpage,
'player args', default='{}'),
video_id, fatal=False)
- if not player or not isinstance(player, dict):
- return
- watch_next_response = player.get('watch_next_response')
- if not isinstance(watch_next_response, compat_str):
- return
- response = self._parse_json(watch_next_response, video_id, fatal=False)
- if not response or not isinstance(response, dict):
+ if not initial_data or not isinstance(initial_data, dict):
return
chapters_list = try_get(
- response,
+ initial_data,
lambda x: x['playerOverlays']
['playerOverlayRenderer']
['decoratedPlayerBarRenderer']
# Get video info
video_info = {}
embed_webpage = None
- if re.search(r'player-age-gate-content">', video_webpage) is not None:
+ if self._html_search_meta('og:restrictions:age', video_webpage, default=None) == "18+":
age_gate = True
# We simulate the access to the video from www.youtube.com/v/{video_id}
# this can be viewed without login into Youtube
if is_live is None:
is_live = bool_or_none(video_details.get('isLive'))
+ has_live_chat_replay = False
+ if not is_live:
+ yt_initial_data = self._get_yt_initial_data(video_id, video_webpage)
+ try:
+ yt_initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
+ has_live_chat_replay = True
+ except (KeyError, IndexError, TypeError):
+ pass
+
# Check for "rental" videos
if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info:
raise ExtractorError('"rental" videos not supported. See https://github.com/ytdl-org/youtube-dl/issues/359 for more information.', expected=True)
or try_get(video_info, lambda x: float_or_none(x['avg_rating'][0])))
# subtitles
- video_subtitles = self.extract_subtitles(video_id, video_webpage)
+ video_subtitles = self.extract_subtitles(
+ video_id, video_webpage, has_live_chat_replay)
automatic_captions = self.extract_automatic_captions(video_id, video_webpage)
video_duration = try_get(