]> jfr.im git - yt-dlp.git/blame - youtube_dl/extractor/youtube.py
[zattoo] Add support for more zattoo platform sites
[yt-dlp.git] / youtube_dl / extractor / youtube.py
CommitLineData
c5e8d7af 1# coding: utf-8
c5e8d7af 2
78caa52a
PH
3from __future__ import unicode_literals
4
5
0ca96d48 6import itertools
c5e8d7af 7import json
c4417ddb 8import os.path
d77ab8e2 9import random
c5e8d7af 10import re
42939b61 11import time
e0df6211 12import traceback
c5e8d7af 13
b05654f0 14from .common import InfoExtractor, SearchInfoExtractor
2b25cb5d 15from ..jsinterp import JSInterpreter
54256267 16from ..swfinterp import SWFInterpreter
4bb4a188 17from ..compat import (
edf3e38e 18 compat_chr,
8d81f3e3 19 compat_kwargs,
c5e8d7af 20 compat_parse_qs,
7fd002c0
S
21 compat_urllib_parse_unquote,
22 compat_urllib_parse_unquote_plus,
15707c7e 23 compat_urllib_parse_urlencode,
7c80519c 24 compat_urllib_parse_urlparse,
7c61bd36 25 compat_urlparse,
c5e8d7af 26 compat_str,
4bb4a188
PH
27)
28from ..utils import (
c5e8d7af 29 clean_html,
9b9c5355 30 error_to_compat_str,
c5e8d7af 31 ExtractorError,
2d30521a 32 float_or_none,
4bb4a188
PH
33 get_element_by_attribute,
34 get_element_by_id,
dd27fd17 35 int_or_none,
94278f72 36 mimetype2ext,
4bb4a188 37 orderedSet,
6310acf5 38 parse_codecs,
7c80519c 39 parse_duration,
54fc90aa 40 qualities,
0cb58b02 41 remove_quotes,
3995d37d 42 remove_start,
cf7e015f 43 smuggle_url,
c93d53f5 44 str_to_int,
556dbe7f 45 try_get,
c5e8d7af
PH
46 unescapeHTML,
47 unified_strdate,
cf7e015f 48 unsmuggle_url,
81c2f20b 49 uppercase_escape,
6e6bc8da 50 urlencode_postdata,
c5e8d7af
PH
51)
52
5f6a1245 53
class YoutubeBaseInfoExtractor(InfoExtractor):
    """Provide base functions for Youtube extractors"""
    _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
    _TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge'

    # Endpoints used by the multi-step sign-in flow implemented in _login()
    _LOOKUP_URL = 'https://accounts.google.com/_/signin/sl/lookup'
    _CHALLENGE_URL = 'https://accounts.google.com/_/signin/sl/challenge'
    # {0} is filled with the "TL" token taken from the challenge response
    _TFA_URL = 'https://accounts.google.com/_/signin/challenge?hl=en&TL={0}'

    _NETRC_MACHINE = 'youtube'
    # If True it will raise an error if no login info is provided
    _LOGIN_REQUIRED = False

    _PLAYLIST_ID_RE = r'(?:PL|LL|EC|UU|FL|RD|UL|TL|OLAK5uy_)[0-9A-Za-z-_]{10,}'

    def _set_language(self):
        """Set the PREF cookie on .youtube.com with hl=en."""
        self._set_cookie(
            '.youtube.com', 'PREF', 'f1=50000000&hl=en',
            # YouTube sets the expire time to about two months
            expire_time=time.time() + 2 * 30 * 24 * 3600)

    def _ids_to_results(self, ids):
        """Return a list of url_result entries (ie 'Youtube'), one per video id."""
        return [
            self.url_result(vid_id, 'Youtube', video_id=vid_id)
            for vid_id in ids]

    def _login(self):
        """
        Attempt to log in to YouTube.
        True is returned if successful or skipped.
        False is returned if login failed.

        If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.
        """
        username, password = self._get_login_info()
        # No authentication to be performed
        if username is None:
            if self._LOGIN_REQUIRED and self._downloader.params.get('cookiefile') is None:
                raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
            return True

        login_page = self._download_webpage(
            self._LOGIN_URL, None,
            note='Downloading login page',
            errnote='unable to fetch login page', fatal=False)
        if login_page is False:
            # Explicit False (rather than a bare return) to match the
            # documented contract; callers only test truthiness.
            return False

        login_form = self._hidden_inputs(login_page)

        def req(url, f_req, note, errnote):
            """POST the login form plus the JSON-encoded f_req payload to url.

            Returns the parsed JSON response, or False on failure
            (the download is non-fatal).
            """
            data = login_form.copy()
            data.update({
                'pstMsg': 1,
                'checkConnection': 'youtube',
                'checkedDomains': 'youtube',
                'hl': 'en',
                'deviceinfo': '[null,null,null,[],null,"US",null,null,[],"GlifWebSignIn",null,[null,null,[]]]',
                'f.req': json.dumps(f_req),
                'flowName': 'GlifWebSignIn',
                'flowEntry': 'ServiceLogin',
            })
            return self._download_json(
                url, None, note=note, errnote=errnote,
                # Drop everything that precedes the first '[' so that the
                # remainder can be parsed as JSON
                transform_source=lambda s: re.sub(r'^[^[]*', '', s),
                fatal=False,
                data=urlencode_postdata(data), headers={
                    'Content-Type': 'application/x-www-form-urlencoded;charset=utf-8',
                    'Google-Accounts-XSRF': 1,
                })

        def warn(message):
            self._downloader.report_warning(message)

        lookup_req = [
            username,
            None, [], None, 'US', None, None, 2, False, True,
            [
                None, None,
                [2, 1, None, 1,
                 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn',
                 None, [], 4],
                1, [None, None, []], None, None, None, True
            ],
            username,
        ]

        lookup_results = req(
            self._LOOKUP_URL, lookup_req,
            'Looking up account info', 'Unable to look up account info')

        if lookup_results is False:
            return False

        user_hash = try_get(lookup_results, lambda x: x[0][2], compat_str)
        if not user_hash:
            warn('Unable to extract user hash')
            return False

        challenge_req = [
            user_hash,
            None, 1, None, [1, None, None, None, [password, None, True]],
            [
                None, None, [2, 1, None, 1, 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn', None, [], 4],
                1, [None, None, []], None, None, None, True
            ]]

        challenge_results = req(
            self._CHALLENGE_URL, challenge_req,
            'Logging in', 'Unable to log in')

        if challenge_results is False:
            # Explicit False for consistency with the other failure paths
            return False

        login_res = try_get(challenge_results, lambda x: x[0][5], list)
        if login_res:
            login_msg = try_get(login_res, lambda x: x[5], compat_str)
            # The conditional must be parenthesized: '%' binds tighter than
            # 'if ... else', so without the parentheses any message other
            # than INCORRECT_ANSWER_ENTERED was reported without the prefix.
            warn(
                'Unable to login: %s' % (
                    'Invalid password'
                    if login_msg == 'INCORRECT_ANSWER_ENTERED' else login_msg))
            return False

        res = try_get(challenge_results, lambda x: x[0][-1], list)
        if not res:
            warn('Unable to extract result entry')
            return False

        login_challenge = try_get(res, lambda x: x[0][0], list)
        if login_challenge:
            challenge_str = try_get(login_challenge, lambda x: x[2], compat_str)
            if challenge_str == 'TWO_STEP_VERIFICATION':
                # SEND_SUCCESS - TFA code has been successfully sent to phone
                # QUOTA_EXCEEDED - reached the limit of TFA codes
                status = try_get(login_challenge, lambda x: x[5], compat_str)
                if status == 'QUOTA_EXCEEDED':
                    warn('Exceeded the limit of TFA codes, try later')
                    return False

                tl = try_get(challenge_results, lambda x: x[1][2], compat_str)
                if not tl:
                    warn('Unable to extract TL')
                    return False

                tfa_code = self._get_tfa_info('2-step verification code')

                if not tfa_code:
                    warn(
                        'Two-factor authentication required. Provide it either interactively or with --twofactor <code> '
                        '(Note that only TOTP (Google Authenticator App) codes work at this time.)')
                    return False

                # Strip a leading 'G-' from the code if the user typed it
                tfa_code = remove_start(tfa_code, 'G-')

                tfa_req = [
                    user_hash, None, 2, None,
                    [
                        9, None, None, None, None, None, None, None,
                        [None, tfa_code, True, 2]
                    ]]

                tfa_results = req(
                    self._TFA_URL.format(tl), tfa_req,
                    'Submitting TFA code', 'Unable to submit TFA code')

                if tfa_results is False:
                    return False

                tfa_res = try_get(tfa_results, lambda x: x[0][5], list)
                if tfa_res:
                    tfa_msg = try_get(tfa_res, lambda x: x[5], compat_str)
                    # Parenthesized for the same precedence reason as the
                    # password failure message above.
                    warn(
                        'Unable to finish TFA: %s' % (
                            'Invalid TFA code'
                            if tfa_msg == 'INCORRECT_ANSWER_ENTERED' else tfa_msg))
                    return False

                check_cookie_url = try_get(
                    tfa_results, lambda x: x[0][-1][2], compat_str)
            else:
                # Any other challenge cannot be solved here; tell the user
                # to solve it in a browser.
                CHALLENGES = {
                    'LOGIN_CHALLENGE': "This device isn't recognized. For your security, Google wants to make sure it's really you.",
                    'USERNAME_RECOVERY': 'Please provide additional information to aid in the recovery process.',
                    'REAUTH': "There is something unusual about your activity. For your security, Google wants to make sure it's really you.",
                }
                challenge = CHALLENGES.get(
                    challenge_str,
                    '%s returned error %s.' % (self.IE_NAME, challenge_str))
                warn('%s\nGo to https://accounts.google.com/, login and solve a challenge.' % challenge)
                return False
        else:
            check_cookie_url = try_get(res, lambda x: x[2], compat_str)

        if not check_cookie_url:
            warn('Unable to extract CheckCookie URL')
            return False

        check_cookie_results = self._download_webpage(
            check_cookie_url, None, 'Checking cookie', fatal=False)

        if check_cookie_results is False:
            return False

        # The login is considered successful only if the CheckCookie page
        # mentions the account page URL
        if 'https://myaccount.google.com/' not in check_cookie_results:
            warn('Unable to log in')
            return False

        return True

    def _download_webpage_handle(self, *args, **kwargs):
        """Delegate to the parent implementation with disable_polymer=true
        added to the request query.

        The caller's query dict is copied, never mutated.
        """
        query = kwargs.get('query', {}).copy()
        query['disable_polymer'] = 'true'
        kwargs['query'] = query
        return super(YoutubeBaseInfoExtractor, self)._download_webpage_handle(
            *args, **compat_kwargs(kwargs))

    def _real_initialize(self):
        """Set the language cookie and attempt to log in.

        Does nothing when no downloader is attached.
        """
        if self._downloader is None:
            return
        self._set_language()
        if not self._login():
            return
c5e8d7af 274
8377574c 275
class YoutubeEntryListBaseInfoExtractor(YoutubeBaseInfoExtractor):
    # Extract entries from page with "Load more" button
    def _entries(self, page, playlist_id):
        """Yield the entries of page and of every follow-up "Load more" page.

        Each chunk of HTML is handed to self._process_page(). The next chunk
        is fetched as JSON from the data-uix-load-more-href link found in the
        most recent load-more widget.
        """
        content_html = page
        more_widget_html = page
        page_num = 0
        while True:
            page_num += 1

            for entry in self._process_page(content_html):
                yield entry

            load_more = re.search(
                r'data-uix-load-more-href="/?(?P<more>[^"]+)"',
                more_widget_html)
            if load_more is None:
                # No further "Load more" link: this was the last page
                return

            next_page_url = 'https://youtube.com/%s' % load_more.group('more')
            more = self._download_json(
                next_page_url, playlist_id,
                'Downloading page #%s' % page_num,
                transform_source=uppercase_escape)

            content_html = more['content_html']
            if not content_html.strip():
                # Some webpages show a "Load more" button but they don't
                # have more videos
                return
            more_widget_html = more['load_more_widget_html']
298
061a75ed
S
299
class YoutubePlaylistBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
    def _process_page(self, content):
        """Yield a 'Youtube' url_result for every video found in content."""
        for entry_id, entry_title in self.extract_videos_from_page(content):
            yield self.url_result(entry_id, 'Youtube', entry_id, entry_title)

    def extract_videos_from_page(self, page):
        """Return (video_id, title) pairs for the matches of self._VIDEO_RE.

        Ids are kept in order of first appearance and reported once. If the
        first occurrence of an id had no title, the first non-empty title
        seen later for that id is used.
        """
        video_ids = []
        video_titles = []
        for match in re.finditer(self._VIDEO_RE, page):
            video_id = match.group('id')
            # The link with index 0 is not the first video of the playlist (not sure if still actual)
            # NOTE(review): this compares the 'id' group, not 'index', against '0' - confirm intent
            if 'index' in match.groupdict() and video_id == '0':
                continue

            title = unescapeHTML(match.group('title'))
            if title:
                title = title.strip()

            if video_id not in video_ids:
                video_ids.append(video_id)
                video_titles.append(title)
                continue

            # Already seen: only fill in a title that is still missing
            position = video_ids.index(video_id)
            if title and not video_titles[position]:
                video_titles[position] = title
        return zip(video_ids, video_titles)
324
325
061a75ed
S
class YoutubePlaylistsBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
    def _process_page(self, content):
        """Yield a 'YoutubePlaylist' url_result for each distinct playlist
        link found in content, in order of first appearance."""
        found_ids = re.findall(
            r'<h3[^>]+class="[^"]*yt-lockup-title[^"]*"[^>]*><a[^>]+href="/?playlist\?list=([0-9A-Za-z-_]{10,})"',
            content)
        for playlist_id in orderedSet(found_ids):
            playlist_url = 'https://www.youtube.com/playlist?list=%s' % playlist_id
            yield self.url_result(playlist_url, 'YoutubePlaylist')

    def _real_extract(self, url):
        """Download the page at url and return a playlist result.

        The id comes from the URL, the title from the page's og metadata
        (optional), and the entries from every "Load more" page.
        """
        playlist_id = self._match_id(url)
        webpage = self._download_webpage(url, playlist_id)
        title = self._og_search_title(webpage, fatal=False)
        entries = self._entries(webpage, playlist_id)
        return self.playlist_result(entries, playlist_id, title)
0c148415
S
339
340
360e1ca5 341class YoutubeIE(YoutubeBaseInfoExtractor):
78caa52a 342 IE_DESC = 'YouTube.com'
cb7dfeea 343 _VALID_URL = r"""(?x)^
c5e8d7af 344 (
edb53e2d 345 (?:https?://|//) # http(s):// or protocol-independent URL
cb7dfeea 346 (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/|
484aaeb2 347 (?:www\.)?deturl\.com/www\.youtube\.com/|
e70dc1d1 348 (?:www\.)?pwnyoutube\.com/|
8b561bfc 349 (?:www\.)?hooktube\.com/|
f7000f3a 350 (?:www\.)?yourepeat\.com/|
e69ae5b9
JMF
351 tube\.majestyc\.net/|
352 youtube\.googleapis\.com/) # the various hostnames, with wildcard subdomains
c5e8d7af
PH
353 (?:.*?\#/)? # handle anchor (#/) redirect urls
354 (?: # the various things that can precede the ID:
ac7553d0 355 (?:(?:v|embed|e)/(?!videoseries)) # v/ or embed/ or e/
c5e8d7af 356 |(?: # or the v= param in all its forms
f7000f3a 357 (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
c5e8d7af 358 (?:\?|\#!?) # the params delimiter ? or # or #!
040ac686 359 (?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
c5e8d7af
PH
360 v=
361 )
f4b05232 362 ))
cbaed4bb
S
363 |(?:
364 youtu\.be| # just youtu.be/xxxx
6d4fc66b
S
365 vid\.plus| # or vid.plus/xxxx
366 zwearz\.com/watch| # or zwearz.com/watch/xxxx
cbaed4bb 367 )/
edb53e2d 368 |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
f4b05232 369 )
c5e8d7af 370 )? # all until now is optional -> you can pass the naked ID
8963d9c2 371 ([0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
d0ba5587
S
372 (?!.*?\blist=
373 (?:
374 %(playlist_id)s| # combined list/video URLs are handled by the playlist IE
375 WL # WL are handled by the watch later IE
376 )
377 )
c5e8d7af 378 (?(1).+)? # if we found the ID, everything can follow
d0ba5587 379 $""" % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
c5e8d7af 380 _NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
2c62dc26 381 _formats = {
c2d3cb4c 382 '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
383 '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
384 '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
385 '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
386 '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
387 '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
388 '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
389 '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
3834d3e3 390 # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
c2d3cb4c 391 '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
392 '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
393 '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
394 '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
395 '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
396 '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
e1a0bfdf 397 '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
c2d3cb4c 398 '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
399 '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
e1a0bfdf 400
401
402 # 3D videos
c2d3cb4c 403 '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
404 '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
405 '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
406 '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
e1a0bfdf 407 '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
408 '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
409 '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
836a086c 410
96fb5605 411 # Apple HTTP Live Streaming
11f12195 412 '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
c2d3cb4c 413 '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
414 '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
415 '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
416 '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
417 '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
e1a0bfdf 418 '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
419 '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
2c62dc26
PH
420
421 # DASH mp4 video
d23028a8
S
422 '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
423 '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
424 '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
425 '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
426 '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
427 '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/rg3/youtube-dl/issues/4559)
428 '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
429 '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
430 '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
431 '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
432 '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
433 '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},
836a086c 434
f6f1fc92 435 # Dash mp4 audio
d23028a8
S
436 '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
437 '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
438 '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
439 '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
440 '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
441 '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
442 '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},
836a086c
AZ
443
444 # Dash webm
d23028a8
S
445 '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
446 '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
447 '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
448 '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
449 '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
450 '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
451 '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
452 '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
453 '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
454 '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
455 '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
456 '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
457 '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
458 '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
459 '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
4c6b4764 460 # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
d23028a8
S
461 '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
462 '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
463 '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
464 '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
465 '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
466 '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
2c62dc26
PH
467
468 # Dash webm audio
d23028a8
S
469 '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
470 '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},
ce6b9a2d 471
0857baad 472 # Dash webm audio with opus inside
d23028a8
S
473 '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
474 '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
475 '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},
0857baad 476
ce6b9a2d
PH
477 # RTMP (unnamed)
478 '_rtmp': {'protocol': 'rtmp'},
c5e8d7af 479 }
23d17e4b 480 _SUBTITLE_FORMATS = ('ttml', 'vtt')
836a086c 481
fd5c4aab
S
482 _GEO_BYPASS = False
483
78caa52a 484 IE_NAME = 'youtube'
2eb88d95
PH
485 _TESTS = [
486 {
2d3d2997 487 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
4bc3a23e
PH
488 'info_dict': {
489 'id': 'BaW_jenozKc',
490 'ext': 'mp4',
491 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
492 'uploader': 'Philipp Hagemeister',
493 'uploader_id': 'phihag',
ec85ded8 494 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
dd4c4492
S
495 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
496 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
4bc3a23e 497 'upload_date': '20121002',
7caf9830 498 'license': 'Standard YouTube License',
4bc3a23e
PH
499 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
500 'categories': ['Science & Technology'],
000b6b5a 501 'tags': ['youtube-dl'],
556dbe7f 502 'duration': 10,
3e7c1224
PH
503 'like_count': int,
504 'dislike_count': int,
7c80519c 505 'start_time': 1,
297a564b 506 'end_time': 9,
2eb88d95 507 }
0e853ca4 508 },
0e853ca4 509 {
2d3d2997 510 'url': 'https://www.youtube.com/watch?v=UxxajLWwzqY',
4bc3a23e
PH
511 'note': 'Test generic use_cipher_signature video (#897)',
512 'info_dict': {
513 'id': 'UxxajLWwzqY',
514 'ext': 'mp4',
515 'upload_date': '20120506',
516 'title': 'Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]',
0cb58b02 517 'alt_title': 'I Love It (feat. Charli XCX)',
7caf9830 518 'description': 'md5:f3ceb5ef83a08d95b9d146f973157cc8',
000b6b5a
S
519 'tags': ['Icona Pop i love it', 'sweden', 'pop music', 'big beat records', 'big beat', 'charli',
520 'xcx', 'charli xcx', 'girls', 'hbo', 'i love it', "i don't care", 'icona', 'pop',
521 'iconic ep', 'iconic', 'love', 'it'],
556dbe7f 522 'duration': 180,
4bc3a23e
PH
523 'uploader': 'Icona Pop',
524 'uploader_id': 'IconaPop',
ec85ded8 525 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IconaPop',
7caf9830 526 'license': 'Standard YouTube License',
0cb58b02 527 'creator': 'Icona Pop',
936784b2
S
528 'track': 'I Love It (feat. Charli XCX)',
529 'artist': 'Icona Pop',
2eb88d95 530 }
c108eb73
JMF
531 },
532 {
4bc3a23e
PH
533 'url': 'https://www.youtube.com/watch?v=07FYdnEawAQ',
534 'note': 'Test VEVO video with age protection (#956)',
535 'info_dict': {
536 'id': '07FYdnEawAQ',
537 'ext': 'mp4',
538 'upload_date': '20130703',
539 'title': 'Justin Timberlake - Tunnel Vision (Explicit)',
0cb58b02 540 'alt_title': 'Tunnel Vision',
4bc3a23e 541 'description': 'md5:64249768eec3bc4276236606ea996373',
556dbe7f 542 'duration': 419,
4bc3a23e
PH
543 'uploader': 'justintimberlakeVEVO',
544 'uploader_id': 'justintimberlakeVEVO',
ec85ded8 545 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/justintimberlakeVEVO',
7caf9830 546 'license': 'Standard YouTube License',
0cb58b02 547 'creator': 'Justin Timberlake',
7e72694b 548 'track': 'Tunnel Vision',
936784b2 549 'artist': 'Justin Timberlake',
34952f09 550 'age_limit': 18,
c108eb73
JMF
551 }
552 },
fccd3771 553 {
4bc3a23e
PH
554 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
555 'note': 'Embed-only video (#1746)',
556 'info_dict': {
557 'id': 'yZIXLfi8CZQ',
558 'ext': 'mp4',
559 'upload_date': '20120608',
560 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
561 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
562 'uploader': 'SET India',
94bfcd23 563 'uploader_id': 'setindia',
ec85ded8 564 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
7caf9830 565 'license': 'Standard YouTube License',
94bfcd23 566 'age_limit': 18,
fccd3771
PH
567 }
568 },
11b56058 569 {
2d3d2997 570 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=UxxajLWwzqY',
11b56058
PM
571 'note': 'Use the first video ID in the URL',
572 'info_dict': {
573 'id': 'BaW_jenozKc',
574 'ext': 'mp4',
575 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
576 'uploader': 'Philipp Hagemeister',
577 'uploader_id': 'phihag',
ec85ded8 578 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
11b56058 579 'upload_date': '20121002',
7caf9830 580 'license': 'Standard YouTube License',
11b56058
PM
581 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
582 'categories': ['Science & Technology'],
583 'tags': ['youtube-dl'],
556dbe7f 584 'duration': 10,
11b56058
PM
585 'like_count': int,
586 'dislike_count': int,
34a7de29
S
587 },
588 'params': {
589 'skip_download': True,
590 },
11b56058 591 },
dd27fd17 592 {
2d3d2997 593 'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
4bc3a23e
PH
594 'note': '256k DASH audio (format 141) via DASH manifest',
595 'info_dict': {
596 'id': 'a9LDPn-MO4I',
597 'ext': 'm4a',
598 'upload_date': '20121002',
599 'uploader_id': '8KVIDEO',
ec85ded8 600 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
4bc3a23e
PH
601 'description': '',
602 'uploader': '8KVIDEO',
7caf9830 603 'license': 'Standard YouTube License',
4bc3a23e 604 'title': 'UHDTV TEST 8K VIDEO.mp4'
4919603f 605 },
4bc3a23e
PH
606 'params': {
607 'youtube_include_dash_manifest': True,
608 'format': '141',
4919603f 609 },
de3c7fe0 610 'skip': 'format 141 not served anymore',
dd27fd17 611 },
3489b7d2
JMF
612 # DASH manifest with encrypted signature
613 {
78caa52a
PH
614 'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
615 'info_dict': {
616 'id': 'IB3lcPjvWLA',
617 'ext': 'm4a',
b766eb27 618 'title': 'Afrojack, Spree Wilson - The Spark ft. Spree Wilson',
eb6793ba 619 'description': 'md5:1900ed86ee514927b9e00fbead6969a5',
556dbe7f 620 'duration': 244,
78caa52a
PH
621 'uploader': 'AfrojackVEVO',
622 'uploader_id': 'AfrojackVEVO',
623 'upload_date': '20131011',
7caf9830 624 'license': 'Standard YouTube License',
3489b7d2 625 },
4bc3a23e 626 'params': {
78caa52a 627 'youtube_include_dash_manifest': True,
de3c7fe0 628 'format': '141/bestaudio[ext=m4a]',
3489b7d2
JMF
629 },
630 },
aaeb86f6
S
631 # JS player signature function name containing $
632 {
633 'url': 'https://www.youtube.com/watch?v=nfWlot6h_JM',
634 'info_dict': {
635 'id': 'nfWlot6h_JM',
636 'ext': 'm4a',
637 'title': 'Taylor Swift - Shake It Off',
0cb58b02 638 'alt_title': 'Shake It Off',
f57b7835 639 'description': 'md5:95f66187cd7c8b2c13eb78e1223b63c3',
556dbe7f 640 'duration': 242,
aaeb86f6
S
641 'uploader': 'TaylorSwiftVEVO',
642 'uploader_id': 'TaylorSwiftVEVO',
643 'upload_date': '20140818',
7caf9830 644 'license': 'Standard YouTube License',
0cb58b02 645 'creator': 'Taylor Swift',
aaeb86f6
S
646 },
647 'params': {
648 'youtube_include_dash_manifest': True,
de3c7fe0 649 'format': '141/bestaudio[ext=m4a]',
aaeb86f6
S
650 },
651 },
aa79ac0c
PH
652 # Controversy video
653 {
654 'url': 'https://www.youtube.com/watch?v=T4XJQO3qol8',
655 'info_dict': {
656 'id': 'T4XJQO3qol8',
657 'ext': 'mp4',
556dbe7f 658 'duration': 219,
aa79ac0c 659 'upload_date': '20100909',
eb6793ba 660 'uploader': 'TJ Kirk',
aa79ac0c 661 'uploader_id': 'TheAmazingAtheist',
ec85ded8 662 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheAmazingAtheist',
7caf9830 663 'license': 'Standard YouTube License',
aa79ac0c
PH
664 'title': 'Burning Everyone\'s Koran',
665 'description': 'SUBSCRIBE: http://www.youtube.com/saturninefilms\n\nEven Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html',
666 }
c522adb1
JMF
667 },
668 # Normal age-gate video (No vevo, embed allowed)
669 {
2d3d2997 670 'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
c522adb1
JMF
671 'info_dict': {
672 'id': 'HtVdAasjOgU',
673 'ext': 'mp4',
674 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
ec85ded8 675 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
556dbe7f 676 'duration': 142,
c522adb1
JMF
677 'uploader': 'The Witcher',
678 'uploader_id': 'WitcherGame',
ec85ded8 679 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
c522adb1 680 'upload_date': '20140605',
7caf9830 681 'license': 'Standard YouTube License',
34952f09 682 'age_limit': 18,
c522adb1
JMF
683 },
684 },
fccae2b9
S
685 # Age-gate video with encrypted signature
686 {
2d3d2997 687 'url': 'https://www.youtube.com/watch?v=6kLq3WMV1nU',
fccae2b9
S
688 'info_dict': {
689 'id': '6kLq3WMV1nU',
eb6793ba 690 'ext': 'webm',
fccae2b9
S
691 'title': 'Dedication To My Ex (Miss That) (Lyric Video)',
692 'description': 'md5:33765bb339e1b47e7e72b5490139bb41',
eb6793ba 693 'duration': 246,
fccae2b9
S
694 'uploader': 'LloydVEVO',
695 'uploader_id': 'LloydVEVO',
ec85ded8 696 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/LloydVEVO',
fccae2b9 697 'upload_date': '20110629',
7caf9830 698 'license': 'Standard YouTube License',
34952f09 699 'age_limit': 18,
fccae2b9
S
700 },
701 },
774e208f 702 # video_info is None (https://github.com/rg3/youtube-dl/issues/4421)
7d02dcfa 703 # YouTube Red ad is not captured for creator
774e208f
PH
704 {
705 'url': '__2ABJjxzNo',
706 'info_dict': {
707 'id': '__2ABJjxzNo',
708 'ext': 'mp4',
556dbe7f 709 'duration': 266,
774e208f
PH
710 'upload_date': '20100430',
711 'uploader_id': 'deadmau5',
ec85ded8 712 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
0cb58b02 713 'creator': 'deadmau5',
774e208f
PH
714 'description': 'md5:12c56784b8032162bb936a5f76d55360',
715 'uploader': 'deadmau5',
7caf9830 716 'license': 'Standard YouTube License',
774e208f 717 'title': 'Deadmau5 - Some Chords (HD)',
0cb58b02 718 'alt_title': 'Some Chords',
774e208f
PH
719 },
720 'expected_warnings': [
721 'DASH manifest missing',
722 ]
e52a40ab
PH
723 },
724 # Olympics (https://github.com/rg3/youtube-dl/issues/4431)
725 {
726 'url': 'lqQg6PlCWgI',
727 'info_dict': {
728 'id': 'lqQg6PlCWgI',
729 'ext': 'mp4',
556dbe7f 730 'duration': 6085,
90227264 731 'upload_date': '20150827',
cbe2bd91 732 'uploader_id': 'olympic',
ec85ded8 733 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
7caf9830 734 'license': 'Standard YouTube License',
cbe2bd91 735 'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
be49068d 736 'uploader': 'Olympic',
cbe2bd91
PH
737 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
738 },
739 'params': {
740 'skip_download': 'requires avconv',
e52a40ab 741 }
cbe2bd91 742 },
6271f1ca
PH
743 # Non-square pixels
744 {
745 'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
746 'info_dict': {
747 'id': '_b-2C3KPAM0',
748 'ext': 'mp4',
749 'stretched_ratio': 16 / 9.,
556dbe7f 750 'duration': 85,
6271f1ca
PH
751 'upload_date': '20110310',
752 'uploader_id': 'AllenMeow',
ec85ded8 753 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
6271f1ca 754 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
eb6793ba 755 'uploader': '孫ᄋᄅ',
7caf9830 756 'license': 'Standard YouTube License',
6271f1ca
PH
757 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
758 },
06b491eb
S
759 },
760 # url_encoded_fmt_stream_map is empty string
761 {
762 'url': 'qEJwOuvDf7I',
763 'info_dict': {
764 'id': 'qEJwOuvDf7I',
f57b7835 765 'ext': 'webm',
06b491eb
S
766 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
767 'description': '',
768 'upload_date': '20150404',
769 'uploader_id': 'spbelect',
770 'uploader': 'Наблюдатели Петербурга',
771 },
772 'params': {
773 'skip_download': 'requires avconv',
e323cf3f
S
774 },
775 'skip': 'This live event has ended.',
06b491eb 776 },
da77d856
S
777 # Extraction from multiple DASH manifests (https://github.com/rg3/youtube-dl/pull/6097)
778 {
779 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
780 'info_dict': {
781 'id': 'FIl7x6_3R5Y',
eb6793ba 782 'ext': 'webm',
da77d856
S
783 'title': 'md5:7b81415841e02ecd4313668cde88737a',
784 'description': 'md5:116377fd2963b81ec4ce64b542173306',
556dbe7f 785 'duration': 220,
da77d856
S
786 'upload_date': '20150625',
787 'uploader_id': 'dorappi2000',
ec85ded8 788 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
da77d856 789 'uploader': 'dorappi2000',
7caf9830 790 'license': 'Standard YouTube License',
eb6793ba 791 'formats': 'mincount:31',
da77d856 792 },
eb6793ba 793 'skip': 'not actual anymore',
2ee8f5d8 794 },
8a1a26ce
YCH
795 # DASH manifest with segment_list
796 {
797 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
798 'md5': '8ce563a1d667b599d21064e982ab9e31',
799 'info_dict': {
800 'id': 'CsmdDsKjzN8',
801 'ext': 'mp4',
17ee98e1 802 'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510
8a1a26ce
YCH
803 'uploader': 'Airtek',
804 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
805 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
7caf9830 806 'license': 'Standard YouTube License',
8a1a26ce
YCH
807 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
808 },
809 'params': {
810 'youtube_include_dash_manifest': True,
811 'format': '135', # bestvideo
be49068d
S
812 },
813 'skip': 'This live event has ended.',
2ee8f5d8 814 },
cf7e015f
S
815 {
816 # Multifeed videos (multiple cameras), URL is for Main Camera
817 'url': 'https://www.youtube.com/watch?v=jqWvoWXjCVs',
818 'info_dict': {
819 'id': 'jqWvoWXjCVs',
820 'title': 'teamPGP: Rocket League Noob Stream',
821 'description': 'md5:dc7872fb300e143831327f1bae3af010',
822 },
823 'playlist': [{
824 'info_dict': {
825 'id': 'jqWvoWXjCVs',
826 'ext': 'mp4',
827 'title': 'teamPGP: Rocket League Noob Stream (Main Camera)',
828 'description': 'md5:dc7872fb300e143831327f1bae3af010',
556dbe7f 829 'duration': 7335,
cf7e015f
S
830 'upload_date': '20150721',
831 'uploader': 'Beer Games Beer',
832 'uploader_id': 'beergamesbeer',
ec85ded8 833 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
7caf9830 834 'license': 'Standard YouTube License',
cf7e015f
S
835 },
836 }, {
837 'info_dict': {
838 'id': '6h8e8xoXJzg',
839 'ext': 'mp4',
840 'title': 'teamPGP: Rocket League Noob Stream (kreestuh)',
841 'description': 'md5:dc7872fb300e143831327f1bae3af010',
556dbe7f 842 'duration': 7337,
cf7e015f
S
843 'upload_date': '20150721',
844 'uploader': 'Beer Games Beer',
845 'uploader_id': 'beergamesbeer',
ec85ded8 846 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
7caf9830 847 'license': 'Standard YouTube License',
cf7e015f
S
848 },
849 }, {
850 'info_dict': {
851 'id': 'PUOgX5z9xZw',
852 'ext': 'mp4',
853 'title': 'teamPGP: Rocket League Noob Stream (grizzle)',
854 'description': 'md5:dc7872fb300e143831327f1bae3af010',
556dbe7f 855 'duration': 7337,
cf7e015f
S
856 'upload_date': '20150721',
857 'uploader': 'Beer Games Beer',
858 'uploader_id': 'beergamesbeer',
ec85ded8 859 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
7caf9830 860 'license': 'Standard YouTube License',
cf7e015f
S
861 },
862 }, {
863 'info_dict': {
864 'id': 'teuwxikvS5k',
865 'ext': 'mp4',
866 'title': 'teamPGP: Rocket League Noob Stream (zim)',
867 'description': 'md5:dc7872fb300e143831327f1bae3af010',
556dbe7f 868 'duration': 7334,
cf7e015f
S
869 'upload_date': '20150721',
870 'uploader': 'Beer Games Beer',
871 'uploader_id': 'beergamesbeer',
ec85ded8 872 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
7caf9830 873 'license': 'Standard YouTube License',
cf7e015f
S
874 },
875 }],
876 'params': {
877 'skip_download': True,
878 },
cbaed4bb 879 },
f9f49d87
S
880 {
881 # Multifeed video with comma in title (see https://github.com/rg3/youtube-dl/issues/8536)
882 'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
883 'info_dict': {
884 'id': 'gVfLd0zydlo',
885 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
886 },
887 'playlist_count': 2,
be49068d 888 'skip': 'Not multifeed anymore',
f9f49d87 889 },
cbaed4bb 890 {
2d3d2997 891 'url': 'https://vid.plus/FlRa-iH7PGw',
cbaed4bb 892 'only_matching': True,
0e49d9a6 893 },
6d4fc66b 894 {
2d3d2997 895 'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
6d4fc66b
S
896 'only_matching': True,
897 },
0e49d9a6 898 {
61f92af1 899 # Title with JS-like syntax "};" (see https://github.com/rg3/youtube-dl/issues/7468)
a8776b10
S
900 # Also tests cut-off URL expansion in video description (see
901 # https://github.com/rg3/youtube-dl/issues/1892,
902 # https://github.com/rg3/youtube-dl/issues/8164)
0e49d9a6
LL
903 'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
904 'info_dict': {
905 'id': 'lsguqyKfVQg',
906 'ext': 'mp4',
907 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
eb6793ba 908 'alt_title': 'Dark Walk - Position Music',
0e49d9a6 909 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
556dbe7f 910 'duration': 133,
0e49d9a6
LL
911 'upload_date': '20151119',
912 'uploader_id': 'IronSoulElf',
ec85ded8 913 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
0e49d9a6 914 'uploader': 'IronSoulElf',
7caf9830 915 'license': 'Standard YouTube License',
eb6793ba
S
916 'creator': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
917 'track': 'Dark Walk - Position Music',
918 'artist': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
0e49d9a6
LL
919 },
920 'params': {
921 'skip_download': True,
922 },
923 },
61f92af1
S
924 {
925 # Tags with '};' (see https://github.com/rg3/youtube-dl/issues/7468)
926 'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
927 'only_matching': True,
928 },
313dfc45
LL
929 {
930 # Video with yt:stretch=17:0
931 'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
932 'info_dict': {
933 'id': 'Q39EVAstoRM',
934 'ext': 'mp4',
935 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
936 'description': 'md5:ee18a25c350637c8faff806845bddee9',
937 'upload_date': '20151107',
938 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
939 'uploader': 'CH GAMER DROID',
940 },
941 'params': {
942 'skip_download': True,
943 },
be49068d 944 'skip': 'This video does not exist.',
313dfc45 945 },
7caf9830
S
946 {
947 # Video licensed under Creative Commons
948 'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
949 'info_dict': {
950 'id': 'M4gD1WSo5mA',
951 'ext': 'mp4',
952 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
953 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
556dbe7f 954 'duration': 721,
7caf9830
S
955 'upload_date': '20150127',
956 'uploader_id': 'BerkmanCenter',
ec85ded8 957 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
556dbe7f 958 'uploader': 'The Berkman Klein Center for Internet & Society',
7caf9830
S
959 'license': 'Creative Commons Attribution license (reuse allowed)',
960 },
961 'params': {
962 'skip_download': True,
963 },
964 },
fd050249
S
965 {
966 # Channel-like uploader_url
967 'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
968 'info_dict': {
969 'id': 'eQcmzGIKrzg',
970 'ext': 'mp4',
971 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
972 'description': 'md5:dda0d780d5a6e120758d1711d062a867',
556dbe7f 973 'duration': 4060,
fd050249 974 'upload_date': '20151119',
eb6793ba 975 'uploader': 'Bernie Sanders',
fd050249 976 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
ec85ded8 977 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
fd050249
S
978 'license': 'Creative Commons Attribution license (reuse allowed)',
979 },
980 'params': {
981 'skip_download': True,
982 },
983 },
040ac686
S
984 {
985 'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
986 'only_matching': True,
7f29cf54
S
987 },
988 {
989 # YouTube Red paid video (https://github.com/rg3/youtube-dl/issues/10059)
990 'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
991 'only_matching': True,
6496ccb4
S
992 },
993 {
994 # Rental video preview
995 'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
996 'info_dict': {
997 'id': 'uGpuVWrhIzE',
998 'ext': 'mp4',
999 'title': 'Piku - Trailer',
1000 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
1001 'upload_date': '20150811',
1002 'uploader': 'FlixMatrix',
1003 'uploader_id': 'FlixMatrixKaravan',
ec85ded8 1004 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
6496ccb4
S
1005 'license': 'Standard YouTube License',
1006 },
1007 'params': {
1008 'skip_download': True,
1009 },
eb6793ba 1010 'skip': 'This video is not available.',
022a5d66 1011 },
12afdc2a
S
1012 {
1013 # YouTube Red video with episode data
1014 'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
1015 'info_dict': {
1016 'id': 'iqKdEhx-dD4',
1017 'ext': 'mp4',
1018 'title': 'Isolation - Mind Field (Ep 1)',
eb6793ba 1019 'description': 'md5:25b78d2f64ae81719f5c96319889b736',
556dbe7f 1020 'duration': 2085,
12afdc2a
S
1021 'upload_date': '20170118',
1022 'uploader': 'Vsauce',
1023 'uploader_id': 'Vsauce',
1024 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
1025 'license': 'Standard YouTube License',
1026 'series': 'Mind Field',
1027 'season_number': 1,
1028 'episode_number': 1,
1029 },
1030 'params': {
1031 'skip_download': True,
1032 },
1033 'expected_warnings': [
1034 'Skipping DASH manifest',
1035 ],
1036 },
c7121fa7
S
1037 {
1038 # The following content has been identified by the YouTube community
1039 # as inappropriate or offensive to some audiences.
1040 'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
1041 'info_dict': {
1042 'id': '6SJNVb0GnPI',
1043 'ext': 'mp4',
1044 'title': 'Race Differences in Intelligence',
1045 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
1046 'duration': 965,
1047 'upload_date': '20140124',
1048 'uploader': 'New Century Foundation',
1049 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
1050 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
1051 'license': 'Standard YouTube License',
c7121fa7
S
1052 },
1053 'params': {
1054 'skip_download': True,
1055 },
1056 },
022a5d66
S
1057 {
1058 # itag 212
1059 'url': '1t24XAntNCY',
1060 'only_matching': True,
fd5c4aab
S
1061 },
1062 {
1063 # geo restricted to JP
1064 'url': 'sJL6WA-aGkQ',
1065 'only_matching': True,
1066 },
d0ba5587
S
1067 {
1068 'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
1069 'only_matching': True,
1070 },
2eb88d95
PH
1071 ]
1072
e0df6211
PH
def __init__(self, *args, **kwargs):
    """Initialise the extractor and create an empty per-instance player cache."""
    super(YoutubeIE, self).__init__(*args, **kwargs)
    # Maps (player_url, signature cache id) -> signature function;
    # populated lazily by _decrypt_signature.
    self._player_cache = {}
e0df6211 1076
c5e8d7af
PH
def report_video_info_webpage_download(self, video_id):
    """Announce that the video info webpage for video_id is being downloaded."""
    message = '%s: Downloading video info webpage' % video_id
    self.to_screen(message)
c5e8d7af 1080
c5e8d7af
PH
def report_information_extraction(self, video_id):
    """Announce that information for video_id is being extracted."""
    message = '%s: Extracting video information' % video_id
    self.to_screen(message)
c5e8d7af
PH
1084
def report_unavailable_format(self, video_id, format):
    """Announce that the given format is not available for video_id."""
    details = (video_id, format)
    self.to_screen('%s: Format %s not available' % details)
c5e8d7af
PH
1088
def report_rtmp_download(self):
    """Announce that the download will use the RTMP protocol."""
    notice = 'RTMP download detected'
    self.to_screen(notice)
c5e8d7af 1092
60064c53
PH
1093 def _signature_cache_id(self, example_sig):
1094 """ Return a string representation of a signature """
78caa52a 1095 return '.'.join(compat_str(len(part)) for part in example_sig.split('.'))
60064c53
PH
1096
def _extract_signature_function(self, video_id, player_url, example_sig):
    """Return a callable that deciphers signatures shaped like example_sig.

    The player id and type ('js' or 'swf') are parsed out of player_url.
    If the downloader cache already holds an index permutation for this
    player and signature shape, a function applying that permutation is
    returned without any download. Otherwise the player is downloaded,
    parsed with _parse_sig_js / _parse_sig_swf, and the permutation it
    produces is stored in the cache before the parsed function is returned.

    Raises ExtractorError when player_url cannot be identified.
    """
    player_match = re.match(
        r'.*?-(?P<id>[a-zA-Z0-9_-]+)(?:/watch_as3|/html5player(?:-new)?|(?:/[a-z]{2}_[A-Z]{2})?/base)?\.(?P<ext>[a-z]+)$',
        player_url)
    if player_match is None:
        raise ExtractorError('Cannot identify player %r' % player_url)
    player_id = player_match.group('id')
    player_type = player_match.group('ext')

    # Read from filesystem cache
    func_id = '%s_%s_%s' % (
        player_type, player_id, self._signature_cache_id(example_sig))
    # The id doubles as a cache file name, so it must not contain a path
    assert os.path.basename(func_id) == func_id

    cached_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
    if cached_spec is not None:
        return lambda s: ''.join(s[i] for i in cached_spec)

    if self._downloader.params.get('verbose'):
        download_note = 'Downloading player %s' % player_url
    else:
        download_note = 'Downloading %s player %s' % (player_type, player_id)
    failure_note = 'Download of %s failed' % player_url

    if player_type == 'js':
        code = self._download_webpage(
            player_url, video_id, note=download_note, errnote=failure_note)
        res = self._parse_sig_js(code)
    elif player_type == 'swf':
        urlh = self._request_webpage(
            player_url, video_id, note=download_note, errnote=failure_note)
        code = urlh.read()
        res = self._parse_sig_swf(code)
    else:
        assert False, 'Invalid player type %r' % player_type

    # Feed a string whose i-th character has code point i through the
    # function; the code points of the output are the index permutation.
    probe = ''.join(map(compat_chr, range(len(example_sig))))
    permutation = [ord(ch) for ch in res(probe)]

    self._downloader.cache.store('youtube-sigfuncs', func_id, permutation)
    return res
1142
def _print_sig_code(self, func, example_sig):
    """Print Python source that reproduces func for signatures shaped like example_sig.

    func is probed with a string whose i-th character has code point i, so
    the code points of its output give the source index of every output
    character. That index list is turned into a concatenation of index and
    slice expressions on `s`, wrapped in an `if` that tests the lengths of
    the dot-separated signature parts, and shown via to_screen.
    """
    def gen_sig_code(idxs):
        # Yields 's[i]' / 's[a:b:c]' expression strings covering idxs in order,
        # collapsing runs that step by +1 or -1 into a single slice.
        def _genslice(start, end, step):
            # Render the run start..end (inclusive) with the given step as a slice
            starts = '' if start == 0 else str(start)
            # A stop index below zero cannot be written literally; leave it open
            ends = (':%d' % (end + step)) if end + step >= 0 else ':'
            steps = '' if step == 1 else (':%d' % step)
            return 's[%s%s%s]' % (starts, ends, steps)

        step = None
        # Squelch pyflakes warnings - start will be set when step is set
        start = '(Never used)'
        for i, prev in zip(idxs[1:], idxs[:-1]):
            if step is not None:
                # Inside a run: keep going while the step stays the same
                if i - prev == step:
                    continue
                yield _genslice(start, prev, step)
                step = None
                continue
            if i - prev in [-1, 1]:
                # A new run starts at prev
                step = i - prev
                start = prev
                continue
            else:
                yield 's[%d]' % prev
        # Emit whatever is still pending after the last pair
        if step is None:
            yield 's[%d]' % i
        else:
            yield _genslice(start, i, step)

    test_string = ''.join(map(compat_chr, range(len(example_sig))))
    cache_res = func(test_string)
    cache_spec = [ord(c) for c in cache_res]
    expr_code = ' + '.join(gen_sig_code(cache_spec))
    signature_id_tuple = '(%s)' % (
        ', '.join(compat_str(len(p)) for p in example_sig.split('.')))
    code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
            ' return %s\n') % (signature_id_tuple, expr_code)
    self.to_screen('Extracted signature function:\n' + code)
edf3e38e 1181
e0df6211
PH
def _parse_sig_js(self, jscode):
    """Return a callable wrapping the signature function found in JS player code.

    The function name is located with the first matching pattern below and
    the function itself is extracted with JSInterpreter. The returned
    callable takes the signature string and passes it as the sole argument.
    """
    name_patterns = (
        r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
        r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*c\s*&&\s*d\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*d\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
    )
    funcname = self._search_regex(
        name_patterns, jscode,
        'Initial JS player signature function name', group='sig')

    interpreter = JSInterpreter(jscode)
    decipher = interpreter.extract_function(funcname)

    def run_decipher(s):
        return decipher([s])

    return run_decipher
1193
def _parse_sig_swf(self, file_contents):
    """Return a callable wrapping `SignatureDecipher.decipher` from an SWF player.

    The returned callable takes the signature string and passes it as the
    sole argument to the extracted function.
    """
    interpreter = SWFInterpreter(file_contents)
    decipher_class = interpreter.extract_class('SignatureDecipher')
    decipher = interpreter.extract_function(decipher_class, 'decipher')

    def run_decipher(s):
        return decipher([s])

    return run_decipher
1200
def _decrypt_signature(self, s, video_id, player_url, age_gate=False):
    """Turn the encrypted s field into a working signature.

    player_url may be absolute, protocol-relative ('//...') or relative to
    https://www.youtube.com. Signature functions are kept in
    self._player_cache, keyed by the normalised player URL and the
    signature shape. age_gate is accepted but not used here.

    Raises ExtractorError when player_url is None, or (with the formatted
    traceback in the message) when anything fails during extraction.
    """
    if player_url is None:
        raise ExtractorError('Cannot decrypt signature without player_url')

    # Normalise the player URL to an absolute one
    if player_url.startswith('//'):
        player_url = 'https:' + player_url
    elif re.match(r'https?://', player_url) is None:
        player_url = compat_urlparse.urljoin(
            'https://www.youtube.com', player_url)

    try:
        cache_key = (player_url, self._signature_cache_id(s))
        if cache_key not in self._player_cache:
            self._player_cache[cache_key] = self._extract_signature_function(
                video_id, player_url, s)
        decipher = self._player_cache[cache_key]
        if self._downloader.params.get('youtube_print_sig_code'):
            self._print_sig_code(decipher, s)
        return decipher(s)
    except Exception as e:
        raise ExtractorError(
            'Signature extraction failed: ' + traceback.format_exc(), cause=e)
e0df6211 1227
def _get_subtitles(self, video_id, webpage):
    """Return the available subtitle tracks for video_id.

    The track list is downloaded from the timedtext listing endpoint. The
    result maps each language code to a list of {'url', 'ext'} dicts, one
    per entry of self._SUBTITLE_FORMATS; only the first track seen for a
    language is used. An empty dict is returned, after a warning, when the
    listing cannot be downloaded or contains no tracks. webpage is not used.
    """
    list_url = 'https://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id
    try:
        subs_doc = self._download_xml(list_url, video_id, note=False)
    except ExtractorError as err:
        self._downloader.report_warning('unable to download video subtitles: %s' % error_to_compat_str(err))
        return {}

    sub_lang_list = {}
    for track in subs_doc.findall('track'):
        lang = track.attrib['lang_code']
        if lang in sub_lang_list:
            continue
        sub_lang_list[lang] = [{
            'url': 'https://www.youtube.com/api/timedtext?' + compat_urllib_parse_urlencode({
                'lang': lang,
                'v': video_id,
                'fmt': ext,
                'name': track.attrib['name'].encode('utf-8'),
            }),
            'ext': ext,
        } for ext in self._SUBTITLE_FORMATS]

    if sub_lang_list:
        return sub_lang_list
    self._downloader.report_warning('video doesn\'t have subtitles')
    return {}
1259
a72778d3
S
def _get_ytplayer_config(self, video_id, webpage):
    """Return the parsed `ytplayer.config` JSON object from webpage.

    Returns None when no config assignment is found; JSON parsing is
    non-fatal.
    """
    # User data may contain arbitrary character sequences that may affect
    # JSON extraction with regex, e.g. when '};' is contained the second
    # regex won't capture the whole JSON. Yet working around by trying more
    # concrete regex first keeping in mind proper quoted string handling
    # to be implemented in future that will replace this workaround (see
    # https://github.com/rg3/youtube-dl/issues/7468,
    # https://github.com/rg3/youtube-dl/pull/7599)
    strict_pattern = r';ytplayer\.config\s*=\s*({.+?});ytplayer'
    loose_pattern = r';ytplayer\.config\s*=\s*({.+?});'
    config = self._search_regex(
        (strict_pattern, loose_pattern), webpage, 'ytplayer.config',
        default=None)
    if not config:
        return None
    return self._parse_json(
        uppercase_escape(config), video_id, fatal=False)
0e49d9a6 1277
def _get_automatic_captions(self, video_id, webpage):
    """Return automatic (translated) captions for video_id.

    The already-downloaded webpage is passed in so the player config can
    be read from it without another request.

    The result maps each target language code to a list of
    {'url', 'ext'} dicts, one per entry of self._SUBTITLE_FORMATS. Three
    sources in the player config `args` are tried in order: `ttsurl`,
    `player_response`, and the older `caption_tracks` /
    `caption_translation_languages` pair. An empty dict is returned, after
    a warning, when the config is missing or none of the sources works.
    """
    self.to_screen('%s: Looking for automatic captions' % video_id)
    player_config = self._get_ytplayer_config(video_id, webpage)
    err_msg = 'Couldn\'t find automatic captions for %s' % video_id
    if not player_config:
        self._downloader.report_warning(err_msg)
        return {}
    try:
        args = player_config['args']
        caption_url = args.get('ttsurl')
        if caption_url:
            timestamp = args['timestamp']
            # We get the available subtitles
            list_params = compat_urllib_parse_urlencode({
                'type': 'list',
                'tlangs': 1,
                'asrs': 1,
            })
            list_url = caption_url + '&' + list_params
            caption_list = self._download_xml(list_url, video_id)
            # The first <track> element supplies the source language and kind
            original_lang_node = caption_list.find('track')
            if original_lang_node is None:
                self._downloader.report_warning('Video doesn\'t have automatic captions')
                return {}
            original_lang = original_lang_node.attrib['lang_code']
            caption_kind = original_lang_node.attrib.get('kind', '')

            # One entry per <target> (translation language) element
            sub_lang_list = {}
            for lang_node in caption_list.findall('target'):
                sub_lang = lang_node.attrib['lang_code']
                sub_formats = []
                for ext in self._SUBTITLE_FORMATS:
                    params = compat_urllib_parse_urlencode({
                        'lang': original_lang,
                        'tlang': sub_lang,
                        'fmt': ext,
                        'ts': timestamp,
                        'kind': caption_kind,
                    })
                    sub_formats.append({
                        'url': caption_url + '&' + params,
                        'ext': ext,
                    })
                sub_lang_list[sub_lang] = sub_formats
            return sub_lang_list

        def make_captions(sub_url, sub_langs):
            # Build the captions dict by overriding the `tlang` and `fmt`
            # query parameters of sub_url for every language/format pair.
            parsed_sub_url = compat_urllib_parse_urlparse(sub_url)
            caption_qs = compat_parse_qs(parsed_sub_url.query)
            captions = {}
            for sub_lang in sub_langs:
                sub_formats = []
                for ext in self._SUBTITLE_FORMATS:
                    caption_qs.update({
                        'tlang': [sub_lang],
                        'fmt': [ext],
                    })
                    sub_url = compat_urlparse.urlunparse(parsed_sub_url._replace(
                        query=compat_urllib_parse_urlencode(caption_qs, True)))
                    sub_formats.append({
                        'url': sub_url,
                        'ext': ext,
                    })
                captions[sub_lang] = sub_formats
            return captions

        # New captions format as of 22.06.2017
        player_response = args.get('player_response')
        if player_response and isinstance(player_response, compat_str):
            player_response = self._parse_json(
                player_response, video_id, fatal=False)
            if player_response:
                renderer = player_response['captions']['playerCaptionsTracklistRenderer']
                # Only the first caption track is used as the base URL
                base_url = renderer['captionTracks'][0]['baseUrl']
                sub_lang_list = []
                for lang in renderer['translationLanguages']:
                    lang_code = lang.get('languageCode')
                    if lang_code:
                        sub_lang_list.append(lang_code)
                return make_captions(base_url, sub_lang_list)

        # Some videos don't provide ttsurl but rather caption_tracks and
        # caption_translation_languages (e.g. 20LmZk1hakA)
        # Not used anymore as of 22.06.2017
        caption_tracks = args['caption_tracks']
        caption_translation_languages = args['caption_translation_languages']
        caption_url = compat_parse_qs(caption_tracks.split(',')[0])['u'][0]
        sub_lang_list = []
        for lang in caption_translation_languages.split(','):
            lang_qs = compat_parse_qs(compat_urllib_parse_unquote_plus(lang))
            sub_lang = lang_qs.get('lc', [None])[0]
            if sub_lang:
                sub_lang_list.append(sub_lang)
        return make_captions(caption_url, sub_lang_list)
    # An extractor error can be raised by the download process if there are
    # no automatic captions but there are subtitles
    except (KeyError, IndexError, ExtractorError):
        self._downloader.report_warning(err_msg)
        return {}
1379
d77ab8e2
S
def _mark_watched(self, video_id, video_info):
    """Request the playback stats URL from video_info to mark the video watched.

    Does nothing when video_info has no `videostats_playback_base_url`.
    The request is non-fatal.
    """
    base_url = video_info.get('videostats_playback_base_url', [None])[0]
    if not base_url:
        return
    parsed_playback_url = compat_urlparse.urlparse(base_url)
    qs = compat_urlparse.parse_qs(parsed_playback_url.query)

    # cpn generation algorithm is reverse engineered from base.js.
    # In fact it works even with dummy cpn.
    CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
    cpn_chars = []
    for _ in range(0, 16):
        cpn_chars.append(CPN_ALPHABET[random.randint(0, 256) & 63])
    cpn = ''.join(cpn_chars)

    qs['ver'] = ['2']
    qs['cpn'] = [cpn]
    new_query = compat_urllib_parse_urlencode(qs, True)
    playback_url = compat_urlparse.urlunparse(
        parsed_playback_url._replace(query=new_query))

    self._download_webpage(
        playback_url, video_id, 'Marking watched',
        'Unable to mark watched', fatal=False)
1402
66c9fa36
S
@staticmethod
def _extract_urls(webpage):
    """Return all YouTube embeds found in the HTML of webpage.

    Three kinds of embed are collected, in this order:
    player URLs (iframe/embed/object tags, `data-video-url` attributes,
    `embedSWF(...)` and `new SWFObject(...)` calls), lazyYT
    `data-youtube-id` values, and `data-video_id` values of the Wordpress
    "YouTube Video Importer" plugin. The first two kinds are HTML-unescaped.
    The result is a list of strings and may be empty.
    """
    # Embedded YouTube player
    # NOTE: the embedSWF alternative used to read `embedSWF\(?:\s*`, a
    # malformed group that demanded a literal ':' and therefore never
    # matched an actual `embedSWF("<url>", ...)` call.
    entries = [
        unescapeHTML(mobj.group('url'))
        for mobj in re.finditer(r'''(?x)
        (?:
            <iframe[^>]+?src=|
            data-video-url=|
            <embed[^>]+?src=|
            embedSWF\(\s*|
            <object[^>]+data=|
            new\s+SWFObject\(
        )
        (["\'])
        (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
        (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
        \1''', webpage)]

    # lazyYT YouTube embed
    entries.extend(
        unescapeHTML(youtube_id)
        for youtube_id in re.findall(
            r'class="lazyYT" data-youtube-id="([^"]+)"', webpage))

    # Wordpress "YouTube Video Importer" plugin
    matches = re.findall(r'''(?x)<div[^>]+
        class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
        data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
    # Each match is a (q1, q2, video_id) tuple; keep only the id
    entries.extend(m[-1] for m in matches)

    return entries
1434
@staticmethod
def _extract_url(webpage):
    """Return the first embed found by _extract_urls in webpage, or None."""
    found = YoutubeIE._extract_urls(webpage)
    if not found:
        return None
    return found[0]
1439
97665381
PH
@classmethod
def extract_id(cls, url):
    """Return the video id (group 2 of cls._VALID_URL) parsed from url.

    Raises ExtractorError when url does not match cls._VALID_URL.
    """
    mobj = re.match(cls._VALID_URL, url, re.VERBOSE)
    if mobj is not None:
        return mobj.group(2)
    raise ExtractorError('Invalid URL: %s' % url)
1447
1fb07d10
JG
def _extract_annotations(self, video_id):
    """Download and return the annotations document for video_id as text."""
    annotations_url = 'https://www.youtube.com/annotations_invideo?features=1&legacy=1&video_id=%s' % video_id
    return self._download_webpage(
        annotations_url, video_id,
        note='Searching for annotations.',
        errnote='Unable to download video annotations.')
1fb07d10 1451
9cafc3fd
S
@staticmethod
def _extract_chapters(description, duration):
    """Extract chapters from the seekTo links of an HTML video description.

    Each description line (delimited by `<br />`) that contains a
    `yt.www.watch.player.seekTo` link with an `[h:]m:s` time point becomes
    a chapter that starts at that time point and ends at the next line's
    time point (the last one ends at duration). End times are clipped to
    duration; processing stops at the first chapter that starts after
    duration or after its own end time.

    Returns a list of {'start_time', 'end_time', 'title'} dicts, or None
    when description is empty, duration is None, or no such lines exist.
    """
    # Without a duration the time points cannot be bounded and the last
    # chapter cannot be closed; comparing a number with None would also
    # raise TypeError on Python 3.
    if not description or duration is None:
        return None
    chapter_lines = re.findall(
        r'(?:^|<br\s*/>)([^<]*<a[^>]+onclick=["\']yt\.www\.watch\.player\.seekTo[^>]+>(\d{1,2}:\d{1,2}(?::\d{1,2})?)</a>[^>]*)(?=$|<br\s*/>)',
        description)
    if not chapter_lines:
        return None
    chapters = []
    # next_num is the index of the following line, i.e. the current index + 1
    for next_num, (chapter_line, time_point) in enumerate(
            chapter_lines, start=1):
        start_time = parse_duration(time_point)
        if start_time is None:
            continue
        if start_time > duration:
            break
        end_time = (duration if next_num == len(chapter_lines)
                    else parse_duration(chapter_lines[next_num][1]))
        if end_time is None:
            continue
        if end_time > duration:
            end_time = duration
        if start_time > end_time:
            break
        # The title is the line without the time link and surrounding dashes
        chapter_title = re.sub(
            r'<a[^>]+>[^<]+</a>', '', chapter_line).strip(' \t-')
        chapter_title = re.sub(r'\s+', ' ', chapter_title)
        chapters.append({
            'start_time': start_time,
            'end_time': end_time,
            'title': chapter_title,
        })
    return chapters
1486
def _real_extract(self, url):
    """Extract metadata and downloadable formats for a single video URL.

    The watch page is downloaded first; depending on what it contains the
    embed page and one or more ``get_video_info`` responses are fetched as
    well. Formats are collected from the RTMP ``conn`` entry, from
    ``url_encoded_fmt_stream_map``/``adaptive_fmts`` or from the HLS
    manifest, and are then merged with the formats of any DASH manifests
    that were discovered.

    Returns the info dict built at the end of the method, the result of
    ``self.url_result`` for a rental preview (``ypc_vid``), or the result
    of ``self.playlist_result`` for a multifeed video.

    Raises ExtractorError when no usable video info could be obtained,
    when YouTube reports a reason for unavailability, or for unsupported
    rental/rtmpe videos.
    """
    url, smuggled_data = unsmuggle_url(url, {})

    proto = (
        'http' if self._downloader.params.get('prefer_insecure', False)
        else 'https')

    # Start/end offsets may be given in the fragment or in the query string;
    # the fragment is inspected first and the first value found wins.
    start_time = None
    end_time = None
    parsed_url = compat_urllib_parse_urlparse(url)
    for component in [parsed_url.fragment, parsed_url.query]:
        query = compat_parse_qs(component)
        if start_time is None and 't' in query:
            start_time = parse_duration(query['t'][0])
        if start_time is None and 'start' in query:
            start_time = parse_duration(query['start'][0])
        if end_time is None and 'end' in query:
            end_time = parse_duration(query['end'][0])

    # Extract original video URL from URL with redirection, like age verification, using next_url parameter
    mobj = re.search(self._NEXT_URL_RE, url)
    if mobj:
        url = proto + '://www.youtube.com/' + compat_urllib_parse_unquote(mobj.group(1)).lstrip('/')
    video_id = self.extract_id(url)

    # Get video webpage
    url = proto + '://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1&bpctr=9999999999' % video_id
    video_webpage = self._download_webpage(url, video_id)

    # Attempt to extract SWF player URL
    mobj = re.search(r'swfConfig.*?"(https?:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage)
    if mobj is not None:
        # Drop the backslashes escaping the URL inside the page's JSON
        player_url = re.sub(r'\\(.)', r'\1', mobj.group(1))
    else:
        player_url = None

    dash_mpds = []

    def add_dash_mpd(video_info):
        # Remember every distinct DASH manifest URL, in discovery order
        dash_mpd = video_info.get('dashmpd')
        if dash_mpd and dash_mpd[0] not in dash_mpds:
            dash_mpds.append(dash_mpd[0])

    is_live = None
    view_count = None

    def extract_view_count(v_info):
        return int_or_none(try_get(v_info, lambda x: x['view_count'][0]))

    # Get video info
    embed_webpage = None
    if re.search(r'player-age-gate-content">', video_webpage) is not None:
        age_gate = True
        # We simulate the access to the video from www.youtube.com/v/{video_id}
        # this can be viewed without login into Youtube
        url = proto + '://www.youtube.com/embed/%s' % video_id
        embed_webpage = self._download_webpage(url, video_id, 'Downloading embed webpage')
        data = compat_urllib_parse_urlencode({
            'video_id': video_id,
            'eurl': 'https://youtube.googleapis.com/v/' + video_id,
            'sts': self._search_regex(
                r'"sts"\s*:\s*(\d+)', embed_webpage, 'sts', default=''),
        })
        video_info_url = proto + '://www.youtube.com/get_video_info?' + data
        video_info_webpage = self._download_webpage(
            video_info_url, video_id,
            note='Refetching age-gated info webpage',
            errnote='unable to download video info webpage')
        video_info = compat_parse_qs(video_info_webpage)
        add_dash_mpd(video_info)
    else:
        age_gate = False
        video_info = None
        sts = None
        # Try looking directly into the video webpage
        ytplayer_config = self._get_ytplayer_config(video_id, video_webpage)
        if ytplayer_config:
            args = ytplayer_config['args']
            if args.get('url_encoded_fmt_stream_map') or args.get('hlsvp'):
                # Convert to the same format returned by compat_parse_qs
                video_info = dict((k, [v]) for k, v in args.items())
                add_dash_mpd(video_info)
            # Rental video is not rented but preview is available (e.g.
            # https://www.youtube.com/watch?v=yYr8q0y5Jfg,
            # https://github.com/rg3/youtube-dl/issues/10532)
            if not video_info and args.get('ypc_vid'):
                return self.url_result(
                    args['ypc_vid'], YoutubeIE.ie_key(), video_id=args['ypc_vid'])
            if args.get('livestream') == '1' or args.get('live_playback') == 1:
                is_live = True
            sts = ytplayer_config.get('sts')
        if not video_info or self._downloader.params.get('youtube_include_dash_manifest', True):
            # We also try looking in get_video_info since it may contain different dashmpd
            # URL that points to a DASH manifest with possibly different itag set (some itags
            # are missing from DASH manifest pointed by webpage's dashmpd, some - from DASH
            # manifest pointed by get_video_info's dashmpd).
            # The general idea is to take a union of itags of both DASH manifests (for example
            # video with such 'manifest behavior' see https://github.com/rg3/youtube-dl/issues/6093)
            self.report_video_info_webpage_download(video_id)
            for el in ('info', 'embedded', 'detailpage', 'vevo', ''):
                query = {
                    'video_id': video_id,
                    'ps': 'default',
                    'eurl': '',
                    'gl': 'US',
                    'hl': 'en',
                }
                if el:
                    query['el'] = el
                if sts:
                    query['sts'] = sts
                video_info_webpage = self._download_webpage(
                    '%s://www.youtube.com/get_video_info' % proto,
                    video_id, note=False,
                    errnote='unable to download video info webpage',
                    fatal=False, query=query)
                if not video_info_webpage:
                    continue
                get_video_info = compat_parse_qs(video_info_webpage)
                add_dash_mpd(get_video_info)
                if view_count is None:
                    view_count = extract_view_count(get_video_info)
                if not video_info:
                    video_info = get_video_info
                if 'token' in get_video_info:
                    # Different get_video_info requests may report different results, e.g.
                    # some may report video unavailability, but some may serve it without
                    # any complaint (see https://github.com/rg3/youtube-dl/issues/7362,
                    # the original webpage as well as el=info and el=embedded get_video_info
                    # requests report video unavailability due to geo restriction while
                    # el=detailpage succeeds and returns valid data). This is probably
                    # due to YouTube measures against IP ranges of hosting providers.
                    # Working around by preferring the first succeeded video_info containing
                    # the token if no such video_info yet was found.
                    if 'token' not in video_info:
                        video_info = get_video_info
                    break

    def extract_unavailable_message():
        return self._html_search_regex(
            r'(?s)<h1[^>]+id="unavailable-message"[^>]*>(.+?)</h1>',
            video_webpage, 'unavailable message', default=None)

    # Every get_video_info request above is non-fatal, so video_info may
    # still be empty here; fail with a proper extractor error instead of a
    # TypeError on the membership tests below.
    if not video_info:
        unavailable_message = extract_unavailable_message()
        if not unavailable_message:
            unavailable_message = 'Unable to extract video data'
        raise ExtractorError(
            'YouTube said: %s' % unavailable_message,
            expected=True, video_id=video_id)

    if 'token' not in video_info:
        if 'reason' in video_info:
            if 'The uploader has not made this video available in your country.' in video_info['reason']:
                regions_allowed = self._html_search_meta(
                    'regionsAllowed', video_webpage, default=None)
                countries = regions_allowed.split(',') if regions_allowed else None
                self.raise_geo_restricted(
                    msg=video_info['reason'][0], countries=countries)
            reason = video_info['reason'][0]
            if 'Invalid parameters' in reason:
                unavailable_message = extract_unavailable_message()
                if unavailable_message:
                    reason = unavailable_message
            raise ExtractorError(
                'YouTube said: %s' % reason,
                expected=True, video_id=video_id)
        else:
            raise ExtractorError(
                '"token" parameter not in video info for unknown reason',
                video_id=video_id)

    # title
    if 'title' in video_info:
        video_title = video_info['title'][0]
    else:
        self._downloader.report_warning('Unable to extract video title')
        video_title = '_'

    # description
    description_original = video_description = get_element_by_id("eow-description", video_webpage)
    if video_description:

        def replace_url(m):
            # Expand a shortened link; YouTube /redirect links are replaced
            # by the target given in their "q" parameter.
            redir_url = compat_urlparse.urljoin(url, m.group(1))
            parsed_redir_url = compat_urllib_parse_urlparse(redir_url)
            if re.search(r'^(?:www\.)?(?:youtube(?:-nocookie)?\.com|youtu\.be)$', parsed_redir_url.netloc) and parsed_redir_url.path == '/redirect':
                qs = compat_parse_qs(parsed_redir_url.query)
                q = qs.get('q')
                if q and q[0]:
                    return q[0]
            return redir_url

        description_original = video_description = re.sub(r'''(?x)
            <a\s+
                (?:[a-zA-Z-]+="[^"]*"\s+)*?
                (?:title|href)="([^"]+)"\s+
                (?:[a-zA-Z-]+="[^"]*"\s+)*?
                class="[^"]*"[^>]*>
            [^<]+\.{3}\s*
            </a>
        ''', replace_url, video_description)
        video_description = clean_html(video_description)
    else:
        fd_mobj = re.search(r'<meta name="description" content="([^"]+)"', video_webpage)
        if fd_mobj:
            video_description = unescapeHTML(fd_mobj.group(1))
        else:
            video_description = ''

    if 'multifeed_metadata_list' in video_info and not smuggled_data.get('force_singlefeed', False):
        if not self._downloader.params.get('noplaylist'):
            entries = []
            feed_ids = []
            multifeed_metadata_list = video_info['multifeed_metadata_list'][0]
            for feed in multifeed_metadata_list.split(','):
                # Unquote should take place before split on comma (,) since textual
                # fields may contain comma as well (see
                # https://github.com/rg3/youtube-dl/issues/8536)
                feed_data = compat_parse_qs(compat_urllib_parse_unquote_plus(feed))
                entries.append({
                    '_type': 'url_transparent',
                    'ie_key': 'Youtube',
                    'url': smuggle_url(
                        '%s://www.youtube.com/watch?v=%s' % (proto, feed_data['id'][0]),
                        {'force_singlefeed': True}),
                    'title': '%s (%s)' % (video_title, feed_data['title'][0]),
                })
                feed_ids.append(feed_data['id'][0])
            self.to_screen(
                'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
                % (', '.join(feed_ids), video_id))
            return self.playlist_result(entries, video_id, video_title, video_description)
        self.to_screen('Downloading just video %s because of --no-playlist' % video_id)

    if view_count is None:
        view_count = extract_view_count(video_info)

    # Check for "rental" videos
    if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info:
        raise ExtractorError('"rental" videos not supported. See https://github.com/rg3/youtube-dl/issues/359 for more information.', expected=True)

    def _extract_filesize(media_url):
        return int_or_none(self._search_regex(
            r'\bclen[=/](\d+)', media_url, 'filesize', default=None))

    if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
        self.report_rtmp_download()
        formats = [{
            'format_id': '_rtmp',
            'protocol': 'rtmp',
            'url': video_info['conn'][0],
            'player_url': player_url,
        }]
    elif not is_live and (len(video_info.get('url_encoded_fmt_stream_map', [''])[0]) >= 1 or len(video_info.get('adaptive_fmts', [''])[0]) >= 1):
        encoded_url_map = video_info.get('url_encoded_fmt_stream_map', [''])[0] + ',' + video_info.get('adaptive_fmts', [''])[0]
        if 'rtmpe%3Dyes' in encoded_url_map:
            raise ExtractorError('rtmpe downloads are not supported, see https://github.com/rg3/youtube-dl/issues/343 for more information.', expected=True)
        # fmt_list entries look like "<itag>/<width>x<height>/..."
        formats_spec = {}
        fmt_list = video_info.get('fmt_list', [''])[0]
        if fmt_list:
            for fmt in fmt_list.split(','):
                spec = fmt.split('/')
                if len(spec) > 1:
                    width_height = spec[1].split('x')
                    if len(width_height) == 2:
                        formats_spec[spec[0]] = {
                            'resolution': spec[1],
                            'width': int_or_none(width_height[0]),
                            'height': int_or_none(width_height[1]),
                        }
        q = qualities(['small', 'medium', 'hd720'])
        formats = []
        for url_data_str in encoded_url_map.split(','):
            url_data = compat_parse_qs(url_data_str)
            if 'itag' not in url_data or 'url' not in url_data:
                continue
            format_id = url_data['itag'][0]
            url = url_data['url'][0]

            if 's' in url_data or self._downloader.params.get('youtube_include_dash_manifest', True):
                ASSETS_RE = r'"assets":.+?"js":\s*("[^"]+")'
                jsplayer_url_json = self._search_regex(
                    ASSETS_RE,
                    embed_webpage if age_gate else video_webpage,
                    'JS player URL (1)', default=None)
                if not jsplayer_url_json and not age_gate:
                    # We need the embed website after all
                    if embed_webpage is None:
                        embed_url = proto + '://www.youtube.com/embed/%s' % video_id
                        embed_webpage = self._download_webpage(
                            embed_url, video_id, 'Downloading embed webpage')
                    jsplayer_url_json = self._search_regex(
                        ASSETS_RE, embed_webpage, 'JS player URL')

                # The first lookup is non-fatal and has no retry for
                # age-gated videos, so the match may be missing here;
                # json.loads(None) would raise TypeError.
                player_url = (
                    json.loads(jsplayer_url_json)
                    if jsplayer_url_json else None)
                if player_url is None:
                    player_url_json = self._search_regex(
                        r'ytplayer\.config.*?"url"\s*:\s*("[^"]+")',
                        video_webpage, 'age gate player URL')
                    player_url = json.loads(player_url_json)

            if 'sig' in url_data:
                url += '&signature=' + url_data['sig'][0]
            elif 's' in url_data:
                encrypted_sig = url_data['s'][0]

                if self._downloader.params.get('verbose'):
                    if player_url is None:
                        player_version = 'unknown'
                        player_desc = 'unknown'
                    else:
                        if player_url.endswith('swf'):
                            player_version = self._search_regex(
                                r'-(.+?)(?:/watch_as3)?\.swf$', player_url,
                                'flash player', fatal=False)
                            player_desc = 'flash player %s' % player_version
                        else:
                            player_version = self._search_regex(
                                [r'html5player-([^/]+?)(?:/html5player(?:-new)?)?\.js',
                                 r'(?:www|player)-([^/]+)(?:/[a-z]{2}_[A-Z]{2})?/base\.js'],
                                player_url,
                                'html5 player', fatal=False)
                            player_desc = 'html5 player %s' % player_version

                    parts_sizes = self._signature_cache_id(encrypted_sig)
                    self.to_screen('{%s} signature length %s, %s' %
                                   (format_id, parts_sizes, player_desc))

                signature = self._decrypt_signature(
                    encrypted_sig, video_id, player_url, age_gate)
                url += '&signature=' + signature
            if 'ratebypass' not in url:
                url += '&ratebypass=yes'

            dct = {
                'format_id': format_id,
                'url': url,
                'player_url': player_url,
            }
            if format_id in self._formats:
                dct.update(self._formats[format_id])
            if format_id in formats_spec:
                dct.update(formats_spec[format_id])

            # Some itags are not included in DASH manifest thus corresponding formats will
            # lack metadata (see https://github.com/rg3/youtube-dl/pull/5993).
            # Trying to extract metadata from url_encoded_fmt_stream_map entry.
            mobj = re.search(r'^(?P<width>\d+)[xX](?P<height>\d+)$', url_data.get('size', [''])[0])
            width, height = (int(mobj.group('width')), int(mobj.group('height'))) if mobj else (None, None)

            filesize = int_or_none(url_data.get(
                'clen', [None])[0]) or _extract_filesize(url)

            quality = url_data.get('quality_label', [None])[0] or url_data.get('quality', [None])[0]

            more_fields = {
                'filesize': filesize,
                'tbr': float_or_none(url_data.get('bitrate', [None])[0], 1000),
                'width': width,
                'height': height,
                'fps': int_or_none(url_data.get('fps', [None])[0]),
                'format_note': quality,
                'quality': q(quality),
            }
            # Only truthy values override what the static tables provided
            for key, value in more_fields.items():
                if value:
                    dct[key] = value
            type_ = url_data.get('type', [None])[0]
            if type_:
                type_split = type_.split(';')
                kind_ext = type_split[0].split('/')
                if len(kind_ext) == 2:
                    kind, _ = kind_ext
                    dct['ext'] = mimetype2ext(type_split[0])
                    if kind in ('audio', 'video'):
                        codecs = None
                        for mobj in re.finditer(
                                r'(?P<key>[a-zA-Z_-]+)=(?P<quote>["\']?)(?P<val>.+?)(?P=quote)(?:;|$)', type_):
                            if mobj.group('key') == 'codecs':
                                codecs = mobj.group('val')
                                break
                        if codecs:
                            dct.update(parse_codecs(codecs))
            if dct.get('acodec') == 'none' or dct.get('vcodec') == 'none':
                dct['downloader_options'] = {
                    # Youtube throttles chunks >~10M
                    'http_chunk_size': 10485760,
                }
            formats.append(dct)
    elif video_info.get('hlsvp'):
        manifest_url = video_info['hlsvp'][0]
        formats = []
        m3u8_formats = self._extract_m3u8_formats(
            manifest_url, video_id, 'mp4', fatal=False)
        for a_format in m3u8_formats:
            itag = self._search_regex(
                r'/itag/(\d+)/', a_format['url'], 'itag', default=None)
            if itag:
                a_format['format_id'] = itag
                if itag in self._formats:
                    # Static itag data is the base; manifest data wins
                    dct = self._formats[itag].copy()
                    dct.update(a_format)
                    a_format = dct
            a_format['player_url'] = player_url
            # Accept-Encoding header causes failures in live streams on Youtube and Youtube Gaming
            a_format.setdefault('http_headers', {})['Youtubedl-no-compression'] = 'True'
            formats.append(a_format)
    else:
        error_message = clean_html(video_info.get('reason', [None])[0])
        if not error_message:
            error_message = extract_unavailable_message()
        if error_message:
            raise ExtractorError(error_message, expected=True)
        raise ExtractorError('no conn, hlsvp or url_encoded_fmt_stream_map information found in video info')

    # uploader
    video_uploader = try_get(video_info, lambda x: x['author'][0], compat_str)
    if video_uploader:
        video_uploader = compat_urllib_parse_unquote_plus(video_uploader)
    else:
        self._downloader.report_warning('unable to extract uploader name')

    # uploader_id
    video_uploader_id = None
    video_uploader_url = None
    mobj = re.search(
        r'<link itemprop="url" href="(?P<uploader_url>https?://www\.youtube\.com/(?:user|channel)/(?P<uploader_id>[^"]+))">',
        video_webpage)
    if mobj is not None:
        video_uploader_id = mobj.group('uploader_id')
        video_uploader_url = mobj.group('uploader_url')
    else:
        self._downloader.report_warning('unable to extract uploader nickname')

    channel_id = self._html_search_meta(
        'channelId', video_webpage, 'channel id')
    channel_url = 'http://www.youtube.com/channel/%s' % channel_id if channel_id else None

    # thumbnail image
    # We try first to get a high quality image:
    m_thumb = re.search(r'<span itemprop="thumbnail".*?href="(.*?)">',
                        video_webpage, re.DOTALL)
    if m_thumb is not None:
        video_thumbnail = m_thumb.group(1)
    elif 'thumbnail_url' not in video_info:
        self._downloader.report_warning('unable to extract video thumbnail')
        video_thumbnail = None
    else:   # don't panic if we can't find it
        video_thumbnail = compat_urllib_parse_unquote_plus(video_info['thumbnail_url'][0])

    # upload date
    upload_date = self._html_search_meta(
        'datePublished', video_webpage, 'upload date', default=None)
    if not upload_date:
        upload_date = self._search_regex(
            [r'(?s)id="eow-date.*?>(.*?)</span>',
             r'(?:id="watch-uploader-info".*?>.*?|["\']simpleText["\']\s*:\s*["\'])(?:Published|Uploaded|Streamed live|Started) on (.+?)[<"\']'],
            video_webpage, 'upload date', default=None)
    upload_date = unified_strdate(upload_date)

    video_license = self._html_search_regex(
        r'<h4[^>]+class="title"[^>]*>\s*License\s*</h4>\s*<ul[^>]*>\s*<li>(.+?)</li',
        video_webpage, 'license', default=None)

    m_music = re.search(
        r'''(?x)
            <h4[^>]+class="title"[^>]*>\s*Music\s*</h4>\s*
            <ul[^>]*>\s*
            <li>(?P<title>.+?)
            by (?P<creator>.+?)
            (?:
                \(.+?\)|
                <a[^>]*
                    (?:
                        \bhref=["\']/red[^>]*>|             # drop possible
                        >\s*Listen ad-free with YouTube Red # YouTube Red ad
                    )
                .*?
            )?</li
        ''',
        video_webpage)
    if m_music:
        video_alt_title = remove_quotes(unescapeHTML(m_music.group('title')))
        video_creator = clean_html(m_music.group('creator'))
    else:
        video_alt_title = video_creator = None

    def extract_meta(field):
        return self._html_search_regex(
            r'<h4[^>]+class="title"[^>]*>\s*%s\s*</h4>\s*<ul[^>]*>\s*<li>(.+?)</li>\s*' % field,
            video_webpage, field, default=None)

    track = extract_meta('Song')
    artist = extract_meta('Artist')

    m_episode = re.search(
        r'<div[^>]+id="watch7-headline"[^>]*>\s*<span[^>]*>.*?>(?P<series>[^<]+)</a></b>\s*S(?P<season>\d+)\s*•\s*E(?P<episode>\d+)</span>',
        video_webpage)
    if m_episode:
        series = m_episode.group('series')
        season_number = int(m_episode.group('season'))
        episode_number = int(m_episode.group('episode'))
    else:
        series = season_number = episode_number = None

    m_cat_container = self._search_regex(
        r'(?s)<h4[^>]*>\s*Category\s*</h4>\s*<ul[^>]*>(.*?)</ul>',
        video_webpage, 'categories', default=None)
    if m_cat_container:
        category = self._html_search_regex(
            r'(?s)<a[^<]+>(.*?)</a>', m_cat_container, 'category',
            default=None)
        video_categories = None if category is None else [category]
    else:
        video_categories = None

    video_tags = [
        unescapeHTML(m.group('content'))
        for m in re.finditer(self._meta_regex('og:video:tag'), video_webpage)]

    def _extract_count(count_name):
        return str_to_int(self._search_regex(
            r'-%s-button[^>]+><span[^>]+class="yt-uix-button-content"[^>]*>([\d,]+)</span>'
            % re.escape(count_name),
            video_webpage, count_name, default=None))

    like_count = _extract_count('like')
    dislike_count = _extract_count('dislike')

    # subtitles
    video_subtitles = self.extract_subtitles(video_id, video_webpage)
    automatic_captions = self.extract_automatic_captions(video_id, video_webpage)

    video_duration = try_get(
        video_info, lambda x: int_or_none(x['length_seconds'][0]))
    if not video_duration:
        video_duration = parse_duration(self._html_search_meta(
            'duration', video_webpage, 'video duration'))

    # annotations
    video_annotations = None
    if self._downloader.params.get('writeannotations', False):
        video_annotations = self._extract_annotations(video_id)

    chapters = self._extract_chapters(description_original, video_duration)

    # Look for the DASH manifest
    if self._downloader.params.get('youtube_include_dash_manifest', True):
        dash_mpd_fatal = True
        for mpd_url in dash_mpds:
            dash_formats = {}
            try:
                def decrypt_sig(mobj):
                    s = mobj.group(1)
                    dec_s = self._decrypt_signature(s, video_id, player_url, age_gate)
                    return '/signature/%s' % dec_s

                mpd_url = re.sub(r'/s/([a-fA-F0-9\.]+)', decrypt_sig, mpd_url)

                for df in self._extract_mpd_formats(
                        mpd_url, video_id, fatal=dash_mpd_fatal,
                        formats_dict=self._formats):
                    if not df.get('filesize'):
                        df['filesize'] = _extract_filesize(df['url'])
                    # Do not overwrite DASH format found in some previous DASH manifest
                    if df['format_id'] not in dash_formats:
                        dash_formats[df['format_id']] = df
                # Additional DASH manifests may end up in HTTP Error 403 therefore
                # allow them to fail without bug report message if we already have
                # some DASH manifest succeeded. This is temporary workaround to reduce
                # burst of bug reports until we figure out the reason and whether it
                # can be fixed at all.
                dash_mpd_fatal = False
            except (ExtractorError, KeyError) as e:
                self.report_warning(
                    'Skipping DASH manifest: %r' % e, video_id)
            if dash_formats:
                # Remove the formats we found through non-DASH, they
                # contain less info and it can be wrong, because we use
                # fixed values (for example the resolution). See
                # https://github.com/rg3/youtube-dl/issues/5774 for an
                # example.
                formats = [f for f in formats if f['format_id'] not in dash_formats]
                formats.extend(dash_formats.values())

    # Check for malformed aspect ratio
    stretched_m = re.search(
        r'<meta\s+property="og:video:tag".*?content="yt:stretch=(?P<w>[0-9]+):(?P<h>[0-9]+)">',
        video_webpage)
    if stretched_m:
        w = float(stretched_m.group('w'))
        h = float(stretched_m.group('h'))
        # yt:stretch may hold invalid ratio data (e.g. for Q39EVAstoRM ratio is 17:0).
        # We will only process correct ratios.
        if w > 0 and h > 0:
            ratio = w / h
            for f in formats:
                if f.get('vcodec') != 'none':
                    f['stretched_ratio'] = ratio

    self._sort_formats(formats)

    self.mark_watched(video_id, video_info)

    return {
        'id': video_id,
        'uploader': video_uploader,
        'uploader_id': video_uploader_id,
        'uploader_url': video_uploader_url,
        'channel_id': channel_id,
        'channel_url': channel_url,
        'upload_date': upload_date,
        'license': video_license,
        'creator': video_creator or artist,
        'title': video_title,
        'alt_title': video_alt_title or track,
        'thumbnail': video_thumbnail,
        'description': video_description,
        'categories': video_categories,
        'tags': video_tags,
        'subtitles': video_subtitles,
        'automatic_captions': automatic_captions,
        'duration': video_duration,
        'age_limit': 18 if age_gate else 0,
        'annotations': video_annotations,
        'chapters': chapters,
        'webpage_url': proto + '://www.youtube.com/watch?v=%s' % video_id,
        'view_count': view_count,
        'like_count': like_count,
        'dislike_count': dislike_count,
        'average_rating': float_or_none(video_info.get('avg_rating', [None])[0]),
        'formats': formats,
        'is_live': is_live,
        'start_time': start_time,
        'end_time': end_time,
        'series': series,
        'season_number': season_number,
        'episode_number': episode_number,
        'track': track,
        'artist': artist,
    }
c5e8d7af 2121
5f6a1245 2122
8e7aad20 2123class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
78caa52a 2124 IE_DESC = 'YouTube.com playlists'
d67cc9fa 2125 _VALID_URL = r"""(?x)(?:
c5e8d7af
PH
2126 (?:https?://)?
2127 (?:\w+\.)?
c5e8d7af 2128 (?:
feaa5ad7
S
2129 youtube\.com/
2130 (?:
87dadd45 2131 (?:course|view_play_list|my_playlists|artist|playlist|watch|embed/(?:videoseries|[0-9A-Za-z_-]{11}))
feaa5ad7
S
2132 \? (?:.*?[&;])*? (?:p|a|list)=
2133 | p/
2134 )|
2135 youtu\.be/[0-9A-Za-z_-]{11}\?.*?\blist=
c5e8d7af 2136 )
d67cc9fa 2137 (
409b9324 2138 (?:PL|LL|EC|UU|FL|RD|UL|TL|OLAK5uy_)?[0-9A-Za-z-_]{10,}
5f6a1245 2139 # Top tracks, they can also include dots
d67cc9fa
JMF
2140 |(?:MC)[\w\.]*
2141 )
c5e8d7af
PH
2142 .*
2143 |
d0ba5587
S
2144 (%(playlist_id)s)
2145 )""" % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
8d81f3e3 2146 _TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s'
648e6a1f 2147 _VIDEO_RE = r'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})&amp;[^"]*?index=(?P<index>\d+)(?:[^>]+>(?P<title>[^<]+))?'
78caa52a 2148 IE_NAME = 'youtube:playlist'
81127aa5
PH
2149 _TESTS = [{
2150 'url': 'https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re',
2151 'info_dict': {
2152 'title': 'ytdl test PL',
a1cf99d0 2153 'id': 'PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re',
81127aa5
PH
2154 },
2155 'playlist_count': 3,
9291475f
PH
2156 }, {
2157 'url': 'https://www.youtube.com/playlist?list=PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx',
2158 'info_dict': {
acf757f4 2159 'id': 'PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx',
9291475f
PH
2160 'title': 'YDL_Empty_List',
2161 },
2162 'playlist_count': 0,
4201ba13 2163 'skip': 'This playlist is private',
9291475f
PH
2164 }, {
2165 'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
2166 'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
2167 'info_dict': {
2168 'title': '29C3: Not my department',
acf757f4 2169 'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
9291475f
PH
2170 },
2171 'playlist_count': 95,
2172 }, {
2173 'note': 'issue #673',
2174 'url': 'PLBB231211A4F62143',
2175 'info_dict': {
f46a8702 2176 'title': '[OLD]Team Fortress 2 (Class-based LP)',
acf757f4 2177 'id': 'PLBB231211A4F62143',
9291475f
PH
2178 },
2179 'playlist_mincount': 26,
2180 }, {
2181 'note': 'Large playlist',
2182 'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
2183 'info_dict': {
2184 'title': 'Uploads from Cauchemar',
acf757f4 2185 'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
9291475f
PH
2186 },
2187 'playlist_mincount': 799,
2188 }, {
2189 'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
2190 'info_dict': {
2191 'title': 'YDL_safe_search',
acf757f4 2192 'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
9291475f
PH
2193 },
2194 'playlist_count': 2,
4201ba13 2195 'skip': 'This playlist is private',
ac7553d0
PH
2196 }, {
2197 'note': 'embedded',
2d3d2997 2198 'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
ac7553d0
PH
2199 'playlist_count': 4,
2200 'info_dict': {
2201 'title': 'JODA15',
acf757f4 2202 'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
ac7553d0 2203 }
87dadd45
S
2204 }, {
2205 'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
2206 'playlist_mincount': 485,
2207 'info_dict': {
2208 'title': '2017 華語最新單曲 (2/24更新)',
2209 'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
2210 }
6b08cdf6
PH
2211 }, {
2212 'note': 'Embedded SWF player',
2d3d2997 2213 'url': 'https://www.youtube.com/p/YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ?hl=en_US&fs=1&rel=0',
6b08cdf6
PH
2214 'playlist_count': 4,
2215 'info_dict': {
2216 'title': 'JODA7',
acf757f4 2217 'id': 'YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ',
6b08cdf6 2218 }
4b7df0d3
JMF
2219 }, {
2220 'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
2221 'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
2222 'info_dict': {
acf757f4
PH
2223 'title': 'Uploads from Interstellar Movie',
2224 'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
4b7df0d3 2225 },
481cc733 2226 'playlist_mincount': 21,
dacb3a86
S
2227 }, {
2228 # Playlist URL that does not actually serve a playlist
2229 'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
2230 'info_dict': {
2231 'id': 'FqZTN594JQw',
2232 'ext': 'webm',
2233 'title': "Smiley's People 01 detective, Adventure Series, Action",
2234 'uploader': 'STREEM',
2235 'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
ec85ded8 2236 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
dacb3a86
S
2237 'upload_date': '20150526',
2238 'license': 'Standard YouTube License',
2239 'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
2240 'categories': ['People & Blogs'],
2241 'tags': list,
2242 'like_count': int,
2243 'dislike_count': int,
2244 },
2245 'params': {
2246 'skip_download': True,
2247 },
2248 'add_ie': [YoutubeIE.ie_key()],
481cc733
S
2249 }, {
2250 'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
2251 'info_dict': {
2252 'id': 'yeWKywCrFtk',
2253 'ext': 'mp4',
2254 'title': 'Small Scale Baler and Braiding Rugs',
2255 'uploader': 'Backus-Page House Museum',
2256 'uploader_id': 'backuspagemuseum',
ec85ded8 2257 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
481cc733
S
2258 'upload_date': '20161008',
2259 'license': 'Standard YouTube License',
2260 'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
2261 'categories': ['Nonprofits & Activism'],
2262 'tags': list,
2263 'like_count': int,
2264 'dislike_count': int,
2265 },
2266 'params': {
2267 'noplaylist': True,
2268 'skip_download': True,
2269 },
feaa5ad7
S
2270 }, {
2271 'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
2272 'only_matching': True,
a6857510
S
2273 }, {
2274 'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
2275 'only_matching': True,
409b9324
S
2276 }, {
2277 # music album playlist
2278 'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
2279 'only_matching': True,
81127aa5 2280 }]
c5e8d7af 2281
880e1c52
JMF
def _real_initialize(self):
    """Run the login step (delegates to ``self._login``)."""
    self._login()
2284
def _extract_mix(self, playlist_id):
    """Build a playlist result for a YouTube mix.

    The mixes are generated from a single video; the id of the playlist is
    just 'RD' + video_id, so the last 11 characters of ``playlist_id`` seed
    the first watch page.  Watch pages are then downloaded one after
    another, each one starting from the last video id collected so far,
    until a page contributes no video id that was not already collected.

    The playlist title is taken from the last page downloaded.
    """
    collected_ids = []
    seed_id = playlist_id[-11:]
    # The pattern only depends on the playlist id, so build it once.
    # (?x) makes the whitespace inside the pattern insignificant.
    mix_entry_re = r'''(?xs)data-video-username=".*?".*?
        href="/watch\?v=([0-9A-Za-z_-]{11})&amp;[^"]*?list=%s''' % re.escape(playlist_id)

    page_no = 1
    while True:
        webpage = self._download_webpage(
            'https://youtube.com/watch?v=%s&list=%s' % (seed_id, playlist_id),
            playlist_id, 'Downloading page {0} of Youtube mix'.format(page_no))
        # Fetch new pages until all the videos are repeated, it seems that
        # there are always 51 unique videos.
        fresh_ids = [
            video_id
            for video_id in orderedSet(re.findall(mix_entry_re, webpage))
            if video_id not in collected_ids]
        if not fresh_ids:
            break
        collected_ids += fresh_ids
        seed_id = collected_ids[-1]
        page_no += 1

    url_results = self._ids_to_results(collected_ids)

    # Try the known title containers in order; keep the first non-empty one
    # (or whatever the last lookup returned when none of them matched).
    title_span = None
    for class_name in ('playlist-title', 'title long-title', 'title'):
        title_span = get_element_by_attribute('class', class_name, webpage)
        if title_span:
            break

    return self.playlist_result(url_results, playlist_id, clean_html(title_span))
2316
def _extract_playlist(self, playlist_id):
    """Download a playlist page and turn it into a playlist result.

    Returns a ``(has_videos, playlist)`` tuple.  ``has_videos`` is False
    only when the page has no playlist title and yields no entries at all;
    ``playlist`` is the playlist result extended with the ``uploader``,
    ``uploader_id`` and ``uploader_url`` keys (each may be None).

    Raises ExtractorError (expected) when an alert on the page says the
    playlist does not exist, is private, or was requested with invalid
    parameters.  Any other alert, except the language chooser, is reported
    as a warning.
    """
    url = self._TEMPLATE_URL % playlist_id
    page = self._download_webpage(url, playlist_id)

    # the yt-alert-message now has tabindex attribute (see https://github.com/rg3/youtube-dl/issues/11604)
    for alert in re.findall(r'<div class="yt-alert-message"[^>]*>([^<]+)</div>', page):
        alert = alert.strip()
        # Check if the playlist exists or is private
        unavailable = re.match(
            r'[^<]*(?:The|This) playlist (?P<reason>does not exist|is private)[^<]*', alert)
        if unavailable:
            reason = unavailable.group('reason')
            message_parts = ['This playlist %s' % reason]
            if 'private' in reason:
                message_parts.append(', use --username or --netrc to access it')
            message_parts.append('.')
            raise ExtractorError(''.join(message_parts), expected=True)
        if re.match(r'[^<]*Invalid parameters[^<]*', alert):
            raise ExtractorError(
                'Invalid parameters. Maybe URL is incorrect.',
                expected=True)
        if re.match(r'[^<]*Choose your language[^<]*', alert):
            continue
        self.report_warning('Youtube gives an alert message: ' + alert)

    playlist_title = self._html_search_regex(
        r'(?s)<h1 class="pl-header-title[^"]*"[^>]*>\s*(.*?)\s*</h1>',
        page, 'title', default=None)

    # Common prefix of the two uploader patterns below.
    uploader_base_re = r'class=["\']pl-header-details[^>]+>\s*<li>\s*<a[^>]+\bhref='
    uploader = self._search_regex(
        r'%s["\']/(?:user|channel)/[^>]+>([^<]+)' % uploader_base_re,
        page, 'uploader', default=None)
    uploader_match = re.search(
        r'%s(["\'])(?P<path>/(?:user|channel)/(?P<uploader_id>.+?))\1' % uploader_base_re,
        page)
    uploader_id = uploader_match.group('uploader_id') if uploader_match else None
    uploader_url = (
        compat_urlparse.urljoin(url, uploader_match.group('path'))
        if uploader_match else None)

    has_videos = True
    if not playlist_title:
        # Some playlist URLs don't actually serve a playlist (e.g.
        # https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4)
        # Probe a throwaway iterator for a first entry to find out.
        nothing = object()
        has_videos = next(self._entries(page, playlist_id), nothing) is not nothing

    playlist = self.playlist_result(
        self._entries(page, playlist_id), playlist_id, playlist_title)
    playlist['uploader'] = uploader
    playlist['uploader_id'] = uploader_id
    playlist['uploader_url'] = uploader_url

    return has_videos, playlist
c5e8d7af 2378
def _check_download_just_video(self, url, playlist_id):
    """Decide whether ``url`` should be handled as a single video.

    Returns a ``(video_id, result)`` tuple:

    * ``(None, None)`` when the URL carries no video id;
    * ``(video_id, url_result)`` when it does and the ``noplaylist``
      parameter is set;
    * ``(video_id, None)`` when it does but the playlist should still be
      downloaded.
    """
    # Check if it's a video-specific URL: first the ``v`` query argument,
    # then the youtu.be / embed URL forms.
    query = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
    video_id = query.get('v', [None])[0]
    if not video_id:
        video_id = self._search_regex(
            r'(?:(?:^|//)youtu\.be/|youtube\.com/embed/(?!videoseries))([0-9A-Za-z_-]{11})', url,
            'video id', default=None)
    if not video_id:
        return None, None

    if self._downloader.params.get('noplaylist'):
        self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
        return video_id, self.url_result(video_id, 'Youtube', video_id=video_id)

    self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))
    return video_id, None
448830ce 2393
ebf1b291
S
def _real_extract(self, url):
    """Extract a playlist, a mix, or a single video from ``url``.

    Raises ExtractorError when ``url`` does not match ``_VALID_URL``.
    """
    # Extract playlist id
    url_match = re.match(self._VALID_URL, url)
    if url_match is None:
        raise ExtractorError('Invalid URL: %s' % url)
    playlist_id = url_match.group(1) or url_match.group(2)

    video_id, single_video = self._check_download_just_video(url, playlist_id)
    if single_video:
        return single_video

    if playlist_id.startswith(('RD', 'UL', 'PU')):
        # Mixes require a custom extraction process
        return self._extract_mix(playlist_id)

    has_videos, playlist = self._extract_playlist(playlist_id)
    if not has_videos and video_id:
        # Some playlist URLs don't actually serve a playlist (see
        # https://github.com/rg3/youtube-dl/issues/10537).
        # Fallback to plain video extraction if there is a video id
        # along with playlist id.
        return self.url_result(video_id, 'Youtube', video_id=video_id)
    return playlist
448830ce 2418
c5e8d7af 2419
class YoutubeChannelIE(YoutubePlaylistBaseInfoExtractor):
    """Extractor for ``/channel/<id>`` URLs."""

    IE_NAME = 'youtube:channel'
    IE_DESC = 'YouTube.com channels'
    _VALID_URL = r'https?://(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie)?\.com)/channel/(?P<id>[0-9A-Za-z_-]+)'
    _TEMPLATE_URL = 'https://www.youtube.com/channel/%s/videos'
    _VIDEO_RE = r'(?:title="(?P<title>[^"]+)"[^>]+)?href="/watch\?v=(?P<id>[0-9A-Za-z_-]+)&?'
    _TESTS = [{
        'note': 'paginated channel',
        'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
        'playlist_mincount': 91,
        'info_dict': {
            'id': 'UUKfVa3S1e4PHvxWcwyMMg8w',
            'title': 'Uploads from lex will',
        }
    }, {
        'note': 'Age restricted channel',
        # from https://www.youtube.com/user/DeusExOfficial
        'url': 'https://www.youtube.com/channel/UCs0ifCMCm1icqRbqhUINa0w',
        'playlist_mincount': 64,
        'info_dict': {
            'id': 'UUs0ifCMCm1icqRbqhUINa0w',
            'title': 'Uploads from Deus Ex',
        },
    }]

    @classmethod
    def suitable(cls, url):
        """Reject URLs that the playlists or live extractors accept."""
        if YoutubePlaylistsIE.suitable(url) or YoutubeLiveIE.suitable(url):
            return False
        return super(YoutubeChannelIE, cls).suitable(url)

    def _build_template_url(self, url, channel_id):
        """Return the videos page URL for ``channel_id``."""
        return self._TEMPLATE_URL % channel_id

    def _real_extract(self, url):
        """Extract the channel videos, via the uploads playlist if possible."""
        channel_id = self._match_id(url)
        url = self._build_template_url(url, channel_id)

        # Channel by page listing is restricted to 35 pages of 30 items, i.e. 1050 videos total (see #5778)
        # Workaround by extracting as a playlist if managed to obtain channel playlist URL
        # otherwise fallback on channel by page extraction
        lookup_page = self._download_webpage(
            url + '?view=57', channel_id,
            'Downloading channel page', fatal=False)
        channel_playlist_id = False
        if lookup_page is not False:
            channel_playlist_id = self._html_search_meta(
                'channelId', lookup_page, 'channel id', default=None)
            if not channel_playlist_id:
                # No channelId meta tag: try the app deep link meta tags.
                app_url = self._html_search_meta(
                    ('al:ios:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad'),
                    lookup_page, 'channel url', default=None)
                if app_url:
                    channel_playlist_id = self._search_regex(
                        r'vnd\.youtube://user/([0-9A-Za-z_-]+)',
                        app_url, 'channel id', default=None)
        if channel_playlist_id and channel_playlist_id.startswith('UC'):
            # Swap the 'UC' prefix for 'UU' and hand over to the playlist extractor.
            playlist_id = 'UU' + channel_playlist_id[2:]
            return self.url_result(
                compat_urlparse.urljoin(url, '/playlist?list=%s' % playlist_id), 'YoutubePlaylist')

        channel_page = self._download_webpage(url, channel_id, 'Downloading page #1')
        autogenerated_match = re.search(r'''(?x)
                class="[^"]*?(?:
                    channel-header-autogenerated-label|
                    yt-channel-title-autogenerated
                )[^"]*"''', channel_page)

        if autogenerated_match is not None:
            # The videos are contained in a single page
            # the ajax pages can't be used, they are empty
            entries = []
            for video_id, video_title in self.extract_videos_from_page(channel_page):
                entries.append(self.url_result(
                    video_id, 'Youtube', video_id=video_id,
                    video_title=video_title))
            return self.playlist_result(entries, channel_id)

        # Probe a throwaway iterator: when it yields nothing, surface the
        # alert shown on the page (if any) instead of an empty playlist.
        nothing = object()
        if next(self._entries(channel_page, channel_id), nothing) is nothing:
            alert_message = self._html_search_regex(
                r'(?s)<div[^>]+class=(["\']).*?\byt-alert-message\b.*?\1[^>]*>(?P<alert>[^<]+)</div>',
                channel_page, 'alert', default=None, group='alert')
            if alert_message:
                raise ExtractorError('Youtube said: %s' % alert_message, expected=True)

        return self.playlist_result(self._entries(channel_page, channel_id), channel_id)
c5e8d7af
PH
2509
2510
class YoutubeUserIE(YoutubeChannelIE):
    """Extractor for ``/user/<name>``, ``/c/<name>``, bare ``/<name>`` URLs and ``ytuser:<name>``."""

    IE_NAME = 'youtube:user'
    IE_DESC = 'YouTube.com user videos (URL or "ytuser" keyword)'
    _VALID_URL = r'(?:(?:https?://(?:\w+\.)?youtube\.com/(?:(?P<user>user|c)/)?(?!(?:attribution_link|watch|results|shared)(?:$|[^a-z_A-Z0-9-])))|ytuser:)(?!feed/)(?P<id>[A-Za-z0-9_-]+)'
    _TEMPLATE_URL = 'https://www.youtube.com/%s/%s/videos'

    _TESTS = [{
        'url': 'https://www.youtube.com/user/TheLinuxFoundation',
        'playlist_mincount': 320,
        'info_dict': {
            'id': 'UUfX55Sx5hEFjoC3cNs6mCUQ',
            'title': 'Uploads from The Linux Foundation',
        }
    }, {
        # Only available via https://www.youtube.com/c/12minuteathlete/videos
        # but not https://www.youtube.com/user/12minuteathlete/videos
        'url': 'https://www.youtube.com/c/12minuteathlete/videos',
        'playlist_mincount': 249,
        'info_dict': {
            'id': 'UUVjM-zV6_opMDx7WYxnjZiQ',
            'title': 'Uploads from 12 Minute Athlete',
        }
    }, {
        'url': 'ytuser:phihag',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/c/gametrailers',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/gametrailers',
        'only_matching': True,
    }, {
        # This channel is not available, geo restricted to JP
        'url': 'https://www.youtube.com/user/kananishinoSMEJ/videos',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Accept ``url`` only if no other Youtube*IE in this module does."""
        # Don't return True if the url can be extracted with another youtube
        # extractor: this regex is too permissive and would match as well.
        for name, klass in globals().items():
            if klass is cls:
                continue
            if not (name.startswith('Youtube') and name.endswith('IE')):
                continue
            if klass.suitable(url):
                return False
        return super(YoutubeUserIE, cls).suitable(url)

    def _build_template_url(self, url, channel_id):
        """Return the videos page URL, keeping the ``user``/``c`` path kind of ``url``."""
        url_match = re.match(self._VALID_URL, url)
        # Bare /<name> and ytuser:<name> forms have no path kind; default to 'user'.
        path_kind = url_match.group('user') or 'user'
        return self._TEMPLATE_URL % (path_kind, url_match.group('id'))
2561
b05654f0 2562
f07e276a
S
class YoutubeLiveIE(YoutubeBaseInfoExtractor):
    """Extractor for ``.../live`` URLs of users and channels."""

    IE_NAME = 'youtube:live'
    IE_DESC = 'YouTube.com live streams'
    _VALID_URL = r'(?P<base_url>https?://(?:\w+\.)?youtube\.com/(?:(?:user|channel|c)/)?(?P<id>[^/]+))/live'

    _TESTS = [{
        'url': 'https://www.youtube.com/user/TheYoungTurks/live',
        'info_dict': {
            'id': 'a48o2S1cPoo',
            'ext': 'mp4',
            'title': 'The Young Turks - Live Main Show',
            'uploader': 'The Young Turks',
            'uploader_id': 'TheYoungTurks',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
            'upload_date': '20150715',
            'license': 'Standard YouTube License',
            'description': 'md5:438179573adcdff3c97ebb1ee632b891',
            'categories': ['News & Politics'],
            'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/TheYoungTurks/live',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return the video the live page points at, else the base URL."""
        url_match = re.match(self._VALID_URL, url)
        channel_id = url_match.group('id')
        fallback = url_match.group('base_url')

        webpage = self._download_webpage(url, channel_id, fatal=False)
        if not webpage:
            return self.url_result(fallback)

        page_type = self._og_search_property(
            'type', webpage, 'page type', default='')
        video_id = self._html_search_meta(
            'videoId', webpage, 'video id', default=None)
        # Only trust the page when it is a video page carrying an
        # 11 character video id.
        is_video_page = (
            page_type.startswith('video') and video_id and
            re.match(r'^[0-9A-Za-z_-]{11}$', video_id))
        if is_video_page:
            return self.url_result(video_id, YoutubeIE.ie_key())
        return self.url_result(fallback)
2613
2614
e462474e
S
class YoutubePlaylistsIE(YoutubePlaylistsBaseInfoExtractor):
    """Extractor for the ``/playlists`` page of a user or channel.

    Only configuration lives here; extraction is inherited from the base
    class.
    """

    IE_NAME = 'youtube:playlists'
    IE_DESC = 'YouTube.com user/channel playlists'
    _VALID_URL = r'https?://(?:\w+\.)?youtube\.com/(?:user|channel)/(?P<id>[^/]+)/playlists'

    _TESTS = [{
        'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
        'playlist_mincount': 4,
        'info_dict': {
            'id': 'ThirstForScience',
            'title': 'Thirst for Science',
        },
    }, {
        # with "Load more" button
        'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
        'playlist_mincount': 70,
        'info_dict': {
            'id': 'igorkle1',
            'title': 'Игорь Клейнер',
        },
    }, {
        'url': 'https://www.youtube.com/channel/UCiU1dHvZObB2iP6xkJ__Icw/playlists',
        'playlist_mincount': 17,
        'info_dict': {
            'id': 'UCiU1dHvZObB2iP6xkJ__Icw',
            'title': 'Chem Player',
        },
    }]
0c148415
S
2643
2644
870f3bfc
S
class YoutubeSearchBaseInfoExtractor(YoutubePlaylistBaseInfoExtractor):
    """Base class of the search extractors; only overrides ``_VIDEO_RE``."""

    # Matches a watch link (11 character id) and, optionally, the title
    # attribute that follows it in the same tag.
    _VIDEO_RE = r'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})(?:[^"]*"[^>]+\btitle="(?P<title>[^"]+))?'
2647
2648
class YoutubeSearchIE(SearchInfoExtractor, YoutubeSearchBaseInfoExtractor):
    """Extractor for ``ytsearch`` keyword searches."""

    IE_DESC = 'YouTube.com searches'
    # there doesn't appear to be a real limit, for example if you search for
    # 'python' you get more than 8.000.000 results
    _MAX_RESULTS = float('inf')
    IE_NAME = 'youtube:search'
    _SEARCH_KEY = 'ytsearch'
    # Extra query string arguments; subclasses override this to alter the search.
    _EXTRA_QUERY_ARGS = {}
    _TESTS = []

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query.

        Result pages are downloaded one after another, following the "Next"
        link of each page, until at least ``n`` videos were collected, a
        page contains no videos, or there is no next page.  At most ``n``
        entries are returned.

        Raises ExtractorError (expected) when the result page shows a
        search message instead of results.
        """
        videos = []

        url_query = {
            'search_query': query.encode('utf-8'),
        }
        url_query.update(self._EXTRA_QUERY_ARGS)
        result_url = 'https://www.youtube.com/results?' + compat_urllib_parse_urlencode(url_query)

        for pagenum in itertools.count(1):
            data = self._download_json(
                result_url, video_id='query "%s"' % query,
                note='Downloading page %s' % pagenum,
                errnote='Unable to download API page',
                query={'spf': 'navigate'})
            html_content = data[1]['body']['content']

            if 'class="search-message' in html_content:
                raise ExtractorError(
                    '[youtube] No video results', expected=True)

            new_videos = list(self._process_page(html_content))
            videos += new_videos
            # Stop as soon as enough videos are collected.  Using ``>=``
            # (rather than ``>``) avoids downloading one more page whose
            # results would be thrown away when exactly ``n`` were found.
            if not new_videos or len(videos) >= n:
                break
            next_link = self._html_search_regex(
                r'href="(/results\?[^"]*\bsp=[^"]+)"[^>]*>\s*<span[^>]+class="[^"]*\byt-uix-button-content\b[^"]*"[^>]*>Next',
                html_content, 'next link', default=None)
            if next_link is None:
                break
            result_url = compat_urlparse.urljoin('https://www.youtube.com/', next_link)

        # Guarded slice: ``n`` may be float('inf') (see _MAX_RESULTS), which
        # is not a valid slice bound.
        if len(videos) > n:
            videos = videos[:n]
        return self.playlist_result(videos, query)
75dff0ee 2697
c9ae7b95 2698
class YoutubeSearchDateIE(YoutubeSearchIE):
    """``ytsearchdate`` variant of the search extractor (adds a sort argument)."""

    IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
    IE_DESC = 'YouTube.com searches, newest videos first'
    _SEARCH_KEY = 'ytsearchdate'
    # Merged into the search query string by the parent class.
    _EXTRA_QUERY_ARGS = {'search_sort': 'video_date_uploaded'}
75dff0ee 2704
c9ae7b95 2705
class YoutubeSearchURLIE(YoutubeSearchBaseInfoExtractor):
    """Extractor for ``/results?search_query=...`` (or ``q=...``) URLs."""

    IE_NAME = 'youtube:search_url'
    IE_DESC = 'YouTube.com search URLs'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?P<query>[^&]+)(?:[&]|$)'
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'title': 'youtube-dl test video',
        }
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return the videos of the results page, titled with the decoded query."""
        raw_query = re.match(self._VALID_URL, url).group('query')
        query = compat_urllib_parse_unquote_plus(raw_query)
        results_page = self._download_webpage(url, query)
        entries = self._process_page(results_page)
        return self.playlist_result(entries, playlist_title=query)
c9ae7b95
PH
2726
2727
class YoutubeShowIE(YoutubePlaylistsBaseInfoExtractor):
    """Extractor for ``/show/<id>`` URLs."""

    IE_NAME = 'youtube:show'
    IE_DESC = 'YouTube.com (multi-season) shows'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/show/(?P<id>[^?#]*)'
    _TESTS = [{
        'url': 'https://www.youtube.com/show/airdisasters',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'airdisasters',
            'title': 'Air Disasters',
        }
    }]

    def _real_extract(self, url):
        """Delegate to the parent extractor with the show's ``/playlists`` URL."""
        show_id = self._match_id(url)
        playlists_url = 'https://www.youtube.com/show/%s/playlists' % show_id
        return super(YoutubeShowIE, self)._real_extract(playlists_url)
04cc9617
JMF
2745
2746
class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
    """
    Base class for feed extractors
    Subclasses must define the _FEED_NAME and _PLAYLIST_TITLE properties.
    """
    _LOGIN_REQUIRED = True

    @property
    def IE_NAME(self):
        """Extractor name derived from the feed name."""
        return 'youtube:%s' % self._FEED_NAME

    def _real_initialize(self):
        """Run the login step (delegates to ``self._login``)."""
        self._login()

    def _entries(self, page):
        """Yield one result per not yet seen video id of the feed.

        Starts with ``page`` and keeps following the "load more" link,
        stopping when a portion brings no new video id or has no such link.
        """
        # The extraction process is the same as for playlists, but the regex
        # for the video ids doesn't contain an index
        # A set keeps the "already seen" check O(1) however long the feed is.
        seen_ids = set()
        more_widget_html = content_html = page
        for page_num in itertools.count(1):
            matches = re.findall(r'href="\s*/watch\?v=([0-9A-Za-z_-]{11})', content_html)

            # 'recommended' feed has infinite 'load more' and each new portion spins
            # the same videos in (sometimes) slightly different order, so we'll check
            # for unicity and break when portion has no new videos
            new_ids = [
                video_id for video_id in orderedSet(matches)
                if video_id not in seen_ids]
            if not new_ids:
                break

            seen_ids.update(new_ids)

            for entry in self._ids_to_results(new_ids):
                yield entry

            mobj = re.search(r'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html)
            if not mobj:
                break

            more = self._download_json(
                'https://youtube.com/%s' % mobj.group('more'), self._PLAYLIST_TITLE,
                'Downloading page #%s' % page_num,
                transform_source=uppercase_escape)
            content_html = more['content_html']
            more_widget_html = more['load_more_widget_html']

    def _real_extract(self, url):
        """Download the feed page and return its videos as a playlist."""
        page = self._download_webpage(
            'https://www.youtube.com/feed/%s' % self._FEED_NAME,
            self._PLAYLIST_TITLE)
        return self.playlist_result(
            self._entries(page), playlist_title=self._PLAYLIST_TITLE)
25f14e9f
S
2798
2799
class YoutubeWatchLaterIE(YoutubePlaylistIE):
    """Extractor for the watch later list, handled as playlist ``WL``."""

    IE_NAME = 'youtube:watchlater'
    IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/(?:feed/watch_later|(?:playlist|watch)\?(?:.+&)?list=WL)|:ytwatchlater'

    _TESTS = [{
        'url': 'https://www.youtube.com/playlist?list=WL',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?v=bCNU9TrbiRk&index=1&list=WL',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return the single video if one was requested, else the ``WL`` playlist."""
        single_video = self._check_download_just_video(url, 'WL')[1]
        if single_video:
            return single_video
        # Only the playlist half of the (has_videos, playlist) pair is needed.
        return self._extract_playlist('WL')[1]
f459d170 2819
5f6a1245 2820
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    """Extractor for the favourites list of the logged in account."""

    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube.com favourite videos, ":ytfav" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/my_favorites|:ytfav(?:ou?rites)?'
    _LOGIN_REQUIRED = True

    def _real_extract(self, url):
        """Find the favourites playlist id and delegate to the playlist extractor."""
        favourites_page = self._download_webpage(
            'https://www.youtube.com/my_favorites', 'Youtube Favourites videos')
        # The id is whatever follows the first "list=" on the page.
        favourites_id = self._search_regex(
            r'list=(.+?)["&]', favourites_page, 'favourites playlist id')
        return self.url_result(favourites_id, 'YoutubePlaylist')
15870e90
PH
2831
2832
25f14e9f
S
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    """Feed extractor configured for the ``recommended`` feed."""

    _FEED_NAME = 'recommended'
    _PLAYLIST_TITLE = 'Youtube Recommended videos'
    IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/recommended|:ytrec(?:ommended)?'
1ed5b5c9 2838
1ed5b5c9 2839
25f14e9f
S
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    """Feed extractor configured for the ``subscriptions`` feed."""

    _FEED_NAME = 'subscriptions'
    _PLAYLIST_TITLE = 'Youtube Subscriptions'
    IE_DESC = 'YouTube.com subscriptions feed, "ytsubs" keyword (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/subscriptions|:ytsubs(?:criptions)?'
1ed5b5c9 2845
1ed5b5c9 2846
25f14e9f
S
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    """Feed extractor configured for the ``history`` feed."""

    _FEED_NAME = 'history'
    _PLAYLIST_TITLE = 'Youtube History'
    IE_DESC = 'Youtube watch history, ":ythistory" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/history|:ythistory'
1ed5b5c9
JMF
2852
2853
15870e90
PH
class YoutubeTruncatedURLIE(InfoExtractor):
    """Matches watch/attribution URLs that carry no video id and explains why.

    Extraction always fails with a hint about quoting the URL in the shell.
    """

    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise an (expected) ExtractorError with a quoting hint."""
        hint = (
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like  youtube-dl '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply  youtube-dl BaW_jenozKc  .')
        raise ExtractorError(hint, expected=True)
772fd5cc
PH
2901
2902
class YoutubeTruncatedIDIE(InfoExtractor):
    """Matches watch URLs whose video id has 1 to 10 characters and reports them."""

    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise an (expected) ExtractorError naming the short id."""
        short_id = self._match_id(url)
        message = 'Incomplete YouTube ID %s. URL %s looks truncated.' % (short_id, url)
        raise ExtractorError(message, expected=True)