]> jfr.im git - yt-dlp.git/blame - youtube_dl/extractor/youtube.py
[youtube] Move metadata extraction after video availability check
[yt-dlp.git] / youtube_dl / extractor / youtube.py
CommitLineData
c5e8d7af 1# coding: utf-8
c5e8d7af 2
78caa52a
PH
3from __future__ import unicode_literals
4
5
0ca96d48 6import itertools
c5e8d7af 7import json
c4417ddb 8import os.path
d77ab8e2 9import random
c5e8d7af 10import re
42939b61 11import time
e0df6211 12import traceback
c5e8d7af 13
b05654f0 14from .common import InfoExtractor, SearchInfoExtractor
2b25cb5d 15from ..jsinterp import JSInterpreter
54256267 16from ..swfinterp import SWFInterpreter
4bb4a188 17from ..compat import (
edf3e38e 18 compat_chr,
8d81f3e3 19 compat_kwargs,
c5e8d7af 20 compat_parse_qs,
7fd002c0
S
21 compat_urllib_parse_unquote,
22 compat_urllib_parse_unquote_plus,
15707c7e 23 compat_urllib_parse_urlencode,
7c80519c 24 compat_urllib_parse_urlparse,
7c61bd36 25 compat_urlparse,
c5e8d7af 26 compat_str,
4bb4a188
PH
27)
28from ..utils import (
c5e8d7af 29 clean_html,
9b9c5355 30 error_to_compat_str,
c5e8d7af 31 ExtractorError,
2d30521a 32 float_or_none,
4bb4a188
PH
33 get_element_by_attribute,
34 get_element_by_id,
dd27fd17 35 int_or_none,
94278f72 36 mimetype2ext,
4bb4a188 37 orderedSet,
6310acf5 38 parse_codecs,
7c80519c 39 parse_duration,
54fc90aa 40 qualities,
0cb58b02 41 remove_quotes,
3995d37d 42 remove_start,
cf7e015f 43 smuggle_url,
c93d53f5 44 str_to_int,
556dbe7f 45 try_get,
c5e8d7af
PH
46 unescapeHTML,
47 unified_strdate,
cf7e015f 48 unsmuggle_url,
81c2f20b 49 uppercase_escape,
6e6bc8da 50 urlencode_postdata,
c5e8d7af
PH
51)
52
5f6a1245 53
class YoutubeBaseInfoExtractor(InfoExtractor):
    """Provide base functions for Youtube extractors"""
    _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
    _TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge'

    # Endpoints used by the multi-step sign-in flow implemented in _login()
    _LOOKUP_URL = 'https://accounts.google.com/_/signin/sl/lookup'
    _CHALLENGE_URL = 'https://accounts.google.com/_/signin/sl/challenge'
    # {0} is filled with the TL token extracted from the challenge response
    _TFA_URL = 'https://accounts.google.com/_/signin/challenge?hl=en&TL={0}'

    _NETRC_MACHINE = 'youtube'
    # If True it will raise an error if no login info is provided
    _LOGIN_REQUIRED = False

    _PLAYLIST_ID_RE = r'(?:PL|LL|EC|UU|FL|RD|UL|TL)[0-9A-Za-z-_]{10,}'

    def _set_language(self):
        """Set the PREF cookie on .youtube.com so that pages are requested with hl=en."""
        self._set_cookie(
            '.youtube.com', 'PREF', 'f1=50000000&hl=en',
            # YouTube sets the expire time to about two months
            expire_time=time.time() + 2 * 30 * 24 * 3600)

    def _ids_to_results(self, ids):
        """Return a list with one 'Youtube' url_result per video id in ids."""
        return [
            self.url_result(vid_id, 'Youtube', video_id=vid_id)
            for vid_id in ids]

    def _login(self):
        """
        Attempt to log in to YouTube.
        True is returned if successful or skipped.
        False is returned if login failed (every failure path returns False,
        never None).

        If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.
        """
        username, password = self._get_login_info()
        # No authentication to be performed
        if username is None:
            if self._LOGIN_REQUIRED and self._downloader.params.get('cookiefile') is None:
                raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
            return True

        login_page = self._download_webpage(
            self._LOGIN_URL, None,
            note='Downloading login page',
            errnote='unable to fetch login page', fatal=False)
        if login_page is False:
            return False

        login_form = self._hidden_inputs(login_page)

        def req(url, f_req, note, errnote):
            # POST the hidden login form fields plus the JSON-encoded f_req
            # payload; non-fatal, so False is returned when the request fails.
            data = login_form.copy()
            data.update({
                'pstMsg': 1,
                'checkConnection': 'youtube',
                'checkedDomains': 'youtube',
                'hl': 'en',
                'deviceinfo': '[null,null,null,[],null,"US",null,null,[],"GlifWebSignIn",null,[null,null,[]]]',
                'f.req': json.dumps(f_req),
                'flowName': 'GlifWebSignIn',
                'flowEntry': 'ServiceLogin',
            })
            return self._download_json(
                url, None, note=note, errnote=errnote,
                # Drop everything preceding the first '[' before JSON parsing
                transform_source=lambda s: re.sub(r'^[^[]*', '', s),
                fatal=False,
                data=urlencode_postdata(data), headers={
                    'Content-Type': 'application/x-www-form-urlencoded;charset=utf-8',
                    'Google-Accounts-XSRF': 1,
                })

        def warn(message):
            self._downloader.report_warning(message)

        lookup_req = [
            username,
            None, [], None, 'US', None, None, 2, False, True,
            [
                None, None,
                [2, 1, None, 1,
                 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn',
                 None, [], 4],
                1, [None, None, []], None, None, None, True
            ],
            username,
        ]

        lookup_results = req(
            self._LOOKUP_URL, lookup_req,
            'Looking up account info', 'Unable to look up account info')

        if lookup_results is False:
            return False

        user_hash = try_get(lookup_results, lambda x: x[0][2], compat_str)
        if not user_hash:
            warn('Unable to extract user hash')
            return False

        challenge_req = [
            user_hash,
            None, 1, None, [1, None, None, None, [password, None, True]],
            [
                None, None, [2, 1, None, 1, 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn', None, [], 4],
                1, [None, None, []], None, None, None, True
            ]]

        challenge_results = req(
            self._CHALLENGE_URL, challenge_req,
            'Logging in', 'Unable to log in')

        if challenge_results is False:
            return False

        login_res = try_get(challenge_results, lambda x: x[0][5], list)
        if login_res:
            login_msg = try_get(login_res, lambda x: x[5], compat_str)
            # Parenthesised on purpose: '%' binds tighter than the conditional
            # expression, so without the parentheses any other reason would be
            # reported without the 'Unable to login' prefix.
            warn('Unable to login: %s' % (
                'Invalid password'
                if login_msg == 'INCORRECT_ANSWER_ENTERED' else login_msg))
            return False

        res = try_get(challenge_results, lambda x: x[0][-1], list)
        if not res:
            warn('Unable to extract result entry')
            return False

        # Stays None when a challenge entry is present but is not a 2-step
        # verification; the generic CheckCookie check below then reports the
        # failure instead of the name being unbound.
        check_cookie_url = None

        tfa = try_get(res, lambda x: x[0][0], list)
        if tfa:
            tfa_str = try_get(tfa, lambda x: x[2], compat_str)
            if tfa_str == 'TWO_STEP_VERIFICATION':
                # SEND_SUCCESS - TFA code has been successfully sent to phone
                # QUOTA_EXCEEDED - reached the limit of TFA codes
                status = try_get(tfa, lambda x: x[5], compat_str)
                if status == 'QUOTA_EXCEEDED':
                    warn('Exceeded the limit of TFA codes, try later')
                    return False

                tl = try_get(challenge_results, lambda x: x[1][2], compat_str)
                if not tl:
                    warn('Unable to extract TL')
                    return False

                tfa_code = self._get_tfa_info('2-step verification code')

                if not tfa_code:
                    warn(
                        'Two-factor authentication required. Provide it either interactively or with --twofactor <code> '
                        '(Note that only TOTP (Google Authenticator App) codes work at this time.)')
                    return False

                # Codes may be entered with a leading 'G-'; it is not submitted
                tfa_code = remove_start(tfa_code, 'G-')

                tfa_req = [
                    user_hash, None, 2, None,
                    [
                        9, None, None, None, None, None, None, None,
                        [None, tfa_code, True, 2]
                    ]]

                tfa_results = req(
                    self._TFA_URL.format(tl), tfa_req,
                    'Submitting TFA code', 'Unable to submit TFA code')

                if tfa_results is False:
                    return False

                tfa_res = try_get(tfa_results, lambda x: x[0][5], list)
                if tfa_res:
                    tfa_msg = try_get(tfa_res, lambda x: x[5], compat_str)
                    # Parenthesised for the same precedence reason as above
                    warn('Unable to finish TFA: %s' % (
                        'Invalid TFA code'
                        if tfa_msg == 'INCORRECT_ANSWER_ENTERED' else tfa_msg))
                    return False

                check_cookie_url = try_get(
                    tfa_results, lambda x: x[0][-1][2], compat_str)
        else:
            check_cookie_url = try_get(res, lambda x: x[2], compat_str)

        if not check_cookie_url:
            warn('Unable to extract CheckCookie URL')
            return False

        check_cookie_results = self._download_webpage(
            check_cookie_url, None, 'Checking cookie', fatal=False)

        if check_cookie_results is False:
            return False

        if 'https://myaccount.google.com/' not in check_cookie_results:
            warn('Unable to log in')
            return False

        return True

    def _download_webpage_handle(self, *args, **kwargs):
        """Forward to the parent implementation with disable_polymer=true added to the query.

        The query mapping is copied first so that a dict supplied by the
        caller is never modified in place.
        """
        query = dict(kwargs.get('query') or {})
        query['disable_polymer'] = 'true'
        kwargs['query'] = query
        return super(YoutubeBaseInfoExtractor, self)._download_webpage_handle(
            *args, **compat_kwargs(kwargs))

    def _real_initialize(self):
        """Set the language cookie and attempt to log in; does nothing without a downloader."""
        if self._downloader is None:
            return
        self._set_language()
        if not self._login():
            return
c5e8d7af 261
8377574c 262
class YoutubeEntryListBaseInfoExtractor(YoutubeBaseInfoExtractor):
    # Extract entries from page with "Load more" button
    def _entries(self, page, playlist_id):
        """Yield the entries of page, then of every continuation page.

        Each chunk of HTML is handed to self._process_page(). The next chunk
        is located through the data-uix-load-more-href attribute of the
        current "Load more" widget and fetched as JSON. Iteration ends when
        no such link is found or the returned content is blank.
        """
        current_html = page
        widget_html = page
        page_num = 0
        while True:
            page_num += 1
            for item in self._process_page(current_html):
                yield item

            more_match = re.search(
                r'data-uix-load-more-href="/?(?P<more>[^"]+)"', widget_html)
            if more_match is None:
                return

            next_url = 'https://youtube.com/%s' % more_match.group('more')
            response = self._download_json(
                next_url, playlist_id,
                'Downloading page #%s' % page_num,
                transform_source=uppercase_escape)

            current_html = response['content_html']
            if not current_html.strip():
                # Some webpages show a "Load more" button but they don't
                # have more videos
                return
            widget_html = response['load_more_widget_html']
285
061a75ed
S
286
class YoutubePlaylistBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
    def _process_page(self, content):
        """Yield a 'Youtube' url_result for each (id, title) pair found in content."""
        for vid, title in self.extract_videos_from_page(content):
            yield self.url_result(vid, 'Youtube', vid, title)

    def extract_videos_from_page(self, page):
        """Return paired (video id, title) items matched by self._VIDEO_RE in page.

        Ids are kept in order of first appearance. When an id shows up again,
        its title is only filled in if none was stored for it yet.
        """
        found_ids = []
        found_titles = []
        for match in re.finditer(self._VIDEO_RE, page):
            # The link with index 0 is not the first video of the playlist (not sure if still actual)
            # NOTE(review): the test compares the 'id' group, not 'index', to '0' - confirm intent
            if 'index' in match.groupdict() and match.group('id') == '0':
                continue
            video_id = match.group('id')
            title = unescapeHTML(match.group('title'))
            if title:
                title = title.strip()
            if video_id in found_ids:
                position = found_ids.index(video_id)
                if title and not found_titles[position]:
                    found_titles[position] = title
            else:
                found_ids.append(video_id)
                found_titles.append(title)
        return zip(found_ids, found_titles)
311
312
061a75ed
S
class YoutubePlaylistsBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
    def _process_page(self, content):
        """Yield a 'YoutubePlaylist' url_result for each distinct playlist id linked in content."""
        linked_ids = re.findall(
            r'<h3[^>]+class="[^"]*yt-lockup-title[^"]*"[^>]*><a[^>]+href="/?playlist\?list=([0-9A-Za-z-_]{10,})"',
            content)
        # orderedSet is applied so that each playlist id is emitted only once
        for list_id in orderedSet(linked_ids):
            playlist_url = 'https://www.youtube.com/playlist?list=%s' % list_id
            yield self.url_result(playlist_url, 'YoutubePlaylist')

    def _real_extract(self, url):
        """Download url and return a playlist result built from the entries of that page."""
        playlist_id = self._match_id(url)
        webpage = self._download_webpage(url, playlist_id)
        # The title is optional: a missing og:title is not treated as fatal
        title = self._og_search_title(webpage, fatal=False)
        entries = self._entries(webpage, playlist_id)
        return self.playlist_result(entries, playlist_id, title)
0c148415
S
326
327
360e1ca5 328class YoutubeIE(YoutubeBaseInfoExtractor):
78caa52a 329 IE_DESC = 'YouTube.com'
cb7dfeea 330 _VALID_URL = r"""(?x)^
c5e8d7af 331 (
edb53e2d 332 (?:https?://|//) # http(s):// or protocol-independent URL
cb7dfeea 333 (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/|
484aaeb2 334 (?:www\.)?deturl\.com/www\.youtube\.com/|
e70dc1d1 335 (?:www\.)?pwnyoutube\.com/|
8b561bfc 336 (?:www\.)?hooktube\.com/|
f7000f3a 337 (?:www\.)?yourepeat\.com/|
e69ae5b9
JMF
338 tube\.majestyc\.net/|
339 youtube\.googleapis\.com/) # the various hostnames, with wildcard subdomains
c5e8d7af
PH
340 (?:.*?\#/)? # handle anchor (#/) redirect urls
341 (?: # the various things that can precede the ID:
ac7553d0 342 (?:(?:v|embed|e)/(?!videoseries)) # v/ or embed/ or e/
c5e8d7af 343 |(?: # or the v= param in all its forms
f7000f3a 344 (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
c5e8d7af 345 (?:\?|\#!?) # the params delimiter ? or # or #!
040ac686 346 (?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
c5e8d7af
PH
347 v=
348 )
f4b05232 349 ))
cbaed4bb
S
350 |(?:
351 youtu\.be| # just youtu.be/xxxx
6d4fc66b
S
352 vid\.plus| # or vid.plus/xxxx
353 zwearz\.com/watch| # or zwearz.com/watch/xxxx
cbaed4bb 354 )/
edb53e2d 355 |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
f4b05232 356 )
c5e8d7af 357 )? # all until now is optional -> you can pass the naked ID
8963d9c2 358 ([0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
d0ba5587
S
359 (?!.*?\blist=
360 (?:
361 %(playlist_id)s| # combined list/video URLs are handled by the playlist IE
362 WL # WL are handled by the watch later IE
363 )
364 )
c5e8d7af 365 (?(1).+)? # if we found the ID, everything can follow
d0ba5587 366 $""" % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
c5e8d7af 367 _NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
2c62dc26 368 _formats = {
c2d3cb4c 369 '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
370 '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
371 '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
372 '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
373 '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
374 '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
375 '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
376 '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
3834d3e3 377 # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
c2d3cb4c 378 '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
379 '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
380 '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
381 '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
382 '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
383 '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
e1a0bfdf 384 '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
c2d3cb4c 385 '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
386 '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
e1a0bfdf 387
388
389 # 3D videos
c2d3cb4c 390 '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
391 '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
392 '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
393 '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
e1a0bfdf 394 '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
395 '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
396 '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
836a086c 397
96fb5605 398 # Apple HTTP Live Streaming
11f12195 399 '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
c2d3cb4c 400 '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
401 '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
402 '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
403 '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
404 '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
e1a0bfdf 405 '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
406 '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
2c62dc26
PH
407
408 # DASH mp4 video
d23028a8
S
409 '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
410 '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
411 '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
412 '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
413 '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
414 '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/rg3/youtube-dl/issues/4559)
415 '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
416 '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
417 '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
418 '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
419 '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
420 '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},
836a086c 421
f6f1fc92 422 # Dash mp4 audio
d23028a8
S
423 '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
424 '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
425 '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
426 '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
427 '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
428 '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
429 '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},
836a086c
AZ
430
431 # Dash webm
d23028a8
S
432 '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
433 '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
434 '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
435 '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
436 '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
437 '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
438 '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
439 '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
440 '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
441 '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
442 '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
443 '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
444 '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
445 '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
446 '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
4c6b4764 447 # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
d23028a8
S
448 '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
449 '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
450 '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
451 '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
452 '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
453 '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
2c62dc26
PH
454
455 # Dash webm audio
d23028a8
S
456 '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
457 '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},
ce6b9a2d 458
0857baad 459 # Dash webm audio with opus inside
d23028a8
S
460 '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
461 '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
462 '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},
0857baad 463
ce6b9a2d
PH
464 # RTMP (unnamed)
465 '_rtmp': {'protocol': 'rtmp'},
c5e8d7af 466 }
23d17e4b 467 _SUBTITLE_FORMATS = ('ttml', 'vtt')
836a086c 468
fd5c4aab
S
469 _GEO_BYPASS = False
470
78caa52a 471 IE_NAME = 'youtube'
2eb88d95
PH
472 _TESTS = [
473 {
2d3d2997 474 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
4bc3a23e
PH
475 'info_dict': {
476 'id': 'BaW_jenozKc',
477 'ext': 'mp4',
478 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
479 'uploader': 'Philipp Hagemeister',
480 'uploader_id': 'phihag',
ec85ded8 481 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
4bc3a23e 482 'upload_date': '20121002',
7caf9830 483 'license': 'Standard YouTube License',
4bc3a23e
PH
484 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
485 'categories': ['Science & Technology'],
000b6b5a 486 'tags': ['youtube-dl'],
556dbe7f 487 'duration': 10,
3e7c1224
PH
488 'like_count': int,
489 'dislike_count': int,
7c80519c 490 'start_time': 1,
297a564b 491 'end_time': 9,
2eb88d95 492 }
0e853ca4 493 },
0e853ca4 494 {
2d3d2997 495 'url': 'https://www.youtube.com/watch?v=UxxajLWwzqY',
4bc3a23e
PH
496 'note': 'Test generic use_cipher_signature video (#897)',
497 'info_dict': {
498 'id': 'UxxajLWwzqY',
499 'ext': 'mp4',
500 'upload_date': '20120506',
501 'title': 'Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]',
0cb58b02 502 'alt_title': 'I Love It (feat. Charli XCX)',
7caf9830 503 'description': 'md5:f3ceb5ef83a08d95b9d146f973157cc8',
000b6b5a
S
504 'tags': ['Icona Pop i love it', 'sweden', 'pop music', 'big beat records', 'big beat', 'charli',
505 'xcx', 'charli xcx', 'girls', 'hbo', 'i love it', "i don't care", 'icona', 'pop',
506 'iconic ep', 'iconic', 'love', 'it'],
556dbe7f 507 'duration': 180,
4bc3a23e
PH
508 'uploader': 'Icona Pop',
509 'uploader_id': 'IconaPop',
ec85ded8 510 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IconaPop',
7caf9830 511 'license': 'Standard YouTube License',
0cb58b02 512 'creator': 'Icona Pop',
936784b2
S
513 'track': 'I Love It (feat. Charli XCX)',
514 'artist': 'Icona Pop',
2eb88d95 515 }
c108eb73
JMF
516 },
517 {
4bc3a23e
PH
518 'url': 'https://www.youtube.com/watch?v=07FYdnEawAQ',
519 'note': 'Test VEVO video with age protection (#956)',
520 'info_dict': {
521 'id': '07FYdnEawAQ',
522 'ext': 'mp4',
523 'upload_date': '20130703',
524 'title': 'Justin Timberlake - Tunnel Vision (Explicit)',
0cb58b02 525 'alt_title': 'Tunnel Vision',
4bc3a23e 526 'description': 'md5:64249768eec3bc4276236606ea996373',
556dbe7f 527 'duration': 419,
4bc3a23e
PH
528 'uploader': 'justintimberlakeVEVO',
529 'uploader_id': 'justintimberlakeVEVO',
ec85ded8 530 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/justintimberlakeVEVO',
7caf9830 531 'license': 'Standard YouTube License',
0cb58b02 532 'creator': 'Justin Timberlake',
7e72694b 533 'track': 'Tunnel Vision',
936784b2 534 'artist': 'Justin Timberlake',
34952f09 535 'age_limit': 18,
c108eb73
JMF
536 }
537 },
fccd3771 538 {
4bc3a23e
PH
539 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
540 'note': 'Embed-only video (#1746)',
541 'info_dict': {
542 'id': 'yZIXLfi8CZQ',
543 'ext': 'mp4',
544 'upload_date': '20120608',
545 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
546 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
547 'uploader': 'SET India',
94bfcd23 548 'uploader_id': 'setindia',
ec85ded8 549 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
7caf9830 550 'license': 'Standard YouTube License',
94bfcd23 551 'age_limit': 18,
fccd3771
PH
552 }
553 },
11b56058 554 {
2d3d2997 555 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=UxxajLWwzqY',
11b56058
PM
556 'note': 'Use the first video ID in the URL',
557 'info_dict': {
558 'id': 'BaW_jenozKc',
559 'ext': 'mp4',
560 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
561 'uploader': 'Philipp Hagemeister',
562 'uploader_id': 'phihag',
ec85ded8 563 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
11b56058 564 'upload_date': '20121002',
7caf9830 565 'license': 'Standard YouTube License',
11b56058
PM
566 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
567 'categories': ['Science & Technology'],
568 'tags': ['youtube-dl'],
556dbe7f 569 'duration': 10,
11b56058
PM
570 'like_count': int,
571 'dislike_count': int,
34a7de29
S
572 },
573 'params': {
574 'skip_download': True,
575 },
11b56058 576 },
dd27fd17 577 {
2d3d2997 578 'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
4bc3a23e
PH
579 'note': '256k DASH audio (format 141) via DASH manifest',
580 'info_dict': {
581 'id': 'a9LDPn-MO4I',
582 'ext': 'm4a',
583 'upload_date': '20121002',
584 'uploader_id': '8KVIDEO',
ec85ded8 585 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
4bc3a23e
PH
586 'description': '',
587 'uploader': '8KVIDEO',
7caf9830 588 'license': 'Standard YouTube License',
4bc3a23e 589 'title': 'UHDTV TEST 8K VIDEO.mp4'
4919603f 590 },
4bc3a23e
PH
591 'params': {
592 'youtube_include_dash_manifest': True,
593 'format': '141',
4919603f 594 },
de3c7fe0 595 'skip': 'format 141 not served anymore',
dd27fd17 596 },
3489b7d2
JMF
597 # DASH manifest with encrypted signature
598 {
78caa52a
PH
599 'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
600 'info_dict': {
601 'id': 'IB3lcPjvWLA',
602 'ext': 'm4a',
b766eb27
JMF
603 'title': 'Afrojack, Spree Wilson - The Spark ft. Spree Wilson',
604 'description': 'md5:12e7067fa6735a77bdcbb58cb1187d2d',
556dbe7f 605 'duration': 244,
78caa52a
PH
606 'uploader': 'AfrojackVEVO',
607 'uploader_id': 'AfrojackVEVO',
608 'upload_date': '20131011',
7caf9830 609 'license': 'Standard YouTube License',
3489b7d2 610 },
4bc3a23e 611 'params': {
78caa52a 612 'youtube_include_dash_manifest': True,
de3c7fe0 613 'format': '141/bestaudio[ext=m4a]',
3489b7d2
JMF
614 },
615 },
aaeb86f6
S
616 # JS player signature function name containing $
617 {
618 'url': 'https://www.youtube.com/watch?v=nfWlot6h_JM',
619 'info_dict': {
620 'id': 'nfWlot6h_JM',
621 'ext': 'm4a',
622 'title': 'Taylor Swift - Shake It Off',
0cb58b02 623 'alt_title': 'Shake It Off',
f57b7835 624 'description': 'md5:95f66187cd7c8b2c13eb78e1223b63c3',
556dbe7f 625 'duration': 242,
aaeb86f6
S
626 'uploader': 'TaylorSwiftVEVO',
627 'uploader_id': 'TaylorSwiftVEVO',
628 'upload_date': '20140818',
7caf9830 629 'license': 'Standard YouTube License',
0cb58b02 630 'creator': 'Taylor Swift',
aaeb86f6
S
631 },
632 'params': {
633 'youtube_include_dash_manifest': True,
de3c7fe0 634 'format': '141/bestaudio[ext=m4a]',
aaeb86f6
S
635 },
636 },
aa79ac0c
PH
637 # Controversy video
638 {
639 'url': 'https://www.youtube.com/watch?v=T4XJQO3qol8',
640 'info_dict': {
641 'id': 'T4XJQO3qol8',
642 'ext': 'mp4',
556dbe7f 643 'duration': 219,
aa79ac0c
PH
644 'upload_date': '20100909',
645 'uploader': 'The Amazing Atheist',
646 'uploader_id': 'TheAmazingAtheist',
ec85ded8 647 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheAmazingAtheist',
7caf9830 648 'license': 'Standard YouTube License',
aa79ac0c
PH
649 'title': 'Burning Everyone\'s Koran',
650 'description': 'SUBSCRIBE: http://www.youtube.com/saturninefilms\n\nEven Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html',
651 }
c522adb1
JMF
652 },
653 # Normal age-gate video (No vevo, embed allowed)
654 {
2d3d2997 655 'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
c522adb1
JMF
656 'info_dict': {
657 'id': 'HtVdAasjOgU',
658 'ext': 'mp4',
659 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
ec85ded8 660 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
556dbe7f 661 'duration': 142,
c522adb1
JMF
662 'uploader': 'The Witcher',
663 'uploader_id': 'WitcherGame',
ec85ded8 664 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
c522adb1 665 'upload_date': '20140605',
7caf9830 666 'license': 'Standard YouTube License',
34952f09 667 'age_limit': 18,
c522adb1
JMF
668 },
669 },
fccae2b9
S
670 # Age-gate video with encrypted signature
671 {
2d3d2997 672 'url': 'https://www.youtube.com/watch?v=6kLq3WMV1nU',
fccae2b9
S
673 'info_dict': {
674 'id': '6kLq3WMV1nU',
675 'ext': 'mp4',
676 'title': 'Dedication To My Ex (Miss That) (Lyric Video)',
677 'description': 'md5:33765bb339e1b47e7e72b5490139bb41',
556dbe7f 678 'duration': 247,
fccae2b9
S
679 'uploader': 'LloydVEVO',
680 'uploader_id': 'LloydVEVO',
ec85ded8 681 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/LloydVEVO',
fccae2b9 682 'upload_date': '20110629',
7caf9830 683 'license': 'Standard YouTube License',
34952f09 684 'age_limit': 18,
fccae2b9
S
685 },
686 },
774e208f 687 # video_info is None (https://github.com/rg3/youtube-dl/issues/4421)
7d02dcfa 688 # YouTube Red ad is not captured for creator
774e208f
PH
689 {
690 'url': '__2ABJjxzNo',
691 'info_dict': {
692 'id': '__2ABJjxzNo',
693 'ext': 'mp4',
556dbe7f 694 'duration': 266,
774e208f
PH
695 'upload_date': '20100430',
696 'uploader_id': 'deadmau5',
ec85ded8 697 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
0cb58b02 698 'creator': 'deadmau5',
774e208f
PH
699 'description': 'md5:12c56784b8032162bb936a5f76d55360',
700 'uploader': 'deadmau5',
7caf9830 701 'license': 'Standard YouTube License',
774e208f 702 'title': 'Deadmau5 - Some Chords (HD)',
0cb58b02 703 'alt_title': 'Some Chords',
774e208f
PH
704 },
705 'expected_warnings': [
706 'DASH manifest missing',
707 ]
e52a40ab
PH
708 },
709 # Olympics (https://github.com/rg3/youtube-dl/issues/4431)
710 {
711 'url': 'lqQg6PlCWgI',
712 'info_dict': {
713 'id': 'lqQg6PlCWgI',
714 'ext': 'mp4',
556dbe7f 715 'duration': 6085,
90227264 716 'upload_date': '20150827',
cbe2bd91 717 'uploader_id': 'olympic',
ec85ded8 718 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
7caf9830 719 'license': 'Standard YouTube License',
cbe2bd91 720 'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
be49068d 721 'uploader': 'Olympic',
cbe2bd91
PH
722 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
723 },
724 'params': {
725 'skip_download': 'requires avconv',
e52a40ab 726 }
cbe2bd91 727 },
6271f1ca
PH
728 # Non-square pixels
729 {
730 'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
731 'info_dict': {
732 'id': '_b-2C3KPAM0',
733 'ext': 'mp4',
734 'stretched_ratio': 16 / 9.,
556dbe7f 735 'duration': 85,
6271f1ca
PH
736 'upload_date': '20110310',
737 'uploader_id': 'AllenMeow',
ec85ded8 738 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
6271f1ca
PH
739 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
740 'uploader': '孫艾倫',
7caf9830 741 'license': 'Standard YouTube License',
6271f1ca
PH
742 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
743 },
06b491eb
S
744 },
745 # url_encoded_fmt_stream_map is empty string
746 {
747 'url': 'qEJwOuvDf7I',
748 'info_dict': {
749 'id': 'qEJwOuvDf7I',
f57b7835 750 'ext': 'webm',
06b491eb
S
751 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
752 'description': '',
753 'upload_date': '20150404',
754 'uploader_id': 'spbelect',
755 'uploader': 'Наблюдатели Петербурга',
756 },
757 'params': {
758 'skip_download': 'requires avconv',
e323cf3f
S
759 },
760 'skip': 'This live event has ended.',
06b491eb 761 },
da77d856
S
762 # Extraction from multiple DASH manifests (https://github.com/rg3/youtube-dl/pull/6097)
763 {
764 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
765 'info_dict': {
766 'id': 'FIl7x6_3R5Y',
767 'ext': 'mp4',
768 'title': 'md5:7b81415841e02ecd4313668cde88737a',
769 'description': 'md5:116377fd2963b81ec4ce64b542173306',
556dbe7f 770 'duration': 220,
da77d856
S
771 'upload_date': '20150625',
772 'uploader_id': 'dorappi2000',
ec85ded8 773 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
da77d856 774 'uploader': 'dorappi2000',
7caf9830 775 'license': 'Standard YouTube License',
be49068d 776 'formats': 'mincount:32',
da77d856 777 },
2ee8f5d8 778 },
8a1a26ce
YCH
779 # DASH manifest with segment_list
780 {
781 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
782 'md5': '8ce563a1d667b599d21064e982ab9e31',
783 'info_dict': {
784 'id': 'CsmdDsKjzN8',
785 'ext': 'mp4',
17ee98e1 786 'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510
8a1a26ce
YCH
787 'uploader': 'Airtek',
788 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
789 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
7caf9830 790 'license': 'Standard YouTube License',
8a1a26ce
YCH
791 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
792 },
793 'params': {
794 'youtube_include_dash_manifest': True,
795 'format': '135', # bestvideo
be49068d
S
796 },
797 'skip': 'This live event has ended.',
2ee8f5d8 798 },
cf7e015f
S
799 {
800 # Multifeed videos (multiple cameras), URL is for Main Camera
801 'url': 'https://www.youtube.com/watch?v=jqWvoWXjCVs',
802 'info_dict': {
803 'id': 'jqWvoWXjCVs',
804 'title': 'teamPGP: Rocket League Noob Stream',
805 'description': 'md5:dc7872fb300e143831327f1bae3af010',
806 },
807 'playlist': [{
808 'info_dict': {
809 'id': 'jqWvoWXjCVs',
810 'ext': 'mp4',
811 'title': 'teamPGP: Rocket League Noob Stream (Main Camera)',
812 'description': 'md5:dc7872fb300e143831327f1bae3af010',
556dbe7f 813 'duration': 7335,
cf7e015f
S
814 'upload_date': '20150721',
815 'uploader': 'Beer Games Beer',
816 'uploader_id': 'beergamesbeer',
ec85ded8 817 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
7caf9830 818 'license': 'Standard YouTube License',
cf7e015f
S
819 },
820 }, {
821 'info_dict': {
822 'id': '6h8e8xoXJzg',
823 'ext': 'mp4',
824 'title': 'teamPGP: Rocket League Noob Stream (kreestuh)',
825 'description': 'md5:dc7872fb300e143831327f1bae3af010',
556dbe7f 826 'duration': 7337,
cf7e015f
S
827 'upload_date': '20150721',
828 'uploader': 'Beer Games Beer',
829 'uploader_id': 'beergamesbeer',
ec85ded8 830 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
7caf9830 831 'license': 'Standard YouTube License',
cf7e015f
S
832 },
833 }, {
834 'info_dict': {
835 'id': 'PUOgX5z9xZw',
836 'ext': 'mp4',
837 'title': 'teamPGP: Rocket League Noob Stream (grizzle)',
838 'description': 'md5:dc7872fb300e143831327f1bae3af010',
556dbe7f 839 'duration': 7337,
cf7e015f
S
840 'upload_date': '20150721',
841 'uploader': 'Beer Games Beer',
842 'uploader_id': 'beergamesbeer',
ec85ded8 843 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
7caf9830 844 'license': 'Standard YouTube License',
cf7e015f
S
845 },
846 }, {
847 'info_dict': {
848 'id': 'teuwxikvS5k',
849 'ext': 'mp4',
850 'title': 'teamPGP: Rocket League Noob Stream (zim)',
851 'description': 'md5:dc7872fb300e143831327f1bae3af010',
556dbe7f 852 'duration': 7334,
cf7e015f
S
853 'upload_date': '20150721',
854 'uploader': 'Beer Games Beer',
855 'uploader_id': 'beergamesbeer',
ec85ded8 856 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
7caf9830 857 'license': 'Standard YouTube License',
cf7e015f
S
858 },
859 }],
860 'params': {
861 'skip_download': True,
862 },
cbaed4bb 863 },
f9f49d87
S
864 {
865 # Multifeed video with comma in title (see https://github.com/rg3/youtube-dl/issues/8536)
866 'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
867 'info_dict': {
868 'id': 'gVfLd0zydlo',
869 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
870 },
871 'playlist_count': 2,
be49068d 872 'skip': 'Not multifeed anymore',
f9f49d87 873 },
cbaed4bb 874 {
2d3d2997 875 'url': 'https://vid.plus/FlRa-iH7PGw',
cbaed4bb 876 'only_matching': True,
0e49d9a6 877 },
6d4fc66b 878 {
2d3d2997 879 'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
6d4fc66b
S
880 'only_matching': True,
881 },
0e49d9a6 882 {
61f92af1 883 # Title with JS-like syntax "};" (see https://github.com/rg3/youtube-dl/issues/7468)
a8776b10
S
884 # Also tests cut-off URL expansion in video description (see
885 # https://github.com/rg3/youtube-dl/issues/1892,
886 # https://github.com/rg3/youtube-dl/issues/8164)
0e49d9a6
LL
887 'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
888 'info_dict': {
889 'id': 'lsguqyKfVQg',
890 'ext': 'mp4',
891 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
0cb58b02 892 'alt_title': 'Dark Walk',
0e49d9a6 893 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
556dbe7f 894 'duration': 133,
0e49d9a6
LL
895 'upload_date': '20151119',
896 'uploader_id': 'IronSoulElf',
ec85ded8 897 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
0e49d9a6 898 'uploader': 'IronSoulElf',
7caf9830 899 'license': 'Standard YouTube License',
0cb58b02 900 'creator': 'Todd Haberman, Daniel Law Heath & Aaron Kaplan',
0e49d9a6
LL
901 },
902 'params': {
903 'skip_download': True,
904 },
905 },
61f92af1
S
906 {
907 # Tags with '};' (see https://github.com/rg3/youtube-dl/issues/7468)
908 'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
909 'only_matching': True,
910 },
313dfc45
LL
911 {
912 # Video with yt:stretch=17:0
913 'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
914 'info_dict': {
915 'id': 'Q39EVAstoRM',
916 'ext': 'mp4',
917 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
918 'description': 'md5:ee18a25c350637c8faff806845bddee9',
919 'upload_date': '20151107',
920 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
921 'uploader': 'CH GAMER DROID',
922 },
923 'params': {
924 'skip_download': True,
925 },
be49068d 926 'skip': 'This video does not exist.',
313dfc45 927 },
7caf9830
S
928 {
929 # Video licensed under Creative Commons
930 'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
931 'info_dict': {
932 'id': 'M4gD1WSo5mA',
933 'ext': 'mp4',
934 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
935 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
556dbe7f 936 'duration': 721,
7caf9830
S
937 'upload_date': '20150127',
938 'uploader_id': 'BerkmanCenter',
ec85ded8 939 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
556dbe7f 940 'uploader': 'The Berkman Klein Center for Internet & Society',
7caf9830
S
941 'license': 'Creative Commons Attribution license (reuse allowed)',
942 },
943 'params': {
944 'skip_download': True,
945 },
946 },
fd050249
S
947 {
948 # Channel-like uploader_url
949 'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
950 'info_dict': {
951 'id': 'eQcmzGIKrzg',
952 'ext': 'mp4',
953 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
954 'description': 'md5:dda0d780d5a6e120758d1711d062a867',
556dbe7f 955 'duration': 4060,
fd050249
S
956 'upload_date': '20151119',
957 'uploader': 'Bernie 2016',
958 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
ec85ded8 959 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
fd050249
S
960 'license': 'Creative Commons Attribution license (reuse allowed)',
961 },
962 'params': {
963 'skip_download': True,
964 },
965 },
040ac686
S
966 {
967 'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
968 'only_matching': True,
7f29cf54
S
969 },
970 {
971 # YouTube Red paid video (https://github.com/rg3/youtube-dl/issues/10059)
972 'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
973 'only_matching': True,
6496ccb4
S
974 },
975 {
976 # Rental video preview
977 'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
978 'info_dict': {
979 'id': 'uGpuVWrhIzE',
980 'ext': 'mp4',
981 'title': 'Piku - Trailer',
982 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
983 'upload_date': '20150811',
984 'uploader': 'FlixMatrix',
985 'uploader_id': 'FlixMatrixKaravan',
ec85ded8 986 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
6496ccb4
S
987 'license': 'Standard YouTube License',
988 },
989 'params': {
990 'skip_download': True,
991 },
022a5d66 992 },
12afdc2a
S
993 {
994 # YouTube Red video with episode data
995 'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
996 'info_dict': {
997 'id': 'iqKdEhx-dD4',
998 'ext': 'mp4',
999 'title': 'Isolation - Mind Field (Ep 1)',
556dbe7f
S
1000 'description': 'md5:8013b7ddea787342608f63a13ddc9492',
1001 'duration': 2085,
12afdc2a
S
1002 'upload_date': '20170118',
1003 'uploader': 'Vsauce',
1004 'uploader_id': 'Vsauce',
1005 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
1006 'license': 'Standard YouTube License',
1007 'series': 'Mind Field',
1008 'season_number': 1,
1009 'episode_number': 1,
1010 },
1011 'params': {
1012 'skip_download': True,
1013 },
1014 'expected_warnings': [
1015 'Skipping DASH manifest',
1016 ],
1017 },
c7121fa7
S
1018 {
1019 # The following content has been identified by the YouTube community
1020 # as inappropriate or offensive to some audiences.
1021 'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
1022 'info_dict': {
1023 'id': '6SJNVb0GnPI',
1024 'ext': 'mp4',
1025 'title': 'Race Differences in Intelligence',
1026 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
1027 'duration': 965,
1028 'upload_date': '20140124',
1029 'uploader': 'New Century Foundation',
1030 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
1031 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
1032 'license': 'Standard YouTube License',
1033 'view_count': int,
1034 },
1035 'params': {
1036 'skip_download': True,
1037 },
1038 },
022a5d66
S
1039 {
1040 # itag 212
1041 'url': '1t24XAntNCY',
1042 'only_matching': True,
fd5c4aab
S
1043 },
1044 {
1045 # geo restricted to JP
1046 'url': 'sJL6WA-aGkQ',
1047 'only_matching': True,
1048 },
d0ba5587
S
1049 {
1050 'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
1051 'only_matching': True,
1052 },
2eb88d95
PH
1053 ]
1054
e0df6211
PH
def __init__(self, *args, **kwargs):
    """Set up the extractor and an empty per-instance player cache.

    All positional and keyword arguments are passed through unchanged to
    the parent class constructor.
    """
    super(YoutubeIE, self).__init__(*args, **kwargs)
    # Filled lazily by _decrypt_signature; keyed by
    # (player_url, signature layout id).
    self._player_cache = dict()
e0df6211 1058
c5e8d7af
PH
def report_video_info_webpage_download(self, video_id):
    """Announce on screen that the video info webpage is being downloaded."""
    message = '%s: Downloading video info webpage' % video_id
    self.to_screen(message)
c5e8d7af 1062
c5e8d7af
PH
def report_information_extraction(self, video_id):
    """Announce on screen that video information is being extracted."""
    message = '%s: Extracting video information' % video_id
    self.to_screen(message)
c5e8d7af
PH
1066
def report_unavailable_format(self, video_id, format):
    """Announce on screen that the requested format is not available."""
    details = (video_id, format)
    self.to_screen('%s: Format %s not available' % details)
c5e8d7af
PH
1070
def report_rtmp_download(self):
    """Announce on screen that the download will use the RTMP protocol."""
    message = 'RTMP download detected'
    self.to_screen(message)
c5e8d7af 1074
60064c53
PH
def _signature_cache_id(self, example_sig):
    """Return the layout id of a signature.

    The id is the length of every dot-separated part of example_sig,
    joined with dots.
    """
    part_lengths = [compat_str(len(chunk)) for chunk in example_sig.split('.')]
    return '.'.join(part_lengths)
60064c53
PH
1078
def _extract_signature_function(self, video_id, player_url, example_sig):
    """Return a callable that transforms a signature for the given player.

    The player type (file extension) and player id are parsed out of
    player_url.  If the 'youtube-sigfuncs' cache already holds an index
    list for this player and signature layout, a function applying that
    index list is returned.  Otherwise the player is downloaded, parsed
    with _parse_sig_js or _parse_sig_swf, and the resulting function is
    probed with a test string so that its index list can be stored in the
    cache.

    Raises ExtractorError if the player URL cannot be parsed, the cache
    key is not a plain file name, or the player type is neither 'js' nor
    'swf'.
    """
    id_m = re.match(
        r'.*?-(?P<id>[a-zA-Z0-9_-]+)(?:/watch_as3|/html5player(?:-new)?|(?:/[a-z]{2}_[A-Z]{2})?/base)?\.(?P<ext>[a-z]+)$',
        player_url)
    if not id_m:
        raise ExtractorError('Cannot identify player %r' % player_url)
    player_type = id_m.group('ext')
    player_id = id_m.group('id')

    # Read from filesystem cache
    func_id = '%s_%s_%s' % (
        player_type, player_id, self._signature_cache_id(example_sig))
    # The cache key must be a bare file name.  Checked explicitly rather
    # than with assert, which is stripped when Python runs with -O.
    if os.path.basename(func_id) != func_id:
        raise ExtractorError('Invalid signature cache key %r' % func_id)

    cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
    if cache_spec is not None:
        # cache_spec lists, for each output position, the input index to copy
        return lambda s: ''.join(s[i] for i in cache_spec)

    download_note = (
        'Downloading player %s' % player_url
        if self._downloader.params.get('verbose') else
        'Downloading %s player %s' % (player_type, player_id)
    )
    if player_type == 'js':
        code = self._download_webpage(
            player_url, video_id,
            note=download_note,
            errnote='Download of %s failed' % player_url)
        res = self._parse_sig_js(code)
    elif player_type == 'swf':
        urlh = self._request_webpage(
            player_url, video_id,
            note=download_note,
            errnote='Download of %s failed' % player_url)
        code = urlh.read()
        res = self._parse_sig_swf(code)
    else:
        # Raise instead of `assert False`: with -O the assert would vanish
        # and `res` would be unbound below.
        raise ExtractorError('Invalid player type %r' % player_type)

    # Feed a string whose i-th character has code point i, so the output
    # code points are exactly the source indexes the function picks.
    test_string = ''.join(map(compat_chr, range(len(example_sig))))
    cache_res = res(test_string)
    cache_spec = [ord(c) for c in cache_res]

    self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec)
    return res
1124
def _print_sig_code(self, func, example_sig):
    """Print Python code equivalent to the signature function func.

    func is run on a test string whose i-th character has code point i
    (as long as example_sig), so the code points of the result are the
    input indexes func selects.  Those indexes are turned into an
    expression of index/slice accesses on `s`, wrapped in an `if` on the
    lengths of the dot-separated parts of example_sig, and written with
    self.to_screen.
    """
    def gen_sig_code(idxs):
        # Yield 's[i]' / 's[a:b:c]' snippets covering idxs in order,
        # merging runs of consecutive indexes (step +1 or -1) into slices.
        def _genslice(start, end, step):
            # Slice text for the inclusive run start..end; the start is
            # omitted when 0, the stop when end + step is negative, and
            # the step when 1.
            starts = '' if start == 0 else str(start)
            ends = (':%d' % (end + step)) if end + step >= 0 else ':'
            steps = '' if step == 1 else (':%d' % step)
            return 's[%s%s%s]' % (starts, ends, steps)

        step = None
        # Squelch pyflakes warnings - start will be set when step is set
        start = '(Never used)'
        for i, prev in zip(idxs[1:], idxs[:-1]):
            if step is not None:
                # Inside a run: extend it, or emit it once it is broken
                if i - prev == step:
                    continue
                yield _genslice(start, prev, step)
                step = None
                continue
            if i - prev in [-1, 1]:
                # A new run starts at prev
                step = i - prev
                start = prev
                continue
            else:
                yield 's[%d]' % prev
        # Emit the final element or the run that is still open
        if step is None:
            yield 's[%d]' % i
        else:
            yield _genslice(start, i, step)

    test_string = ''.join(map(compat_chr, range(len(example_sig))))
    cache_res = func(test_string)
    cache_spec = [ord(c) for c in cache_res]
    expr_code = ' + '.join(gen_sig_code(cache_spec))
    signature_id_tuple = '(%s)' % (
        ', '.join(compat_str(len(p)) for p in example_sig.split('.')))
    code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
            ' return %s\n') % (signature_id_tuple, expr_code)
    self.to_screen('Extracted signature function:\n' + code)
edf3e38e 1163
e0df6211
PH
def _parse_sig_js(self, jscode):
    """Return a one-argument callable built from the JS player code.

    The name of the signature function is searched for in jscode, the
    function is extracted with JSInterpreter, and the returned callable
    invokes it with the signature as its only argument.
    """
    name_patterns = (
        r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
    )
    funcname = self._search_regex(
        name_patterns, jscode,
        'Initial JS player signature function name', group='sig')

    sig_function = JSInterpreter(jscode).extract_function(funcname)

    def decipher(s):
        return sig_function([s])

    return decipher
1173
def _parse_sig_swf(self, file_contents):
    """Return a one-argument callable built from the SWF player contents.

    The 'decipher' function of the 'SignatureDecipher' class is extracted
    with SWFInterpreter; the returned callable invokes it with the
    signature as its only argument.
    """
    interpreter = SWFInterpreter(file_contents)
    decipher_class = interpreter.extract_class('SignatureDecipher')
    decipher = interpreter.extract_function(decipher_class, 'decipher')

    def run(s):
        return decipher([s])

    return run
1180
def _decrypt_signature(self, s, video_id, player_url, age_gate=False):
    """Turn the encrypted s field into a working signature

    player_url may be protocol-relative or relative to
    https://www.youtube.com; it is made absolute first.  The signature
    function is looked up in (or added to) self._player_cache under the
    key (player_url, signature layout id).  Any failure is re-raised as
    ExtractorError carrying the formatted traceback.
    """
    if player_url is None:
        raise ExtractorError('Cannot decrypt signature without player_url')

    if player_url.startswith('//'):
        player_url = 'https:' + player_url
    elif re.match(r'https?://', player_url) is None:
        player_url = compat_urlparse.urljoin(
            'https://www.youtube.com', player_url)

    try:
        cache = self._player_cache
        cache_key = (player_url, self._signature_cache_id(s))
        if cache_key in cache:
            decipher = cache[cache_key]
        else:
            decipher = self._extract_signature_function(video_id, player_url, s)
            cache[cache_key] = decipher
        if self._downloader.params.get('youtube_print_sig_code'):
            self._print_sig_code(decipher, s)
        return decipher(s)
    except Exception as e:
        raise ExtractorError(
            'Signature extraction failed: ' + traceback.format_exc(), cause=e)
e0df6211 1207
def _get_subtitles(self, video_id, webpage):
    """Return the available subtitles of a video.

    The result maps each language code to a list of {'url', 'ext'} dicts,
    one per entry of self._SUBTITLE_FORMATS.  Only the first track of each
    language is used.  An empty dict is returned, after a warning, when
    the track list cannot be downloaded or contains no tracks.
    """
    list_url = 'https://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id
    try:
        subs_doc = self._download_xml(list_url, video_id, note=False)
    except ExtractorError as err:
        self._downloader.report_warning(
            'unable to download video subtitles: %s' % error_to_compat_str(err))
        return {}

    subtitles = {}
    for track in subs_doc.findall('track'):
        lang = track.attrib['lang_code']
        if lang in subtitles:
            # Keep only the first track seen for a language
            continue
        subtitles[lang] = [{
            'url': 'https://www.youtube.com/api/timedtext?' + compat_urllib_parse_urlencode({
                'lang': lang,
                'v': video_id,
                'fmt': ext,
                'name': track.attrib['name'].encode('utf-8'),
            }),
            'ext': ext,
        } for ext in self._SUBTITLE_FORMATS]

    if subtitles:
        return subtitles
    self._downloader.report_warning('video doesn\'t have subtitles')
    return {}
1239
a72778d3
S
1240 def _get_ytplayer_config(self, video_id, webpage):
1241 patterns = (
526b3b07
S
1242 # User data may contain arbitrary character sequences that may affect
1243 # JSON extraction with regex, e.g. when '};' is contained the second
1244 # regex won't capture the whole JSON. Yet working around by trying more
1245 # concrete regex first keeping in mind proper quoted string handling
1246 # to be implemented in future that will replace this workaround (see
1247 # https://github.com/rg3/youtube-dl/issues/7468,
1248 # https://github.com/rg3/youtube-dl/pull/7599)
a72778d3
S
1249 r';ytplayer\.config\s*=\s*({.+?});ytplayer',
1250 r';ytplayer\.config\s*=\s*({.+?});',
1251 )
1252 config = self._search_regex(
1253 patterns, webpage, 'ytplayer.config', default=None)
1254 if config:
1255 return self._parse_json(
1256 uppercase_escape(config), video_id, fatal=False)
0e49d9a6 1257
def _get_automatic_captions(self, video_id, webpage):
    """Return the automatic captions of a video.

    We need the webpage for getting the captions url, pass it as an
    argument to speed up the process.

    The result maps each target language code to a list of
    {'url', 'ext'} dicts, one per entry of self._SUBTITLE_FORMATS.
    Three sources in the player config args are tried in order:
    'ttsurl', 'player_response', and the
    'caption_tracks'/'caption_translation_languages' pair.  An empty dict
    is returned, after a warning, when none of them yields captions.
    """
    self.to_screen('%s: Looking for automatic captions' % video_id)
    player_config = self._get_ytplayer_config(video_id, webpage)
    err_msg = 'Couldn\'t find automatic captions for %s' % video_id
    if not player_config:
        self._downloader.report_warning(err_msg)
        return {}
    try:
        args = player_config['args']
        caption_url = args.get('ttsurl')
        if caption_url:
            timestamp = args['timestamp']
            # We get the available subtitles
            list_params = compat_urllib_parse_urlencode({
                'type': 'list',
                'tlangs': 1,
                'asrs': 1,
            })
            list_url = caption_url + '&' + list_params
            caption_list = self._download_xml(list_url, video_id)
            # The first <track> element supplies the source language and kind
            original_lang_node = caption_list.find('track')
            if original_lang_node is None:
                self._downloader.report_warning('Video doesn\'t have automatic captions')
                return {}
            original_lang = original_lang_node.attrib['lang_code']
            caption_kind = original_lang_node.attrib.get('kind', '')

            sub_lang_list = {}
            # One entry per <target> element (translation language)
            for lang_node in caption_list.findall('target'):
                sub_lang = lang_node.attrib['lang_code']
                sub_formats = []
                for ext in self._SUBTITLE_FORMATS:
                    params = compat_urllib_parse_urlencode({
                        'lang': original_lang,
                        'tlang': sub_lang,
                        'fmt': ext,
                        'ts': timestamp,
                        'kind': caption_kind,
                    })
                    sub_formats.append({
                        'url': caption_url + '&' + params,
                        'ext': ext,
                    })
                sub_lang_list[sub_lang] = sub_formats
            return sub_lang_list

        def make_captions(sub_url, sub_langs):
            # Build the captions dict by rewriting the 'tlang' and 'fmt'
            # query parameters of sub_url for every language and format.
            parsed_sub_url = compat_urllib_parse_urlparse(sub_url)
            caption_qs = compat_parse_qs(parsed_sub_url.query)
            captions = {}
            for sub_lang in sub_langs:
                sub_formats = []
                for ext in self._SUBTITLE_FORMATS:
                    caption_qs.update({
                        'tlang': [sub_lang],
                        'fmt': [ext],
                    })
                    sub_url = compat_urlparse.urlunparse(parsed_sub_url._replace(
                        query=compat_urllib_parse_urlencode(caption_qs, True)))
                    sub_formats.append({
                        'url': sub_url,
                        'ext': ext,
                    })
                captions[sub_lang] = sub_formats
            return captions

        # New captions format as of 22.06.2017
        player_response = args.get('player_response')
        if player_response and isinstance(player_response, compat_str):
            player_response = self._parse_json(
                player_response, video_id, fatal=False)
            if player_response:
                renderer = player_response['captions']['playerCaptionsTracklistRenderer']
                # The first caption track provides the base URL
                base_url = renderer['captionTracks'][0]['baseUrl']
                sub_lang_list = []
                for lang in renderer['translationLanguages']:
                    lang_code = lang.get('languageCode')
                    if lang_code:
                        sub_lang_list.append(lang_code)
                return make_captions(base_url, sub_lang_list)

        # Some videos don't provide ttsurl but rather caption_tracks and
        # caption_translation_languages (e.g. 20LmZk1hakA)
        # No longer used as of 22.06.2017
        caption_tracks = args['caption_tracks']
        caption_translation_languages = args['caption_translation_languages']
        caption_url = compat_parse_qs(caption_tracks.split(',')[0])['u'][0]
        sub_lang_list = []
        for lang in caption_translation_languages.split(','):
            lang_qs = compat_parse_qs(compat_urllib_parse_unquote_plus(lang))
            sub_lang = lang_qs.get('lc', [None])[0]
            if sub_lang:
                sub_lang_list.append(sub_lang)
        return make_captions(caption_url, sub_lang_list)
    # An extractor error can be raised by the download process if there are
    # no automatic captions but there are subtitles
    except (KeyError, IndexError, ExtractorError):
        self._downloader.report_warning(err_msg)
        return {}
1359
d77ab8e2
S
1360 def _mark_watched(self, video_id, video_info):
1361 playback_url = video_info.get('videostats_playback_base_url', [None])[0]
1362 if not playback_url:
1363 return
1364 parsed_playback_url = compat_urlparse.urlparse(playback_url)
1365 qs = compat_urlparse.parse_qs(parsed_playback_url.query)
1366
1367 # cpn generation algorithm is reverse engineered from base.js.
1368 # In fact it works even with dummy cpn.
1369 CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
1370 cpn = ''.join((CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(0, 16)))
1371
1372 qs.update({
1373 'ver': ['2'],
1374 'cpn': [cpn],
1375 })
1376 playback_url = compat_urlparse.urlunparse(
15707c7e 1377 parsed_playback_url._replace(query=compat_urllib_parse_urlencode(qs, True)))
d77ab8e2
S
1378
1379 self._download_webpage(
1380 playback_url, video_id, 'Marking watched',
1381 'Unable to mark watched', fatal=False)
1382
66c9fa36
S
@staticmethod
def _extract_urls(webpage):
    """Return all YouTube embeds found in webpage.

    The list holds, in this order: URLs of embedded players
    (iframe/embed/object/SWFObject/embedSWF/data-video-url), values of
    lazyYT `data-youtube-id` attributes, and `data-video_id` values of the
    Wordpress "YouTube Video Importer" plugin.
    """
    # Embedded YouTube player
    # NOTE: the embedSWF alternative used to read `embedSWF\(?:\s*`, which
    # required a literal ':' and so never matched an actual
    # `embedSWF("...")` call.
    entries = [
        unescapeHTML(mobj.group('url'))
        for mobj in re.finditer(r'''(?x)
        (?:
            <iframe[^>]+?src=|
            data-video-url=|
            <embed[^>]+?src=|
            embedSWF\(\s*|
            <object[^>]+data=|
            new\s+SWFObject\(
        )
        (["\'])
        (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
        (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
        \1''', webpage)]

    # lazyYT YouTube embed
    entries.extend(
        unescapeHTML(video_id)
        for video_id in re.findall(
            r'class="lazyYT" data-youtube-id="([^"]+)"', webpage))

    # Wordpress "YouTube Video Importer" plugin
    matches = re.findall(r'''(?x)<div[^>]+
        class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
        data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
    # Each match is a tuple of groups; the last one is the video id
    entries.extend(m[-1] for m in matches)

    return entries
1414
@staticmethod
def _extract_url(webpage):
    """Return the first YouTube embed found in webpage, or None if none."""
    found = YoutubeIE._extract_urls(webpage)
    if not found:
        return None
    return found[0]
1419
97665381
PH
@classmethod
def extract_id(cls, url):
    """Return the video id, i.e. group 2 of cls._VALID_URL matched on url.

    Raises ExtractorError when url does not match cls._VALID_URL.
    """
    match = re.match(cls._VALID_URL, url, re.VERBOSE)
    if match is not None:
        return match.group(2)
    raise ExtractorError('Invalid URL: %s' % url)
1427
1fb07d10
JG
1428 def _extract_annotations(self, video_id):
1429 url = 'https://www.youtube.com/annotations_invideo?features=1&legacy=1&video_id=%s' % video_id
69ea8ca4 1430 return self._download_webpage(url, video_id, note='Searching for annotations.', errnote='Unable to download video annotations.')
1fb07d10 1431
9cafc3fd
S
@staticmethod
def _extract_chapters(description, duration):
    """Extract chapters from seekTo links in an HTML video description.

    Every description line containing a `yt.www.watch.player.seekTo` link
    becomes a chapter that starts at the linked time point and ends at the
    next chapter's time point, or at duration for the last one.

    Returns a list of {'start_time', 'end_time', 'title'} dicts, or None
    when description is empty or has no chapter lines.  When duration is
    None the clamping against it is skipped and the last chapter, whose
    end is then unknown, is left out.
    """
    if not description:
        return None
    chapter_lines = re.findall(
        r'(?:^|<br\s*/>)([^<]*<a[^>]+onclick=["\']yt\.www\.watch\.player\.seekTo[^>]+>(\d{1,2}:\d{1,2}(?::\d{1,2})?)</a>[^>]*)(?=$|<br\s*/>)',
        description)
    if not chapter_lines:
        return None
    chapters = []
    for next_num, (chapter_line, time_point) in enumerate(
            chapter_lines, start=1):
        start_time = parse_duration(time_point)
        if start_time is None:
            continue
        # Guard against duration being None: comparing a number with None
        # raises TypeError on Python 3.
        if duration is not None and start_time > duration:
            break
        # next_num is the index of the following line in chapter_lines
        end_time = (duration if next_num == len(chapter_lines)
                    else parse_duration(chapter_lines[next_num][1]))
        if end_time is None:
            continue
        if duration is not None and end_time > duration:
            end_time = duration
        if start_time > end_time:
            break
        # Drop the time link, then surrounding separators and extra spaces
        chapter_title = re.sub(
            r'<a[^>]+>[^<]+</a>', '', chapter_line).strip(' \t-')
        chapter_title = re.sub(r'\s+', ' ', chapter_title)
        chapters.append({
            'start_time': start_time,
            'end_time': end_time,
            'title': chapter_title,
        })
    return chapters
1466
c5e8d7af 1467 def _real_extract(self, url):
cf7e015f
S
1468 url, smuggled_data = unsmuggle_url(url, {})
1469
7e8c0af0 1470 proto = (
78caa52a
PH
1471 'http' if self._downloader.params.get('prefer_insecure', False)
1472 else 'https')
7e8c0af0 1473
7c80519c 1474 start_time = None
297a564b 1475 end_time = None
7c80519c
JMF
1476 parsed_url = compat_urllib_parse_urlparse(url)
1477 for component in [parsed_url.fragment, parsed_url.query]:
1478 query = compat_parse_qs(component)
297a564b 1479 if start_time is None and 't' in query:
7c80519c 1480 start_time = parse_duration(query['t'][0])
2929fa0e
JMF
1481 if start_time is None and 'start' in query:
1482 start_time = parse_duration(query['start'][0])
297a564b
JMF
1483 if end_time is None and 'end' in query:
1484 end_time = parse_duration(query['end'][0])
7c80519c 1485
c5e8d7af
PH
1486 # Extract original video URL from URL with redirection, like age verification, using next_url parameter
1487 mobj = re.search(self._NEXT_URL_RE, url)
1488 if mobj:
7fd002c0 1489 url = proto + '://www.youtube.com/' + compat_urllib_parse_unquote(mobj.group(1)).lstrip('/')
97665381 1490 video_id = self.extract_id(url)
c5e8d7af
PH
1491
1492 # Get video webpage
aa79ac0c 1493 url = proto + '://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1&bpctr=9999999999' % video_id
a1f934b1 1494 video_webpage = self._download_webpage(url, video_id)
c5e8d7af
PH
1495
1496 # Attempt to extract SWF player URL
e0df6211 1497 mobj = re.search(r'swfConfig.*?"(https?:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage)
c5e8d7af
PH
1498 if mobj is not None:
1499 player_url = re.sub(r'\\(.)', r'\1', mobj.group(1))
1500 else:
1501 player_url = None
1502
d8d24a92
S
1503 dash_mpds = []
1504
1505 def add_dash_mpd(video_info):
1506 dash_mpd = video_info.get('dashmpd')
1507 if dash_mpd and dash_mpd[0] not in dash_mpds:
1508 dash_mpds.append(dash_mpd[0])
1509
c7121fa7
S
1510 is_live = None
1511 view_count = None
1512
1513 def extract_view_count(v_info):
1514 return int_or_none(try_get(v_info, lambda x: x['view_count'][0]))
1515
c5e8d7af 1516 # Get video info
6449cd80 1517 embed_webpage = None
c108eb73 1518 if re.search(r'player-age-gate-content">', video_webpage) is not None:
c108eb73
JMF
1519 age_gate = True
1520 # We simulate the access to the video from www.youtube.com/v/{video_id}
1521 # this can be viewed without login into Youtube
beb95e77
CL
1522 url = proto + '://www.youtube.com/embed/%s' % video_id
1523 embed_webpage = self._download_webpage(url, video_id, 'Downloading embed webpage')
15707c7e 1524 data = compat_urllib_parse_urlencode({
2c57c7fa
JMF
1525 'video_id': video_id,
1526 'eurl': 'https://youtube.googleapis.com/v/' + video_id,
c084c934 1527 'sts': self._search_regex(
beb95e77 1528 r'"sts"\s*:\s*(\d+)', embed_webpage, 'sts', default=''),
2c57c7fa 1529 })
7e8c0af0 1530 video_info_url = proto + '://www.youtube.com/get_video_info?' + data
94bd3613
PH
1531 video_info_webpage = self._download_webpage(
1532 video_info_url, video_id,
20436c30 1533 note='Refetching age-gated info webpage',
94bd3613 1534 errnote='unable to download video info webpage')
c5e8d7af 1535 video_info = compat_parse_qs(video_info_webpage)
d8d24a92 1536 add_dash_mpd(video_info)
c108eb73
JMF
1537 else:
1538 age_gate = False
bc93bdb5 1539 video_info = None
dc4e4f90 1540 sts = None
d8d24a92 1541 # Try looking directly into the video webpage
a72778d3
S
1542 ytplayer_config = self._get_ytplayer_config(video_id, video_webpage)
1543 if ytplayer_config:
4e62ebe2 1544 args = ytplayer_config['args']
4c76aa06 1545 if args.get('url_encoded_fmt_stream_map') or args.get('hlsvp'):
d8d24a92
S
1546 # Convert to the same format returned by compat_parse_qs
1547 video_info = dict((k, [v]) for k, v in args.items())
1548 add_dash_mpd(video_info)
6496ccb4
S
1549 # Rental video is not rented but preview is available (e.g.
1550 # https://www.youtube.com/watch?v=yYr8q0y5Jfg,
1551 # https://github.com/rg3/youtube-dl/issues/10532)
1552 if not video_info and args.get('ypc_vid'):
1553 return self.url_result(
1554 args['ypc_vid'], YoutubeIE.ie_key(), video_id=args['ypc_vid'])
2fe1ff85
JMF
1555 if args.get('livestream') == '1' or args.get('live_playback') == 1:
1556 is_live = True
dc4e4f90 1557 sts = ytplayer_config.get('sts')
0a3cf9ad
S
1558 if not video_info or self._downloader.params.get('youtube_include_dash_manifest', True):
1559 # We also try looking in get_video_info since it may contain different dashmpd
1560 # URL that points to a DASH manifest with possibly different itag set (some itags
1561 # are missing from DASH manifest pointed by webpage's dashmpd, some - from DASH
1562 # manifest pointed by get_video_info's dashmpd).
1563 # The general idea is to take a union of itags of both DASH manifests (for example
1564 # video with such 'manifest behavior' see https://github.com/rg3/youtube-dl/issues/6093)
4e62ebe2 1565 self.report_video_info_webpage_download(video_id)
dc4e4f90
S
1566 for el in ('info', 'embedded', 'detailpage', 'vevo', ''):
1567 query = {
1568 'video_id': video_id,
1569 'ps': 'default',
1570 'eurl': '',
1571 'gl': 'US',
1572 'hl': 'en',
1573 }
1574 if el:
1575 query['el'] = el
1576 if sts:
1577 query['sts'] = sts
810fb84d 1578 video_info_webpage = self._download_webpage(
dc4e4f90 1579 '%s://www.youtube.com/get_video_info' % proto,
4e62ebe2 1580 video_id, note=False,
dc4e4f90
S
1581 errnote='unable to download video info webpage',
1582 fatal=False, query=query)
1583 if not video_info_webpage:
1584 continue
0a3cf9ad 1585 get_video_info = compat_parse_qs(video_info_webpage)
fd545fc6 1586 add_dash_mpd(get_video_info)
c7121fa7
S
1587 if view_count is None:
1588 view_count = extract_view_count(get_video_info)
0a3cf9ad
S
1589 if not video_info:
1590 video_info = get_video_info
1591 if 'token' in get_video_info:
89ea063e
S
1592 # Different get_video_info requests may report different results, e.g.
1593 # some may report video unavailability, but some may serve it without
1594 # any complaint (see https://github.com/rg3/youtube-dl/issues/7362,
1595 # the original webpage as well as el=info and el=embedded get_video_info
1596 # requests report video unavailability due to geo restriction while
1597 # el=detailpage succeeds and returns valid data). This is probably
1598 # due to YouTube measures against IP ranges of hosting providers.
1599 # Working around by preferring the first succeeded video_info containing
1600 # the token if no such video_info yet was found.
44b2264f
S
1601 if 'token' not in video_info:
1602 video_info = get_video_info
4e62ebe2 1603 break
bbb7c3f7
YCH
1604
1605 def extract_unavailable_message():
1606 return self._html_search_regex(
1607 r'(?s)<h1[^>]+id="unavailable-message"[^>]*>(.+?)</h1>',
1608 video_webpage, 'unavailable message', default=None)
1609
c5e8d7af
PH
1610 if 'token' not in video_info:
1611 if 'reason' in video_info:
af214c3a 1612 if 'The uploader has not made this video available in your country.' in video_info['reason']:
fd5c4aab
S
1613 regions_allowed = self._html_search_meta(
1614 'regionsAllowed', video_webpage, default=None)
1615 countries = regions_allowed.split(',') if regions_allowed else None
1616 self.raise_geo_restricted(
1617 msg=video_info['reason'][0], countries=countries)
bbb7c3f7
YCH
1618 reason = video_info['reason'][0]
1619 if 'Invalid parameters' in reason:
1620 unavailable_message = extract_unavailable_message()
1621 if unavailable_message:
1622 reason = unavailable_message
d11271dd 1623 raise ExtractorError(
bbb7c3f7 1624 'YouTube said: %s' % reason,
d11271dd 1625 expected=True, video_id=video_id)
c5e8d7af 1626 else:
d11271dd 1627 raise ExtractorError(
78caa52a 1628 '"token" parameter not in video info for unknown reason',
d11271dd 1629 video_id=video_id)
c5e8d7af 1630
cf7e015f
S
1631 # title
1632 if 'title' in video_info:
1633 video_title = video_info['title'][0]
1634 else:
1635 self._downloader.report_warning('Unable to extract video title')
1636 video_title = '_'
1637
1638 # description
9cafc3fd 1639 description_original = video_description = get_element_by_id("eow-description", video_webpage)
cf7e015f 1640 if video_description:
fa4bc6e7
RA
1641
1642 def replace_url(m):
1643 redir_url = compat_urlparse.urljoin(url, m.group(1))
1644 parsed_redir_url = compat_urllib_parse_urlparse(redir_url)
1645 if re.search(r'^(?:www\.)?(?:youtube(?:-nocookie)?\.com|youtu\.be)$', parsed_redir_url.netloc) and parsed_redir_url.path == '/redirect':
1646 qs = compat_parse_qs(parsed_redir_url.query)
1647 q = qs.get('q')
1648 if q and q[0]:
1649 return q[0]
1650 return redir_url
1651
9cafc3fd 1652 description_original = video_description = re.sub(r'''(?x)
cf7e015f 1653 <a\s+
25cb7a0e 1654 (?:[a-zA-Z-]+="[^"]*"\s+)*?
23f13e97 1655 (?:title|href)="([^"]+)"\s+
25cb7a0e 1656 (?:[a-zA-Z-]+="[^"]*"\s+)*?
525cedb9 1657 class="[^"]*"[^>]*>
23f13e97 1658 [^<]+\.{3}\s*
cf7e015f 1659 </a>
fa4bc6e7 1660 ''', replace_url, video_description)
cf7e015f
S
1661 video_description = clean_html(video_description)
1662 else:
1663 fd_mobj = re.search(r'<meta name="description" content="([^"]+)"', video_webpage)
1664 if fd_mobj:
1665 video_description = unescapeHTML(fd_mobj.group(1))
1666 else:
1667 video_description = ''
1668
5e1eddb9
S
1669 if 'multifeed_metadata_list' in video_info and not smuggled_data.get('force_singlefeed', False):
1670 if not self._downloader.params.get('noplaylist'):
1671 entries = []
1672 feed_ids = []
6863631c 1673 multifeed_metadata_list = video_info['multifeed_metadata_list'][0]
5e1eddb9 1674 for feed in multifeed_metadata_list.split(','):
6863631c
S
1675 # Unquote should take place before split on comma (,) since textual
1676 # fields may contain comma as well (see
1677 # https://github.com/rg3/youtube-dl/issues/8536)
1678 feed_data = compat_parse_qs(compat_urllib_parse_unquote_plus(feed))
5e1eddb9
S
1679 entries.append({
1680 '_type': 'url_transparent',
1681 'ie_key': 'Youtube',
1682 'url': smuggle_url(
1683 '%s://www.youtube.com/watch?v=%s' % (proto, feed_data['id'][0]),
1684 {'force_singlefeed': True}),
1685 'title': '%s (%s)' % (video_title, feed_data['title'][0]),
1686 })
1687 feed_ids.append(feed_data['id'][0])
1688 self.to_screen(
1689 'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
1690 % (', '.join(feed_ids), video_id))
1691 return self.playlist_result(entries, video_id, video_title, video_description)
1692 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
cf7e015f 1693
c7121fa7 1694 if view_count is None:
1c9c8de2 1695 view_count = extract_view_count(video_info)
1d699755 1696
c5e8d7af
PH
1697 # Check for "rental" videos
1698 if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info:
c9612c04 1699 raise ExtractorError('"rental" videos not supported. See https://github.com/rg3/youtube-dl/issues/359 for more information.', expected=True)
c5e8d7af 1700
c63ca0ee
S
1701 def _extract_filesize(media_url):
1702 return int_or_none(self._search_regex(
1703 r'\bclen[=/](\d+)', media_url, 'filesize', default=None))
1704
c5e8d7af
PH
1705 if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
1706 self.report_rtmp_download()
dd27fd17
PH
1707 formats = [{
1708 'format_id': '_rtmp',
1709 'protocol': 'rtmp',
1710 'url': video_info['conn'][0],
1711 'player_url': player_url,
1712 }]
391dd6f0 1713 elif not is_live and (len(video_info.get('url_encoded_fmt_stream_map', [''])[0]) >= 1 or len(video_info.get('adaptive_fmts', [''])[0]) >= 1):
5f6a1245 1714 encoded_url_map = video_info.get('url_encoded_fmt_stream_map', [''])[0] + ',' + video_info.get('adaptive_fmts', [''])[0]
00fe14fc 1715 if 'rtmpe%3Dyes' in encoded_url_map:
a7055eb9 1716 raise ExtractorError('rtmpe downloads are not supported, see https://github.com/rg3/youtube-dl/issues/343 for more information.', expected=True)
3318832e 1717 formats_spec = {}
82156fdb 1718 fmt_list = video_info.get('fmt_list', [''])[0]
1719 if fmt_list:
1720 for fmt in fmt_list.split(','):
1721 spec = fmt.split('/')
3318832e 1722 if len(spec) > 1:
1723 width_height = spec[1].split('x')
1724 if len(width_height) == 2:
1725 formats_spec[spec[0]] = {
1726 'resolution': spec[1],
1727 'width': int_or_none(width_height[0]),
1728 'height': int_or_none(width_height[1]),
1729 }
54fc90aa 1730 q = qualities(['small', 'medium', 'hd720'])
c9afb51c 1731 formats = []
00fe14fc 1732 for url_data_str in encoded_url_map.split(','):
c5e8d7af 1733 url_data = compat_parse_qs(url_data_str)
201e9eaa
PH
1734 if 'itag' not in url_data or 'url' not in url_data:
1735 continue
1736 format_id = url_data['itag'][0]
1737 url = url_data['url'][0]
1738
a49eccdf 1739 if 's' in url_data or self._downloader.params.get('youtube_include_dash_manifest', True):
6449cd80 1740 ASSETS_RE = r'"assets":.+?"js":\s*("[^"]+")'
beb95e77 1741 jsplayer_url_json = self._search_regex(
6449cd80
PH
1742 ASSETS_RE,
1743 embed_webpage if age_gate else video_webpage,
1744 'JS player URL (1)', default=None)
1745 if not jsplayer_url_json and not age_gate:
1746 # We need the embed website after all
1747 if embed_webpage is None:
1748 embed_url = proto + '://www.youtube.com/embed/%s' % video_id
1749 embed_webpage = self._download_webpage(
1750 embed_url, video_id, 'Downloading embed webpage')
1751 jsplayer_url_json = self._search_regex(
1752 ASSETS_RE, embed_webpage, 'JS player URL')
1753
beb95e77 1754 player_url = json.loads(jsplayer_url_json)
201e9eaa
PH
1755 if player_url is None:
1756 player_url_json = self._search_regex(
1757 r'ytplayer\.config.*?"url"\s*:\s*("[^"]+")',
78caa52a 1758 video_webpage, 'age gate player URL')
201e9eaa
PH
1759 player_url = json.loads(player_url_json)
1760
a49eccdf
YCH
1761 if 'sig' in url_data:
1762 url += '&signature=' + url_data['sig'][0]
1763 elif 's' in url_data:
1764 encrypted_sig = url_data['s'][0]
1765
201e9eaa 1766 if self._downloader.params.get('verbose'):
cf010131 1767 if player_url is None:
201e9eaa
PH
1768 player_version = 'unknown'
1769 player_desc = 'unknown'
1770 else:
1771 if player_url.endswith('swf'):
1772 player_version = self._search_regex(
1773 r'-(.+?)(?:/watch_as3)?\.swf$', player_url,
78caa52a 1774 'flash player', fatal=False)
201e9eaa 1775 player_desc = 'flash player %s' % player_version
cf010131 1776 else:
201e9eaa 1777 player_version = self._search_regex(
b62985a9
YCH
1778 [r'html5player-([^/]+?)(?:/html5player(?:-new)?)?\.js',
1779 r'(?:www|player)-([^/]+)(?:/[a-z]{2}_[A-Z]{2})?/base\.js'],
201e9eaa
PH
1780 player_url,
1781 'html5 player', fatal=False)
78caa52a 1782 player_desc = 'html5 player %s' % player_version
201e9eaa 1783
60064c53 1784 parts_sizes = self._signature_cache_id(encrypted_sig)
69ea8ca4 1785 self.to_screen('{%s} signature length %s, %s' %
9e1a5b84 1786 (format_id, parts_sizes, player_desc))
201e9eaa
PH
1787
1788 signature = self._decrypt_signature(
1789 encrypted_sig, video_id, player_url, age_gate)
1790 url += '&signature=' + signature
1791 if 'ratebypass' not in url:
1792 url += '&ratebypass=yes'
c9afb51c 1793
94278f72
YCH
1794 dct = {
1795 'format_id': format_id,
1796 'url': url,
1797 'player_url': player_url,
1798 }
1799 if format_id in self._formats:
1800 dct.update(self._formats[format_id])
3318832e 1801 if format_id in formats_spec:
1802 dct.update(formats_spec[format_id])
94278f72 1803
aabc2be6
S
1804 # Some itags are not included in DASH manifest thus corresponding formats will
1805 # lack metadata (see https://github.com/rg3/youtube-dl/pull/5993).
1806 # Trying to extract metadata from url_encoded_fmt_stream_map entry.
1807 mobj = re.search(r'^(?P<width>\d+)[xX](?P<height>\d+)$', url_data.get('size', [''])[0])
1808 width, height = (int(mobj.group('width')), int(mobj.group('height'))) if mobj else (None, None)
94278f72 1809
c63ca0ee
S
1810 filesize = int_or_none(url_data.get(
1811 'clen', [None])[0]) or _extract_filesize(url)
1812
54fc90aa
RA
1813 quality = url_data.get('quality_label', [None])[0] or url_data.get('quality', [None])[0]
1814
94278f72 1815 more_fields = {
c63ca0ee 1816 'filesize': filesize,
aabc2be6 1817 'tbr': float_or_none(url_data.get('bitrate', [None])[0], 1000),
c9afb51c
AH
1818 'width': width,
1819 'height': height,
1820 'fps': int_or_none(url_data.get('fps', [None])[0]),
54fc90aa
RA
1821 'format_note': quality,
1822 'quality': q(quality),
c9afb51c 1823 }
94278f72
YCH
1824 for key, value in more_fields.items():
1825 if value:
1826 dct[key] = value
aabc2be6
S
1827 type_ = url_data.get('type', [None])[0]
1828 if type_:
1829 type_split = type_.split(';')
1830 kind_ext = type_split[0].split('/')
1831 if len(kind_ext) == 2:
94278f72
YCH
1832 kind, _ = kind_ext
1833 dct['ext'] = mimetype2ext(type_split[0])
aabc2be6
S
1834 if kind in ('audio', 'video'):
1835 codecs = None
1836 for mobj in re.finditer(
1837 r'(?P<key>[a-zA-Z_-]+)=(?P<quote>["\']?)(?P<val>.+?)(?P=quote)(?:;|$)', type_):
1838 if mobj.group('key') == 'codecs':
1839 codecs = mobj.group('val')
1840 break
1841 if codecs:
6310acf5 1842 dct.update(parse_codecs(codecs))
e4a60912
S
1843 if dct.get('acodec') == 'none' or dct.get('vcodec') == 'none':
1844 dct['downloader_options'] = {
1845 # Youtube throttles chunks >~10M
1846 'http_chunk_size': 10485760,
1847 }
aabc2be6 1848 formats.append(dct)
1d043b93
JMF
1849 elif video_info.get('hlsvp'):
1850 manifest_url = video_info['hlsvp'][0]
89beedd3
RA
1851 formats = []
1852 m3u8_formats = self._extract_m3u8_formats(
1853 manifest_url, video_id, 'mp4', fatal=False)
1854 for a_format in m3u8_formats:
1855 itag = self._search_regex(
1856 r'/itag/(\d+)/', a_format['url'], 'itag', default=None)
1857 if itag:
1858 a_format['format_id'] = itag
1859 if itag in self._formats:
1860 dct = self._formats[itag].copy()
1861 dct.update(a_format)
1862 a_format = dct
1863 a_format['player_url'] = player_url
1864 # Accept-Encoding header causes failures in live streams on Youtube and Youtube Gaming
049d71d8 1865 a_format.setdefault('http_headers', {})['Youtubedl-no-compression'] = 'True'
89beedd3 1866 formats.append(a_format)
c5e8d7af 1867 else:
4c76aa06
RA
1868 error_message = clean_html(video_info.get('reason', [None])[0])
1869 if not error_message:
1870 error_message = extract_unavailable_message()
1871 if error_message:
1872 raise ExtractorError(error_message, expected=True)
69ea8ca4 1873 raise ExtractorError('no conn, hlsvp or url_encoded_fmt_stream_map information found in video info')
c5e8d7af 1874
7e72694b
S
1875 # uploader
1876 video_uploader = try_get(video_info, lambda x: x['author'][0], compat_str)
1877 if video_uploader:
1878 video_uploader = compat_urllib_parse_unquote_plus(video_uploader)
1879 else:
1880 self._downloader.report_warning('unable to extract uploader name')
1881
1882 # uploader_id
1883 video_uploader_id = None
1884 video_uploader_url = None
1885 mobj = re.search(
1886 r'<link itemprop="url" href="(?P<uploader_url>https?://www\.youtube\.com/(?:user|channel)/(?P<uploader_id>[^"]+))">',
1887 video_webpage)
1888 if mobj is not None:
1889 video_uploader_id = mobj.group('uploader_id')
1890 video_uploader_url = mobj.group('uploader_url')
1891 else:
1892 self._downloader.report_warning('unable to extract uploader nickname')
1893
1894 # thumbnail image
1895 # We try first to get a high quality image:
1896 m_thumb = re.search(r'<span itemprop="thumbnail".*?href="(.*?)">',
1897 video_webpage, re.DOTALL)
1898 if m_thumb is not None:
1899 video_thumbnail = m_thumb.group(1)
1900 elif 'thumbnail_url' not in video_info:
1901 self._downloader.report_warning('unable to extract video thumbnail')
1902 video_thumbnail = None
1903 else: # don't panic if we can't find it
1904 video_thumbnail = compat_urllib_parse_unquote_plus(video_info['thumbnail_url'][0])
1905
1906 # upload date
1907 upload_date = self._html_search_meta(
1908 'datePublished', video_webpage, 'upload date', default=None)
1909 if not upload_date:
1910 upload_date = self._search_regex(
1911 [r'(?s)id="eow-date.*?>(.*?)</span>',
1912 r'(?:id="watch-uploader-info".*?>.*?|["\']simpleText["\']\s*:\s*["\'])(?:Published|Uploaded|Streamed live|Started) on (.+?)[<"\']'],
1913 video_webpage, 'upload date', default=None)
1914 upload_date = unified_strdate(upload_date)
1915
1916 video_license = self._html_search_regex(
1917 r'<h4[^>]+class="title"[^>]*>\s*License\s*</h4>\s*<ul[^>]*>\s*<li>(.+?)</li',
1918 video_webpage, 'license', default=None)
1919
1920 m_music = re.search(
1921 r'''(?x)
1922 <h4[^>]+class="title"[^>]*>\s*Music\s*</h4>\s*
1923 <ul[^>]*>\s*
1924 <li>(?P<title>.+?)
1925 by (?P<creator>.+?)
1926 (?:
1927 \(.+?\)|
1928 <a[^>]*
1929 (?:
1930 \bhref=["\']/red[^>]*>| # drop possible
1931 >\s*Listen ad-free with YouTube Red # YouTube Red ad
1932 )
1933 .*?
1934 )?</li
1935 ''',
1936 video_webpage)
1937 if m_music:
1938 video_alt_title = remove_quotes(unescapeHTML(m_music.group('title')))
1939 video_creator = clean_html(m_music.group('creator'))
1940 else:
1941 video_alt_title = video_creator = None
1942
1943 def extract_meta(field):
1944 return self._html_search_regex(
1945 r'<h4[^>]+class="title"[^>]*>\s*%s\s*</h4>\s*<ul[^>]*>\s*<li>(.+?)</li>\s*' % field,
1946 video_webpage, field, default=None)
1947
1948 track = extract_meta('Song')
1949 artist = extract_meta('Artist')
1950
1951 m_episode = re.search(
1952 r'<div[^>]+id="watch7-headline"[^>]*>\s*<span[^>]*>.*?>(?P<series>[^<]+)</a></b>\s*S(?P<season>\d+)\s*•\s*E(?P<episode>\d+)</span>',
1953 video_webpage)
1954 if m_episode:
1955 series = m_episode.group('series')
1956 season_number = int(m_episode.group('season'))
1957 episode_number = int(m_episode.group('episode'))
1958 else:
1959 series = season_number = episode_number = None
1960
1961 m_cat_container = self._search_regex(
1962 r'(?s)<h4[^>]*>\s*Category\s*</h4>\s*<ul[^>]*>(.*?)</ul>',
1963 video_webpage, 'categories', default=None)
1964 if m_cat_container:
1965 category = self._html_search_regex(
1966 r'(?s)<a[^<]+>(.*?)</a>', m_cat_container, 'category',
1967 default=None)
1968 video_categories = None if category is None else [category]
1969 else:
1970 video_categories = None
1971
1972 video_tags = [
1973 unescapeHTML(m.group('content'))
1974 for m in re.finditer(self._meta_regex('og:video:tag'), video_webpage)]
1975
1976 def _extract_count(count_name):
1977 return str_to_int(self._search_regex(
1978 r'-%s-button[^>]+><span[^>]+class="yt-uix-button-content"[^>]*>([\d,]+)</span>'
1979 % re.escape(count_name),
1980 video_webpage, count_name, default=None))
1981
1982 like_count = _extract_count('like')
1983 dislike_count = _extract_count('dislike')
1984
1985 # subtitles
1986 video_subtitles = self.extract_subtitles(video_id, video_webpage)
1987 automatic_captions = self.extract_automatic_captions(video_id, video_webpage)
1988
1989 video_duration = try_get(
1990 video_info, lambda x: int_or_none(x['length_seconds'][0]))
1991 if not video_duration:
1992 video_duration = parse_duration(self._html_search_meta(
1993 'duration', video_webpage, 'video duration'))
1994
1995 # annotations
1996 video_annotations = None
1997 if self._downloader.params.get('writeannotations', False):
1998 video_annotations = self._extract_annotations(video_id)
1999
2000 chapters = self._extract_chapters(description_original, video_duration)
2001
dd27fd17 2002 # Look for the DASH manifest
203fb43f 2003 if self._downloader.params.get('youtube_include_dash_manifest', True):
77c6fb5b 2004 dash_mpd_fatal = True
8ff648e4 2005 for mpd_url in dash_mpds:
d8d24a92 2006 dash_formats = {}
774e208f 2007 try:
05d0d131
YCH
2008 def decrypt_sig(mobj):
2009 s = mobj.group(1)
2010 dec_s = self._decrypt_signature(s, video_id, player_url, age_gate)
2011 return '/signature/%s' % dec_s
2012
8ff648e4 2013 mpd_url = re.sub(r'/s/([a-fA-F0-9\.]+)', decrypt_sig, mpd_url)
2d2fa82d 2014
8ff648e4 2015 for df in self._extract_mpd_formats(
2016 mpd_url, video_id, fatal=dash_mpd_fatal,
2017 formats_dict=self._formats):
c63ca0ee
S
2018 if not df.get('filesize'):
2019 df['filesize'] = _extract_filesize(df['url'])
d8d24a92
S
2020 # Do not overwrite DASH format found in some previous DASH manifest
2021 if df['format_id'] not in dash_formats:
2022 dash_formats[df['format_id']] = df
77c6fb5b
S
2023 # Additional DASH manifests may end up in HTTP Error 403 therefore
2024 # allow them to fail without bug report message if we already have
2025 # some DASH manifest succeeded. This is temporary workaround to reduce
2026 # burst of bug reports until we figure out the reason and whether it
2027 # can be fixed at all.
2028 dash_mpd_fatal = False
774e208f
PH
2029 except (ExtractorError, KeyError) as e:
2030 self.report_warning(
2031 'Skipping DASH manifest: %r' % e, video_id)
d8d24a92 2032 if dash_formats:
04b3b3df
JMF
2033 # Remove the formats we found through non-DASH, they
2034 # contain less info and it can be wrong, because we use
2035 # fixed values (for example the resolution). See
2036 # https://github.com/rg3/youtube-dl/issues/5774 for an
2037 # example.
d80265cc 2038 formats = [f for f in formats if f['format_id'] not in dash_formats.keys()]
d8d24a92 2039 formats.extend(dash_formats.values())
d80044c2 2040
6271f1ca
PH
2041 # Check for malformed aspect ratio
2042 stretched_m = re.search(
2043 r'<meta\s+property="og:video:tag".*?content="yt:stretch=(?P<w>[0-9]+):(?P<h>[0-9]+)">',
2044 video_webpage)
2045 if stretched_m:
313dfc45
LL
2046 w = float(stretched_m.group('w'))
2047 h = float(stretched_m.group('h'))
5faf9fed
S
2048 # yt:stretch may hold invalid ratio data (e.g. for Q39EVAstoRM ratio is 17:0).
2049 # We will only process correct ratios.
313dfc45 2050 if w > 0 and h > 0:
41f24c32 2051 ratio = w / h
313dfc45
LL
2052 for f in formats:
2053 if f.get('vcodec') != 'none':
2054 f['stretched_ratio'] = ratio
6271f1ca 2055
4bcc7bd1 2056 self._sort_formats(formats)
4ea3be0a 2057
d77ab8e2
S
2058 self.mark_watched(video_id, video_info)
2059
4ea3be0a 2060 return {
8bcc8756
JW
2061 'id': video_id,
2062 'uploader': video_uploader,
2063 'uploader_id': video_uploader_id,
fd050249 2064 'uploader_url': video_uploader_url,
8bcc8756 2065 'upload_date': upload_date,
7caf9830 2066 'license': video_license,
936784b2 2067 'creator': video_creator or artist,
8bcc8756 2068 'title': video_title,
936784b2 2069 'alt_title': video_alt_title or track,
8bcc8756
JW
2070 'thumbnail': video_thumbnail,
2071 'description': video_description,
2072 'categories': video_categories,
000b6b5a 2073 'tags': video_tags,
8bcc8756 2074 'subtitles': video_subtitles,
360e1ca5 2075 'automatic_captions': automatic_captions,
8bcc8756
JW
2076 'duration': video_duration,
2077 'age_limit': 18 if age_gate else 0,
2078 'annotations': video_annotations,
9cafc3fd 2079 'chapters': chapters,
7e8c0af0 2080 'webpage_url': proto + '://www.youtube.com/watch?v=%s' % video_id,
8bcc8756 2081 'view_count': view_count,
4ea3be0a 2082 'like_count': like_count,
2083 'dislike_count': dislike_count,
2d30521a 2084 'average_rating': float_or_none(video_info.get('avg_rating', [None])[0]),
8bcc8756 2085 'formats': formats,
2fe1ff85 2086 'is_live': is_live,
7c80519c 2087 'start_time': start_time,
297a564b 2088 'end_time': end_time,
12afdc2a
S
2089 'series': series,
2090 'season_number': season_number,
2091 'episode_number': episode_number,
936784b2
S
2092 'track': track,
2093 'artist': artist,
4ea3be0a 2094 }
c5e8d7af 2095
5f6a1245 2096
class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
    # Declarative part of the playlist extractor: description, URL matcher,
    # URL/regex templates and the list of test cases.
    IE_DESC = 'YouTube.com playlists'
    # Verbose-mode pattern with two top-level alternatives:
    #   * a youtube.com / youtu.be URL carrying the playlist id in a
    #     ``p``/``a``/``list`` query parameter or a ``/p/`` path component
    #     (playlist id captured in group 1), or
    #   * a bare playlist id as described by
    #     YoutubeBaseInfoExtractor._PLAYLIST_ID_RE (captured in group 2).
    _VALID_URL = r"""(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        (?:
                            youtube\.com/
                            (?:
                               (?:course|view_play_list|my_playlists|artist|playlist|watch|embed/(?:videoseries|[0-9A-Za-z_-]{11}))
                               \? (?:.*?[&;])*? (?:p|a|list)=
                            |  p/
                            )|
                            youtu\.be/[0-9A-Za-z_-]{11}\?.*?\blist=
                        )
                        (
                            (?:PL|LL|EC|UU|FL|RD|UL|TL)?[0-9A-Za-z-_]{10,}
                            # Top tracks, they can also include dots
                            |(?:MC)[\w\.]*
                        )
                        .*
                     |
                        (%(playlist_id)s)
                     )""" % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    # Canonical playlist page URL; ``%s`` is filled with the playlist id.
    _TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s'
    # Matches a watch link inside playlist HTML and captures the video ``id``,
    # its ``index`` in the playlist and, optionally, the link text as ``title``.
    _VIDEO_RE = r'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})&amp;[^"]*?index=(?P<index>\d+)(?:[^>]+>(?P<title>[^<]+))?'
    IE_NAME = 'youtube:playlist'
    # Test cases (presumably consumed by the project's test harness -- confirm).
    _TESTS = [{
        # Regular public playlist URL
        'url': 'https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re',
        'info_dict': {
            'title': 'ytdl test PL',
            'id': 'PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re',
        },
        'playlist_count': 3,
    }, {
        # Empty playlist; skipped because the playlist is private
        'url': 'https://www.youtube.com/playlist?list=PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx',
        'info_dict': {
            'id': 'PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx',
            'title': 'YDL_Empty_List',
        },
        'playlist_count': 0,
        'skip': 'This playlist is private',
    }, {
        'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
        'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
        'info_dict': {
            'title': '29C3: Not my department',
            'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
        },
        'playlist_count': 95,
    }, {
        # Bare playlist id instead of a URL
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
        },
        'playlist_mincount': 26,
    }, {
        'note': 'Large playlist',
        'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
        'info_dict': {
            'title': 'Uploads from Cauchemar',
            'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
        },
        'playlist_mincount': 799,
    }, {
        # Bare playlist id; skipped because the playlist is private
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        },
        'playlist_count': 2,
        'skip': 'This playlist is private',
    }, {
        'note': 'embedded',
        'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA15',
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        }
    }, {
        # Embed URL of a single video that also carries a ``list`` parameter
        'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
        'playlist_mincount': 485,
        'info_dict': {
            'title': '2017 華語最新單曲 (2/24更新)',
            'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
        }
    }, {
        'note': 'Embedded SWF player',
        'url': 'https://www.youtube.com/p/YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ?hl=en_US&fs=1&rel=0',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA7',
            'id': 'YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ',
        }
    }, {
        'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
        'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
        'info_dict': {
            'title': 'Uploads from Interstellar Movie',
            'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
        },
        'playlist_mincount': 21,
    }, {
        # Playlist URL that does not actually serve a playlist
        'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
        'info_dict': {
            'id': 'FqZTN594JQw',
            'ext': 'webm',
            'title': "Smiley's People 01 detective, Adventure Series, Action",
            'uploader': 'STREEM',
            'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
            'upload_date': '20150526',
            'license': 'Standard YouTube License',
            'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
            'categories': ['People & Blogs'],
            'tags': list,
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'skip_download': True,
        },
        'add_ie': [YoutubeIE.ie_key()],
    }, {
        # youtu.be short URL with a ``list`` parameter, run with ``noplaylist``
        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
        'info_dict': {
            'id': 'yeWKywCrFtk',
            'ext': 'mp4',
            'title': 'Small Scale Baler and Braiding Rugs',
            'uploader': 'Backus-Page House Museum',
            'uploader_id': 'backuspagemuseum',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
            'upload_date': '20161008',
            'license': 'Standard YouTube License',
            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
            'categories': ['Nonprofits & Activism'],
            'tags': list,
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
    }, {
        # URL-matching checks only (no extraction expectations)
        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
        'only_matching': True,
    }, {
        'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
        'only_matching': True,
    }]
c5e8d7af 2251
880e1c52
JMF
def _real_initialize(self):
    """Run the login step by delegating to ``self._login()``; its result is discarded."""
    self._login()
2254
def _extract_mix(self, playlist_id):
    """Gather the videos of a YouTube mix and return them as a playlist.

    Watch pages are downloaded one after another, each one requested for
    the most recently discovered video, until a page yields no video id
    that has not already been collected.  The playlist title is read from
    the last page that was downloaded.
    """
    # The mixes are generated from a single video
    # the id of the playlist is just 'RD' + video_id
    mix_link_re = r'''(?xs)data-video-username=".*?".*?
               href="/watch\?v=([0-9A-Za-z_-]{11})&amp;[^"]*?list=%s''' % re.escape(playlist_id)

    collected = []
    seed_id = playlist_id[-11:]
    page_num = 0
    while True:
        page_num += 1
        url = 'https://youtube.com/watch?v=%s&list=%s' % (seed_id, playlist_id)
        webpage = self._download_webpage(
            url, playlist_id,
            'Downloading page {0} of Youtube mix'.format(page_num))

        # Fetch new pages until all the videos are repeated, it seems that
        # there are always 51 unique videos.
        unseen = []
        for candidate in orderedSet(re.findall(mix_link_re, webpage)):
            if candidate not in collected:
                unseen.append(candidate)
        if not unseen:
            break

        collected.extend(unseen)
        seed_id = collected[-1]

    entries = self._ids_to_results(collected)

    # Try the known title containers in order of preference and keep the
    # first non-empty one (or the last lookup's result if all are empty).
    title_html = None
    for class_name in ('playlist-title', 'title long-title', 'title'):
        title_html = get_element_by_attribute('class', class_name, webpage)
        if title_html:
            break

    return self.playlist_result(entries, playlist_id, clean_html(title_html))
2286
def _extract_playlist(self, playlist_id):
    """Download a playlist page and build the playlist result from it.

    Returns a `(has_videos, playlist)` tuple. `has_videos` is False only
    when no title could be found on the page and the entries generator
    yields nothing.

    Raises ExtractorError (expected) when an alert on the page reports that
    the playlist does not exist, is private, or the parameters are invalid.
    """
    url = self._TEMPLATE_URL % playlist_id
    page = self._download_webpage(url, playlist_id)

    # the yt-alert-message now has tabindex attribute (see https://github.com/rg3/youtube-dl/issues/11604)
    alerts = re.findall(r'<div class="yt-alert-message"[^>]*>([^<]+)</div>', page)
    for alert in alerts:
        alert = alert.strip()

        # Check if the playlist exists or is private
        unavailable = re.match(
            r'[^<]*(?:The|This) playlist (?P<reason>does not exist|is private)[^<]*', alert)
        if unavailable:
            reason = unavailable.group('reason')
            parts = ['This playlist %s' % reason]
            if 'private' in reason:
                parts.append(', use --username or --netrc to access it')
            parts.append('.')
            raise ExtractorError(''.join(parts), expected=True)

        if re.match(r'[^<]*Invalid parameters[^<]*', alert):
            raise ExtractorError(
                'Invalid parameters. Maybe URL is incorrect.',
                expected=True)

        # The language chooser banner is not an error; ignore it silently.
        if re.match(r'[^<]*Choose your language[^<]*', alert):
            continue

        self.report_warning('Youtube gives an alert message: ' + alert)

    playlist_title = self._html_search_regex(
        r'(?s)<h1 class="pl-header-title[^"]*"[^>]*>\s*(.*?)\s*</h1>',
        page, 'title', default=None)

    # Common prefix of both uploader patterns: the first link in the
    # playlist header details list.
    _UPLOADER_BASE = r'class=["\']pl-header-details[^>]+>\s*<li>\s*<a[^>]+\bhref='
    uploader = self._search_regex(
        r'%s["\']/(?:user|channel)/[^>]+>([^<]+)' % _UPLOADER_BASE,
        page, 'uploader', default=None)

    uploader_id = uploader_url = None
    uploader_link = re.search(
        r'%s(["\'])(?P<path>/(?:user|channel)/(?P<uploader_id>.+?))\1' % _UPLOADER_BASE,
        page)
    if uploader_link:
        uploader_id = uploader_link.group('uploader_id')
        uploader_url = compat_urlparse.urljoin(url, uploader_link.group('path'))

    has_videos = True
    if not playlist_title:
        # Some playlist URLs don't actually serve a playlist (e.g.
        # https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4)
        probe = self._entries(page, playlist_id)
        try:
            next(probe)
        except StopIteration:
            has_videos = False

    # A fresh entries generator is handed to the result; the probe above
    # is used only to test for emptiness.
    playlist = self.playlist_result(
        self._entries(page, playlist_id), playlist_id, playlist_title)
    playlist['uploader'] = uploader
    playlist['uploader_id'] = uploader_id
    playlist['uploader_url'] = uploader_url

    return has_videos, playlist
c5e8d7af 2348
def _check_download_just_video(self, url, playlist_id):
    """Decide whether `url` should be handled as a single video.

    Returns a `(video_id, video)` tuple:
    - `(None, None)` when the URL carries no video id;
    - `(video_id, url_result)` when a video id is present and the
      'noplaylist' downloader param is set;
    - `(video_id, None)` when a video id is present but the playlist
      should still be downloaded.
    """
    # Check if it's a video-specific URL
    query = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
    video_id = query.get('v', [None])[0]
    if not video_id:
        # No ?v= parameter: fall back to short-link and embed URL shapes.
        video_id = self._search_regex(
            r'(?:(?:^|//)youtu\.be/|youtube\.com/embed/(?!videoseries))([0-9A-Za-z_-]{11})', url,
            'video id', default=None)

    if not video_id:
        return None, None

    if self._downloader.params.get('noplaylist'):
        self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
        return video_id, self.url_result(video_id, 'Youtube', video_id=video_id)

    self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))
    return video_id, None
448830ce 2363
ebf1b291
S
def _real_extract(self, url):
    """Extract a playlist, a mix, or a single video from `url`.

    Raises ExtractorError when `url` does not match _VALID_URL.
    """
    # Extract playlist id
    matched = re.match(self._VALID_URL, url)
    if matched is None:
        raise ExtractorError('Invalid URL: %s' % url)
    first_group, second_group = matched.group(1, 2)
    playlist_id = first_group or second_group

    video_id, video = self._check_download_just_video(url, playlist_id)
    if video:
        return video

    # Mixes require a custom extraction process
    if playlist_id.startswith(('RD', 'UL', 'PU')):
        return self._extract_mix(playlist_id)

    has_videos, playlist = self._extract_playlist(playlist_id)
    if not has_videos and video_id:
        # Some playlist URLs don't actually serve a playlist (see
        # https://github.com/rg3/youtube-dl/issues/10537).
        # Fallback to plain video extraction if there is a video id
        # along with playlist id.
        return self.url_result(video_id, 'Youtube', video_id=video_id)
    return playlist
448830ce 2388
c5e8d7af 2389
648e6a1f 2390class YoutubeChannelIE(YoutubePlaylistBaseInfoExtractor):
78caa52a 2391 IE_DESC = 'YouTube.com channels'
9ff67727 2392 _VALID_URL = r'https?://(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie)?\.com)/channel/(?P<id>[0-9A-Za-z_-]+)'
eb0f3e7e 2393 _TEMPLATE_URL = 'https://www.youtube.com/channel/%s/videos'
648e6a1f 2394 _VIDEO_RE = r'(?:title="(?P<title>[^"]+)"[^>]+)?href="/watch\?v=(?P<id>[0-9A-Za-z_-]+)&?'
78caa52a 2395 IE_NAME = 'youtube:channel'
cdc628a4
PH
2396 _TESTS = [{
2397 'note': 'paginated channel',
2398 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
2399 'playlist_mincount': 91,
acf757f4 2400 'info_dict': {
9170ca5b
JMF
2401 'id': 'UUKfVa3S1e4PHvxWcwyMMg8w',
2402 'title': 'Uploads from lex will',
acf757f4 2403 }
5c43afd4
JMF
2404 }, {
2405 'note': 'Age restricted channel',
2406 # from https://www.youtube.com/user/DeusExOfficial
2407 'url': 'https://www.youtube.com/channel/UCs0ifCMCm1icqRbqhUINa0w',
2408 'playlist_mincount': 64,
2409 'info_dict': {
2410 'id': 'UUs0ifCMCm1icqRbqhUINa0w',
2411 'title': 'Uploads from Deus Ex',
2412 },
cdc628a4 2413 }]
c5e8d7af 2414
e462474e
S
2415 @classmethod
2416 def suitable(cls, url):
f07e276a
S
2417 return (False if YoutubePlaylistsIE.suitable(url) or YoutubeLiveIE.suitable(url)
2418 else super(YoutubeChannelIE, cls).suitable(url))
e462474e 2419
9558dcec
S
2420 def _build_template_url(self, url, channel_id):
2421 return self._TEMPLATE_URL % channel_id
2422
c5e8d7af 2423 def _real_extract(self, url):
9ff67727 2424 channel_id = self._match_id(url)
c5e8d7af 2425
9558dcec 2426 url = self._build_template_url(url, channel_id)
386bdfa6
S
2427
2428 # Channel by page listing is restricted to 35 pages of 30 items, i.e. 1050 videos total (see #5778)
2429 # Workaround by extracting as a playlist if managed to obtain channel playlist URL
2430 # otherwise fallback on channel by page extraction
2431 channel_page = self._download_webpage(
2432 url + '?view=57', channel_id,
2433 'Downloading channel page', fatal=False)
2b3c2546
PH
2434 if channel_page is False:
2435 channel_playlist_id = False
2436 else:
2437 channel_playlist_id = self._html_search_meta(
2438 'channelId', channel_page, 'channel id', default=None)
2439 if not channel_playlist_id:
73c4ac2c
S
2440 channel_url = self._html_search_meta(
2441 ('al:ios:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad'),
2442 channel_page, 'channel url', default=None)
2443 if channel_url:
2444 channel_playlist_id = self._search_regex(
2445 r'vnd\.youtube://user/([0-9A-Za-z_-]+)',
2446 channel_url, 'channel id', default=None)
386bdfa6
S
2447 if channel_playlist_id and channel_playlist_id.startswith('UC'):
2448 playlist_id = 'UU' + channel_playlist_id[2:]
d2a9de78
IK
2449 return self.url_result(
2450 compat_urlparse.urljoin(url, '/playlist?list=%s' % playlist_id), 'YoutubePlaylist')
386bdfa6 2451
60bf45c8 2452 channel_page = self._download_webpage(url, channel_id, 'Downloading page #1')
31812a9e
PH
2453 autogenerated = re.search(r'''(?x)
2454 class="[^"]*?(?:
2455 channel-header-autogenerated-label|
2456 yt-channel-title-autogenerated
2457 )[^"]*"''', channel_page) is not None
c5e8d7af 2458
b9643eed
JMF
2459 if autogenerated:
2460 # The videos are contained in a single page
2461 # the ajax pages can't be used, they are empty
b82f815f 2462 entries = [
fb69240c
S
2463 self.url_result(
2464 video_id, 'Youtube', video_id=video_id,
2465 video_title=video_title)
8f02ad4f 2466 for video_id, video_title in self.extract_videos_from_page(channel_page)]
b82f815f
PH
2467 return self.playlist_result(entries, channel_id)
2468
73c4ac2c
S
2469 try:
2470 next(self._entries(channel_page, channel_id))
2471 except StopIteration:
2472 alert_message = self._html_search_regex(
2473 r'(?s)<div[^>]+class=(["\']).*?\byt-alert-message\b.*?\1[^>]*>(?P<alert>[^<]+)</div>',
2474 channel_page, 'alert', default=None, group='alert')
2475 if alert_message:
2476 raise ExtractorError('Youtube said: %s' % alert_message, expected=True)
2477
648e6a1f 2478 return self.playlist_result(self._entries(channel_page, channel_id), channel_id)
c5e8d7af
PH
2479
2480
eb0f3e7e 2481class YoutubeUserIE(YoutubeChannelIE):
78caa52a 2482 IE_DESC = 'YouTube.com user videos (URL or "ytuser" keyword)'
ea696249 2483 _VALID_URL = r'(?:(?:https?://(?:\w+\.)?youtube\.com/(?:(?P<user>user|c)/)?(?!(?:attribution_link|watch|results|shared)(?:$|[^a-z_A-Z0-9-])))|ytuser:)(?!feed/)(?P<id>[A-Za-z0-9_-]+)'
9558dcec 2484 _TEMPLATE_URL = 'https://www.youtube.com/%s/%s/videos'
78caa52a 2485 IE_NAME = 'youtube:user'
c5e8d7af 2486
cdc628a4
PH
2487 _TESTS = [{
2488 'url': 'https://www.youtube.com/user/TheLinuxFoundation',
2489 'playlist_mincount': 320,
2490 'info_dict': {
73c4ac2c
S
2491 'id': 'UUfX55Sx5hEFjoC3cNs6mCUQ',
2492 'title': 'Uploads from The Linux Foundation',
cdc628a4 2493 }
9558dcec
S
2494 }, {
2495 # Only available via https://www.youtube.com/c/12minuteathlete/videos
2496 # but not https://www.youtube.com/user/12minuteathlete/videos
2497 'url': 'https://www.youtube.com/c/12minuteathlete/videos',
2498 'playlist_mincount': 249,
2499 'info_dict': {
2500 'id': 'UUVjM-zV6_opMDx7WYxnjZiQ',
2501 'title': 'Uploads from 12 Minute Athlete',
2502 }
cdc628a4
PH
2503 }, {
2504 'url': 'ytuser:phihag',
2505 'only_matching': True,
daa0df9e
YCH
2506 }, {
2507 'url': 'https://www.youtube.com/c/gametrailers',
2508 'only_matching': True,
9558dcec
S
2509 }, {
2510 'url': 'https://www.youtube.com/gametrailers',
2511 'only_matching': True,
73c4ac2c 2512 }, {
0e879f43 2513 # This channel is not available, geo restricted to JP
73c4ac2c
S
2514 'url': 'https://www.youtube.com/user/kananishinoSMEJ/videos',
2515 'only_matching': True,
cdc628a4
PH
2516 }]
2517
e3ea4790 2518 @classmethod
f4b05232 2519 def suitable(cls, url):
e3ea4790
JMF
2520 # Don't return True if the url can be extracted with other youtube
2521 # extractor, the regex would is too permissive and it would match.
f3a58d46 2522 other_yt_ies = iter(klass for (name, klass) in globals().items() if name.startswith('Youtube') and name.endswith('IE') and klass is not cls)
2523 if any(ie.suitable(url) for ie in other_yt_ies):
5f6a1245
JW
2524 return False
2525 else:
2526 return super(YoutubeUserIE, cls).suitable(url)
f4b05232 2527
9558dcec
S
2528 def _build_template_url(self, url, channel_id):
2529 mobj = re.match(self._VALID_URL, url)
2530 return self._TEMPLATE_URL % (mobj.group('user') or 'user', mobj.group('id'))
2531
b05654f0 2532
f07e276a
S
class YoutubeLiveIE(YoutubeBaseInfoExtractor):
    # Extractor for ".../live" URLs of a user or channel: resolves to the
    # video advertised by the page, or falls back to the base URL.
    IE_NAME = 'youtube:live'
    IE_DESC = 'YouTube.com live streams'
    _VALID_URL = r'(?P<base_url>https?://(?:\w+\.)?youtube\.com/(?:(?:user|channel|c)/)?(?P<id>[^/]+))/live'

    _TESTS = [{
        'url': 'https://www.youtube.com/user/TheYoungTurks/live',
        'info_dict': {
            'id': 'a48o2S1cPoo',
            'ext': 'mp4',
            'title': 'The Young Turks - Live Main Show',
            'uploader': 'The Young Turks',
            'uploader_id': 'TheYoungTurks',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
            'upload_date': '20150715',
            'license': 'Standard YouTube License',
            'description': 'md5:438179573adcdff3c97ebb1ee632b891',
            'categories': ['News & Politics'],
            'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/TheYoungTurks/live',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return a result for the live video, or for the base URL as fallback."""
        matched = re.match(self._VALID_URL, url)
        channel_id = matched.group('id')
        base_url = matched.group('base_url')

        # The download is non-fatal: without a page we just hand over the
        # base URL.
        webpage = self._download_webpage(url, channel_id, fatal=False)
        if not webpage:
            return self.url_result(base_url)

        page_type = self._og_search_property(
            'type', webpage, 'page type', default='')
        video_id = self._html_search_meta(
            'videoId', webpage, 'video id', default=None)

        # Only trust the page when it is a video page with a well-formed id.
        looks_like_video = (
            page_type.startswith('video') and video_id and
            re.match(r'^[0-9A-Za-z_-]{11}$', video_id))
        if looks_like_video:
            return self.url_result(video_id, YoutubeIE.ie_key())
        return self.url_result(base_url)
2583
2584
e462474e
S
class YoutubePlaylistsIE(YoutubePlaylistsBaseInfoExtractor):
    # Declarative extractor for ".../playlists" pages of a user or channel;
    # all behaviour comes from the base class.
    IE_NAME = 'youtube:playlists'
    IE_DESC = 'YouTube.com user/channel playlists'
    _VALID_URL = r'https?://(?:\w+\.)?youtube\.com/(?:user|channel)/(?P<id>[^/]+)/playlists'

    _TESTS = [
        {
            'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
            'playlist_mincount': 4,
            'info_dict': {
                'id': 'ThirstForScience',
                'title': 'Thirst for Science',
            },
        },
        {
            # with "Load more" button
            'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
            'playlist_mincount': 70,
            'info_dict': {
                'id': 'igorkle1',
                'title': 'Игорь Клейнер',
            },
        },
        {
            'url': 'https://www.youtube.com/channel/UCiU1dHvZObB2iP6xkJ__Icw/playlists',
            'playlist_mincount': 17,
            'info_dict': {
                'id': 'UCiU1dHvZObB2iP6xkJ__Icw',
                'title': 'Chem Player',
            },
        },
    ]
0c148415
S
2613
2614
870f3bfc
S
class YoutubeSearchBaseInfoExtractor(YoutubePlaylistBaseInfoExtractor):
    # Pattern for a watch link: captures the 11-character video id and,
    # when a title attribute follows inside the same tag, the title.
    _VIDEO_RE = r'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})(?:[^"]*"[^>]+\btitle="(?P<title>[^"]+))?'
2617
2618
class YoutubeSearchIE(SearchInfoExtractor, YoutubeSearchBaseInfoExtractor):
    IE_DESC = 'YouTube.com searches'
    # there doesn't appear to be a real limit, for example if you search for
    # 'python' you get more than 8.000.000 results
    _MAX_RESULTS = float('inf')
    IE_NAME = 'youtube:search'
    _SEARCH_KEY = 'ytsearch'
    # Extra query-string parameters merged into the search request;
    # subclasses override this (e.g. to change the sort order).
    _EXTRA_QUERY_ARGS = {}
    _TESTS = []

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query.

        Result pages are downloaded one after another until `n` videos have
        been collected, a page yields no videos, or there is no "Next" link.

        Returns a playlist result holding at most `n` videos.
        Raises ExtractorError (expected) when a page carries a search message.
        """
        videos = []

        url_query = {
            'search_query': query.encode('utf-8'),
        }
        url_query.update(self._EXTRA_QUERY_ARGS)
        result_url = 'https://www.youtube.com/results?' + compat_urllib_parse_urlencode(url_query)

        for pagenum in itertools.count(1):
            data = self._download_json(
                result_url, video_id='query "%s"' % query,
                note='Downloading page %s' % pagenum,
                errnote='Unable to download API page',
                query={'spf': 'navigate'})
            html_content = data[1]['body']['content']

            if 'class="search-message' in html_content:
                raise ExtractorError(
                    '[youtube] No video results', expected=True)

            new_videos = list(self._process_page(html_content))
            videos += new_videos
            # Stop as soon as enough results are in hand. With a strict '>'
            # an exact hit of n results would trigger one more page download
            # whose results are discarded (and whose failure would abort
            # an otherwise complete search).
            if not new_videos or len(videos) >= n:
                break
            next_link = self._html_search_regex(
                r'href="(/results\?[^"]*\bsp=[^"]+)"[^>]*>\s*<span[^>]+class="[^"]*\byt-uix-button-content\b[^"]*"[^>]*>Next',
                html_content, 'next link', default=None)
            if next_link is None:
                break
            result_url = compat_urlparse.urljoin('https://www.youtube.com/', next_link)

        # Guarded rather than sliced unconditionally: n may be non-integer
        # (_MAX_RESULTS is float('inf')), which cannot be used as a slice bound.
        if len(videos) > n:
            videos = videos[:n]
        return self.playlist_result(videos, query)
75dff0ee 2667
c9ae7b95 2668
class YoutubeSearchDateIE(YoutubeSearchIE):
    # Same search as the parent class, with an extra query argument that
    # asks for ordering by upload date.
    IE_NAME = '%s:date' % YoutubeSearchIE.IE_NAME
    IE_DESC = 'YouTube.com searches, newest videos first'
    _SEARCH_KEY = 'ytsearchdate'
    _EXTRA_QUERY_ARGS = {'search_sort': 'video_date_uploaded'}
75dff0ee 2674
c9ae7b95 2675
class YoutubeSearchURLIE(YoutubeSearchBaseInfoExtractor):
    # Extractor for search result page URLs (/results?search_query=... or
    # /results?q=...).
    IE_NAME = 'youtube:search_url'
    IE_DESC = 'YouTube.com search URLs'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?P<query>[^&]+)(?:[&]|$)'
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'title': 'youtube-dl test video',
        }
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Download the results page and return its videos, titled by the query."""
        raw_query = re.match(self._VALID_URL, url).group('query')
        # Decode percent-escapes and '+' so the query reads as typed.
        query = compat_urllib_parse_unquote_plus(raw_query)
        webpage = self._download_webpage(url, query)
        entries = self._process_page(webpage)
        return self.playlist_result(entries, playlist_title=query)
c9ae7b95
PH
2696
2697
class YoutubeShowIE(YoutubePlaylistsBaseInfoExtractor):
    # Extractor for /show/<id> URLs; delegates to the base class using the
    # show's "/playlists" page.
    IE_NAME = 'youtube:show'
    IE_DESC = 'YouTube.com (multi-season) shows'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/show/(?P<id>[^?#]*)'
    _TESTS = [{
        'url': 'https://www.youtube.com/show/airdisasters',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'airdisasters',
            'title': 'Air Disasters',
        }
    }]

    def _real_extract(self, url):
        """Rewrite the show URL to its playlists page and extract that."""
        show_id = self._match_id(url)
        playlists_url = 'https://www.youtube.com/show/%s/playlists' % show_id
        return super(YoutubeShowIE, self)._real_extract(playlists_url)
04cc9617
JMF
2715
2716
class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
    """
    Base class for feed extractors
    Subclasses must define the _FEED_NAME and _PLAYLIST_TITLE properties.
    """
    _LOGIN_REQUIRED = True

    @property
    def IE_NAME(self):
        """Extractor name derived from the subclass's _FEED_NAME."""
        return 'youtube:%s' % self._FEED_NAME

    def _real_initialize(self):
        """Run the login step before extraction."""
        self._login()

    def _entries(self, page):
        """Yield a result for every distinct video of the feed.

        Starts from `page` and follows the "load more" links until a
        portion brings no new video ids or no further link is present.
        """
        # The extraction process is the same as for playlists, but the regex
        # for the video ids doesn't contain an index
        seen_ids = []
        more_widget_html = content_html = page
        for page_num in itertools.count(1):
            found = orderedSet(re.findall(
                r'href="\s*/watch\?v=([0-9A-Za-z_-]{11})', content_html))

            # 'recommended' feed has infinite 'load more' and each new portion spins
            # the same videos in (sometimes) slightly different order, so we'll check
            # for unicity and break when portion has no new videos
            fresh_ids = [video_id for video_id in found if video_id not in seen_ids]
            if not fresh_ids:
                break

            seen_ids.extend(fresh_ids)

            for entry in self._ids_to_results(fresh_ids):
                yield entry

            load_more = re.search(
                r'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html)
            if not load_more:
                break

            more = self._download_json(
                'https://youtube.com/%s' % load_more.group('more'), self._PLAYLIST_TITLE,
                'Downloading page #%s' % page_num,
                transform_source=uppercase_escape)
            content_html = more['content_html']
            more_widget_html = more['load_more_widget_html']

    def _real_extract(self, url):
        """Download the feed page for _FEED_NAME and return it as a playlist."""
        feed_url = 'https://www.youtube.com/feed/%s' % self._FEED_NAME
        page = self._download_webpage(feed_url, self._PLAYLIST_TITLE)
        return self.playlist_result(
            self._entries(page), playlist_title=self._PLAYLIST_TITLE)
25f14e9f
S
2768
2769
class YoutubeWatchLaterIE(YoutubePlaylistIE):
    # Extractor for the "watch later" list, addressed as the fixed
    # playlist id 'WL'.
    IE_NAME = 'youtube:watchlater'
    IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/(?:feed/watch_later|(?:playlist|watch)\?(?:.+&)?list=WL)|:ytwatchlater'

    _TESTS = [{
        'url': 'https://www.youtube.com/playlist?list=WL',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?v=bCNU9TrbiRk&index=1&list=WL',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return the single video when one is selected, else the 'WL' playlist."""
        # Only the second element (the ready-made video result) matters here.
        video = self._check_download_just_video(url, 'WL')[1]
        if video:
            return video
        # The has_videos flag of the returned tuple is not used here.
        return self._extract_playlist('WL')[1]
f459d170 2789
5f6a1245 2790
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    # Extractor for the favourites page: finds the playlist id on the page
    # and hands it over to the playlist extractor.
    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube.com favourite videos, ":ytfav" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/my_favorites|:ytfav(?:ou?rites)?'
    _LOGIN_REQUIRED = True

    def _real_extract(self, url):
        """Resolve the favourites playlist id and delegate to 'YoutubePlaylist'."""
        favourites_page = self._download_webpage(
            'https://www.youtube.com/my_favorites', 'Youtube Favourites videos')
        # The first "list=..." value on the page is taken as the playlist id.
        playlist_id = self._search_regex(
            r'list=(.+?)["&]', favourites_page, 'favourites playlist id')
        return self.url_result(playlist_id, 'YoutubePlaylist')
15870e90
PH
2801
2802
25f14e9f
S
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    # Feed extractor for /feed/recommended; only configuration lives here.
    _FEED_NAME = 'recommended'
    _PLAYLIST_TITLE = 'Youtube Recommended videos'
    IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/recommended|:ytrec(?:ommended)?'
1ed5b5c9 2808
1ed5b5c9 2809
25f14e9f
S
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    # Feed extractor for /feed/subscriptions; only configuration lives here.
    _FEED_NAME = 'subscriptions'
    _PLAYLIST_TITLE = 'Youtube Subscriptions'
    IE_DESC = 'YouTube.com subscriptions feed, "ytsubs" keyword (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/subscriptions|:ytsubs(?:criptions)?'
1ed5b5c9 2815
1ed5b5c9 2816
25f14e9f
S
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    # Feed extractor for /feed/history; only configuration lives here.
    _FEED_NAME = 'history'
    _PLAYLIST_TITLE = 'Youtube History'
    IE_DESC = 'Youtube watch history, ":ythistory" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/history|:ythistory'
1ed5b5c9
JMF
2822
2823
15870e90
PH
class YoutubeTruncatedURLIE(InfoExtractor):
    # Catches watch/attribution URLs that end right after a non-video
    # parameter and always fails with a hint about quoting the URL.
    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    # Verbose-mode pattern; the layout whitespace is not part of the regex.
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError explaining the likely cause."""
        hint = (
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like youtube-dl '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply youtube-dl BaW_jenozKc .')
        raise ExtractorError(hint, expected=True)
772fd5cc
PH
2871
2872
class YoutubeTruncatedIDIE(InfoExtractor):
    # Catches watch URLs whose video id is 1 to 10 characters long (a full
    # id has 11) and always fails with an explanatory error.
    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError naming the incomplete id."""
        partial_id = self._match_id(url)
        message = 'Incomplete YouTube ID %s. URL %s looks truncated.' % (partial_id, url)
        raise ExtractorError(message, expected=True)