]> jfr.im git - yt-dlp.git/blame - youtube_dl/extractor/youtube.py
[youtube] Update tests
[yt-dlp.git] / youtube_dl / extractor / youtube.py
CommitLineData
c5e8d7af 1# coding: utf-8
c5e8d7af 2
78caa52a
PH
3from __future__ import unicode_literals
4
5
0ca96d48 6import itertools
c5e8d7af 7import json
c4417ddb 8import os.path
d77ab8e2 9import random
c5e8d7af 10import re
42939b61 11import time
e0df6211 12import traceback
c5e8d7af 13
b05654f0 14from .common import InfoExtractor, SearchInfoExtractor
2b25cb5d 15from ..jsinterp import JSInterpreter
54256267 16from ..swfinterp import SWFInterpreter
4bb4a188 17from ..compat import (
edf3e38e 18 compat_chr,
8d81f3e3 19 compat_kwargs,
c5e8d7af 20 compat_parse_qs,
7fd002c0
S
21 compat_urllib_parse_unquote,
22 compat_urllib_parse_unquote_plus,
15707c7e 23 compat_urllib_parse_urlencode,
7c80519c 24 compat_urllib_parse_urlparse,
7c61bd36 25 compat_urlparse,
c5e8d7af 26 compat_str,
4bb4a188
PH
27)
28from ..utils import (
c5e8d7af 29 clean_html,
9b9c5355 30 error_to_compat_str,
c5e8d7af 31 ExtractorError,
2d30521a 32 float_or_none,
4bb4a188
PH
33 get_element_by_attribute,
34 get_element_by_id,
dd27fd17 35 int_or_none,
94278f72 36 mimetype2ext,
4bb4a188 37 orderedSet,
6310acf5 38 parse_codecs,
7c80519c 39 parse_duration,
54fc90aa 40 qualities,
0cb58b02 41 remove_quotes,
3995d37d 42 remove_start,
cf7e015f 43 smuggle_url,
c93d53f5 44 str_to_int,
556dbe7f 45 try_get,
c5e8d7af
PH
46 unescapeHTML,
47 unified_strdate,
cf7e015f 48 unsmuggle_url,
81c2f20b 49 uppercase_escape,
6e6bc8da 50 urlencode_postdata,
c5e8d7af
PH
51)
52
5f6a1245 53
class YoutubeBaseInfoExtractor(InfoExtractor):
    """Provide base functions for Youtube extractors"""
    _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
    _TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge'

    # Endpoints used by the multi-step sign-in flow implemented in _login()
    _LOOKUP_URL = 'https://accounts.google.com/_/signin/sl/lookup'
    _CHALLENGE_URL = 'https://accounts.google.com/_/signin/sl/challenge'
    # {0} is filled with the "TL" token extracted from the challenge response
    _TFA_URL = 'https://accounts.google.com/_/signin/challenge?hl=en&TL={0}'

    _NETRC_MACHINE = 'youtube'
    # If True it will raise an error if no login info is provided
    _LOGIN_REQUIRED = False

    _PLAYLIST_ID_RE = r'(?:PL|LL|EC|UU|FL|RD|UL|TL)[0-9A-Za-z-_]{10,}'

    def _set_language(self):
        """Set the PREF cookie on .youtube.com so that pages are requested with hl=en."""
        self._set_cookie(
            '.youtube.com', 'PREF', 'f1=50000000&hl=en',
            # YouTube sets the expire time to about two months
            expire_time=time.time() + 2 * 30 * 24 * 3600)

    def _ids_to_results(self, ids):
        """Return a list with one url_result (handled by the 'Youtube' IE) per video id."""
        return [
            self.url_result(vid_id, 'Youtube', video_id=vid_id)
            for vid_id in ids]

    def _login(self):
        """
        Attempt to log in to YouTube.
        True is returned if successful or skipped.
        False is returned if login failed.

        If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.
        """
        username, password = self._get_login_info()
        # No authentication to be performed
        if username is None:
            if self._LOGIN_REQUIRED and self._downloader.params.get('cookiefile') is None:
                raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
            return True

        login_page = self._download_webpage(
            self._LOGIN_URL, None,
            note='Downloading login page',
            errnote='unable to fetch login page', fatal=False)
        if login_page is False:
            # Explicit False (not None) to honour the documented contract
            return False

        login_form = self._hidden_inputs(login_page)

        def req(url, f_req, note, errnote):
            """POST the hidden login form fields plus f_req (JSON-encoded) to url.

            Returns the decoded JSON response; the download is non-fatal
            (fatal=False), and callers below treat a False result as failure.
            """
            data = login_form.copy()
            data.update({
                'pstMsg': 1,
                'checkConnection': 'youtube',
                'checkedDomains': 'youtube',
                'hl': 'en',
                'deviceinfo': '[null,null,null,[],null,"US",null,null,[],"GlifWebSignIn",null,[null,null,[]]]',
                'f.req': json.dumps(f_req),
                'flowName': 'GlifWebSignIn',
                'flowEntry': 'ServiceLogin',
            })
            return self._download_json(
                url, None, note=note, errnote=errnote,
                # Drop everything preceding the first '[' before JSON parsing
                transform_source=lambda s: re.sub(r'^[^[]*', '', s),
                fatal=False,
                data=urlencode_postdata(data), headers={
                    'Content-Type': 'application/x-www-form-urlencoded;charset=utf-8',
                    'Google-Accounts-XSRF': 1,
                })

        def warn(message):
            self._downloader.report_warning(message)

        lookup_req = [
            username,
            None, [], None, 'US', None, None, 2, False, True,
            [
                None, None,
                [2, 1, None, 1,
                 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn',
                 None, [], 4],
                1, [None, None, []], None, None, None, True
            ],
            username,
        ]

        lookup_results = req(
            self._LOOKUP_URL, lookup_req,
            'Looking up account info', 'Unable to look up account info')

        if lookup_results is False:
            return False

        user_hash = try_get(lookup_results, lambda x: x[0][2], compat_str)
        if not user_hash:
            warn('Unable to extract user hash')
            return False

        challenge_req = [
            user_hash,
            None, 1, None, [1, None, None, None, [password, None, True]],
            [
                None, None, [2, 1, None, 1, 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn', None, [], 4],
                1, [None, None, []], None, None, None, True
            ]]

        challenge_results = req(
            self._CHALLENGE_URL, challenge_req,
            'Logging in', 'Unable to log in')

        if challenge_results is False:
            # Explicit False (not None) to honour the documented contract
            return False

        login_res = try_get(challenge_results, lambda x: x[0][5], list)
        if login_res:
            login_msg = try_get(login_res, lambda x: x[5], compat_str)
            # The conditional is parenthesised on purpose: '%' binds tighter
            # than 'if/else', so without the parentheses any message other
            # than INCORRECT_ANSWER_ENTERED was passed to warn() without the
            # 'Unable to login: ' prefix (and could even be None).
            warn(
                'Unable to login: %s' % (
                    'Invalid password'
                    if login_msg == 'INCORRECT_ANSWER_ENTERED' else login_msg))
            return False

        res = try_get(challenge_results, lambda x: x[0][-1], list)
        if not res:
            warn('Unable to extract result entry')
            return False

        # Pre-bind so that a challenge other than TWO_STEP_VERIFICATION ends
        # in the 'Unable to extract CheckCookie URL' warning below instead of
        # an UnboundLocalError.
        check_cookie_url = None

        tfa = try_get(res, lambda x: x[0][0], list)
        if tfa:
            tfa_str = try_get(tfa, lambda x: x[2], compat_str)
            if tfa_str == 'TWO_STEP_VERIFICATION':
                # SEND_SUCCESS - TFA code has been successfully sent to phone
                # QUOTA_EXCEEDED - reached the limit of TFA codes
                status = try_get(tfa, lambda x: x[5], compat_str)
                if status == 'QUOTA_EXCEEDED':
                    warn('Exceeded the limit of TFA codes, try later')
                    return False

                tl = try_get(challenge_results, lambda x: x[1][2], compat_str)
                if not tl:
                    warn('Unable to extract TL')
                    return False

                tfa_code = self._get_tfa_info('2-step verification code')

                if not tfa_code:
                    warn(
                        'Two-factor authentication required. Provide it either interactively or with --twofactor <code>'
                        '(Note that only TOTP (Google Authenticator App) codes work at this time.)')
                    return False

                tfa_code = remove_start(tfa_code, 'G-')

                tfa_req = [
                    user_hash, None, 2, None,
                    [
                        9, None, None, None, None, None, None, None,
                        [None, tfa_code, True, 2]
                    ]]

                tfa_results = req(
                    self._TFA_URL.format(tl), tfa_req,
                    'Submitting TFA code', 'Unable to submit TFA code')

                if tfa_results is False:
                    return False

                tfa_res = try_get(tfa_results, lambda x: x[0][5], list)
                if tfa_res:
                    tfa_msg = try_get(tfa_res, lambda x: x[5], compat_str)
                    # Parenthesised for the same precedence reason as the
                    # password warning above.
                    warn(
                        'Unable to finish TFA: %s' % (
                            'Invalid TFA code'
                            if tfa_msg == 'INCORRECT_ANSWER_ENTERED' else tfa_msg))
                    return False

                check_cookie_url = try_get(
                    tfa_results, lambda x: x[0][-1][2], compat_str)
        else:
            check_cookie_url = try_get(res, lambda x: x[2], compat_str)

        if not check_cookie_url:
            warn('Unable to extract CheckCookie URL')
            return False

        check_cookie_results = self._download_webpage(
            check_cookie_url, None, 'Checking cookie', fatal=False)

        if check_cookie_results is False:
            return False

        # The login is only considered successful when the CheckCookie page
        # contains a link to the Google account page
        if 'https://myaccount.google.com/' not in check_cookie_results:
            warn('Unable to log in')
            return False

        return True

    def _download_webpage_handle(self, *args, **kwargs):
        """Delegate to the parent implementation, adding disable_polymer=true to the 'query' keyword argument."""
        kwargs.setdefault('query', {})['disable_polymer'] = 'true'
        return super(YoutubeBaseInfoExtractor, self)._download_webpage_handle(
            *args, **compat_kwargs(kwargs))

    def _real_initialize(self):
        """Set the language cookie and attempt to log in; does nothing without a downloader."""
        if self._downloader is None:
            return
        self._set_language()
        if not self._login():
            return
c5e8d7af 261
8377574c 262
class YoutubeEntryListBaseInfoExtractor(YoutubeBaseInfoExtractor):
    # Extract entries from page with "Load more" button
    def _entries(self, page, playlist_id):
        """Yield entries from page and from every continuation reachable through its "Load more" link.

        Each chunk of HTML is handed to self._process_page(); the next chunk
        is fetched as JSON from the data-uix-load-more-href target found in
        the most recent "load more" widget HTML.
        """
        current_html = page
        widget_html = page
        page_index = 1
        while True:
            for item in self._process_page(current_html):
                yield item

            load_more = re.search(
                r'data-uix-load-more-href="/?(?P<more>[^"]+)"', widget_html)
            if load_more is None:
                return

            response = self._download_json(
                'https://youtube.com/%s' % load_more.group('more'), playlist_id,
                'Downloading page #%s' % page_index,
                transform_source=uppercase_escape)
            current_html = response['content_html']
            if not current_html.strip():
                # Some webpages show a "Load more" button but they don't
                # have more videos
                return
            widget_html = response['load_more_widget_html']
            page_index += 1
285
061a75ed
S
286
class YoutubePlaylistBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
    def _process_page(self, content):
        """Yield one 'Youtube' url_result per (video id, title) pair found in content."""
        for video_id, video_title in self.extract_videos_from_page(content):
            yield self.url_result(video_id, 'Youtube', video_id, video_title)

    def extract_videos_from_page(self, page):
        """Return (video_id, title) pairs matched by self._VIDEO_RE in page.

        Ids are de-duplicated and keep the order of their first occurrence.
        When an id occurs several times, the first non-empty title seen for
        it is used.
        """
        ids_in_page = []
        titles_in_page = []
        # Maps a video id to its position in the two lists above, so that
        # duplicates are detected without rescanning ids_in_page each time
        position_by_id = {}
        for mobj in re.finditer(self._VIDEO_RE, page):
            # The link with index 0 is not the first video of the playlist (not sure if still actual)
            # NOTE(review): this compares the 'id' group (not 'index') with
            # '0'; presumably 'index' was intended - confirm before changing.
            if 'index' in mobj.groupdict() and mobj.group('id') == '0':
                continue
            video_id = mobj.group('id')
            video_title = unescapeHTML(mobj.group('title'))
            if video_title:
                video_title = video_title.strip()
            idx = position_by_id.get(video_id)
            if idx is None:
                position_by_id[video_id] = len(ids_in_page)
                ids_in_page.append(video_id)
                titles_in_page.append(video_title)
            elif video_title and not titles_in_page[idx]:
                titles_in_page[idx] = video_title
        return zip(ids_in_page, titles_in_page)
311
312
061a75ed
S
class YoutubePlaylistsBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
    def _process_page(self, content):
        """Yield a 'YoutubePlaylist' url_result for each distinct playlist id linked from a lockup title in content."""
        found_ids = re.findall(
            r'<h3[^>]+class="[^"]*yt-lockup-title[^"]*"[^>]*><a[^>]+href="/?playlist\?list=([0-9A-Za-z-_]{10,})"',
            content)
        for list_id in orderedSet(found_ids):
            yield self.url_result(
                'https://www.youtube.com/playlist?list=%s' % list_id, 'YoutubePlaylist')

    def _real_extract(self, url):
        """Download the page at url and return a playlist result built from its entries."""
        playlist_id = self._match_id(url)
        webpage = self._download_webpage(url, playlist_id)
        title = self._og_search_title(webpage, fatal=False)
        entries = self._entries(webpage, playlist_id)
        return self.playlist_result(entries, playlist_id, title)
0c148415
S
326
327
360e1ca5 328class YoutubeIE(YoutubeBaseInfoExtractor):
78caa52a 329 IE_DESC = 'YouTube.com'
cb7dfeea 330 _VALID_URL = r"""(?x)^
c5e8d7af 331 (
edb53e2d 332 (?:https?://|//) # http(s):// or protocol-independent URL
cb7dfeea 333 (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/|
484aaeb2 334 (?:www\.)?deturl\.com/www\.youtube\.com/|
e70dc1d1 335 (?:www\.)?pwnyoutube\.com/|
8b561bfc 336 (?:www\.)?hooktube\.com/|
f7000f3a 337 (?:www\.)?yourepeat\.com/|
e69ae5b9
JMF
338 tube\.majestyc\.net/|
339 youtube\.googleapis\.com/) # the various hostnames, with wildcard subdomains
c5e8d7af
PH
340 (?:.*?\#/)? # handle anchor (#/) redirect urls
341 (?: # the various things that can precede the ID:
ac7553d0 342 (?:(?:v|embed|e)/(?!videoseries)) # v/ or embed/ or e/
c5e8d7af 343 |(?: # or the v= param in all its forms
f7000f3a 344 (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
c5e8d7af 345 (?:\?|\#!?) # the params delimiter ? or # or #!
040ac686 346 (?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
c5e8d7af
PH
347 v=
348 )
f4b05232 349 ))
cbaed4bb
S
350 |(?:
351 youtu\.be| # just youtu.be/xxxx
6d4fc66b
S
352 vid\.plus| # or vid.plus/xxxx
353 zwearz\.com/watch| # or zwearz.com/watch/xxxx
cbaed4bb 354 )/
edb53e2d 355 |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
f4b05232 356 )
c5e8d7af 357 )? # all until now is optional -> you can pass the naked ID
8963d9c2 358 ([0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
d0ba5587
S
359 (?!.*?\blist=
360 (?:
361 %(playlist_id)s| # combined list/video URLs are handled by the playlist IE
362 WL # WL are handled by the watch later IE
363 )
364 )
c5e8d7af 365 (?(1).+)? # if we found the ID, everything can follow
d0ba5587 366 $""" % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
c5e8d7af 367 _NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
2c62dc26 368 _formats = {
c2d3cb4c 369 '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
370 '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
371 '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
372 '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
373 '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
374 '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
375 '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
376 '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
3834d3e3 377 # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
c2d3cb4c 378 '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
379 '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
380 '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
381 '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
382 '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
383 '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
e1a0bfdf 384 '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
c2d3cb4c 385 '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
386 '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
e1a0bfdf 387
388
389 # 3D videos
c2d3cb4c 390 '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
391 '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
392 '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
393 '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
e1a0bfdf 394 '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
395 '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
396 '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
836a086c 397
96fb5605 398 # Apple HTTP Live Streaming
11f12195 399 '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
c2d3cb4c 400 '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
401 '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
402 '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
403 '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
404 '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
e1a0bfdf 405 '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
406 '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
2c62dc26
PH
407
408 # DASH mp4 video
d23028a8
S
409 '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
410 '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
411 '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
412 '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
413 '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
414 '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/rg3/youtube-dl/issues/4559)
415 '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
416 '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
417 '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
418 '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
419 '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
420 '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},
836a086c 421
f6f1fc92 422 # Dash mp4 audio
d23028a8
S
423 '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
424 '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
425 '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
426 '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
427 '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
428 '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
429 '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},
836a086c
AZ
430
431 # Dash webm
d23028a8
S
432 '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
433 '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
434 '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
435 '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
436 '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
437 '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
438 '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
439 '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
440 '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
441 '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
442 '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
443 '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
444 '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
445 '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
446 '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
4c6b4764 447 # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
d23028a8
S
448 '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
449 '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
450 '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
451 '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
452 '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
453 '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
2c62dc26
PH
454
455 # Dash webm audio
d23028a8
S
456 '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
457 '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},
ce6b9a2d 458
0857baad 459 # Dash webm audio with opus inside
d23028a8
S
460 '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
461 '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
462 '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},
0857baad 463
ce6b9a2d
PH
464 # RTMP (unnamed)
465 '_rtmp': {'protocol': 'rtmp'},
c5e8d7af 466 }
23d17e4b 467 _SUBTITLE_FORMATS = ('ttml', 'vtt')
836a086c 468
fd5c4aab
S
469 _GEO_BYPASS = False
470
78caa52a 471 IE_NAME = 'youtube'
2eb88d95
PH
472 _TESTS = [
473 {
2d3d2997 474 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
4bc3a23e
PH
475 'info_dict': {
476 'id': 'BaW_jenozKc',
477 'ext': 'mp4',
478 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
479 'uploader': 'Philipp Hagemeister',
480 'uploader_id': 'phihag',
ec85ded8 481 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
4bc3a23e 482 'upload_date': '20121002',
7caf9830 483 'license': 'Standard YouTube License',
4bc3a23e
PH
484 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
485 'categories': ['Science & Technology'],
000b6b5a 486 'tags': ['youtube-dl'],
556dbe7f 487 'duration': 10,
3e7c1224
PH
488 'like_count': int,
489 'dislike_count': int,
7c80519c 490 'start_time': 1,
297a564b 491 'end_time': 9,
2eb88d95 492 }
0e853ca4 493 },
0e853ca4 494 {
2d3d2997 495 'url': 'https://www.youtube.com/watch?v=UxxajLWwzqY',
4bc3a23e
PH
496 'note': 'Test generic use_cipher_signature video (#897)',
497 'info_dict': {
498 'id': 'UxxajLWwzqY',
499 'ext': 'mp4',
500 'upload_date': '20120506',
501 'title': 'Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]',
0cb58b02 502 'alt_title': 'I Love It (feat. Charli XCX)',
7caf9830 503 'description': 'md5:f3ceb5ef83a08d95b9d146f973157cc8',
000b6b5a
S
504 'tags': ['Icona Pop i love it', 'sweden', 'pop music', 'big beat records', 'big beat', 'charli',
505 'xcx', 'charli xcx', 'girls', 'hbo', 'i love it', "i don't care", 'icona', 'pop',
506 'iconic ep', 'iconic', 'love', 'it'],
556dbe7f 507 'duration': 180,
4bc3a23e
PH
508 'uploader': 'Icona Pop',
509 'uploader_id': 'IconaPop',
ec85ded8 510 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IconaPop',
7caf9830 511 'license': 'Standard YouTube License',
0cb58b02 512 'creator': 'Icona Pop',
936784b2
S
513 'track': 'I Love It (feat. Charli XCX)',
514 'artist': 'Icona Pop',
2eb88d95 515 }
c108eb73
JMF
516 },
517 {
4bc3a23e
PH
518 'url': 'https://www.youtube.com/watch?v=07FYdnEawAQ',
519 'note': 'Test VEVO video with age protection (#956)',
520 'info_dict': {
521 'id': '07FYdnEawAQ',
522 'ext': 'mp4',
523 'upload_date': '20130703',
524 'title': 'Justin Timberlake - Tunnel Vision (Explicit)',
0cb58b02 525 'alt_title': 'Tunnel Vision',
4bc3a23e 526 'description': 'md5:64249768eec3bc4276236606ea996373',
556dbe7f 527 'duration': 419,
4bc3a23e
PH
528 'uploader': 'justintimberlakeVEVO',
529 'uploader_id': 'justintimberlakeVEVO',
ec85ded8 530 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/justintimberlakeVEVO',
7caf9830 531 'license': 'Standard YouTube License',
0cb58b02 532 'creator': 'Justin Timberlake',
7e72694b 533 'track': 'Tunnel Vision',
936784b2 534 'artist': 'Justin Timberlake',
34952f09 535 'age_limit': 18,
c108eb73
JMF
536 }
537 },
fccd3771 538 {
4bc3a23e
PH
539 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
540 'note': 'Embed-only video (#1746)',
541 'info_dict': {
542 'id': 'yZIXLfi8CZQ',
543 'ext': 'mp4',
544 'upload_date': '20120608',
545 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
546 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
547 'uploader': 'SET India',
94bfcd23 548 'uploader_id': 'setindia',
ec85ded8 549 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
7caf9830 550 'license': 'Standard YouTube License',
94bfcd23 551 'age_limit': 18,
fccd3771
PH
552 }
553 },
11b56058 554 {
2d3d2997 555 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=UxxajLWwzqY',
11b56058
PM
556 'note': 'Use the first video ID in the URL',
557 'info_dict': {
558 'id': 'BaW_jenozKc',
559 'ext': 'mp4',
560 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
561 'uploader': 'Philipp Hagemeister',
562 'uploader_id': 'phihag',
ec85ded8 563 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
11b56058 564 'upload_date': '20121002',
7caf9830 565 'license': 'Standard YouTube License',
11b56058
PM
566 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
567 'categories': ['Science & Technology'],
568 'tags': ['youtube-dl'],
556dbe7f 569 'duration': 10,
11b56058
PM
570 'like_count': int,
571 'dislike_count': int,
34a7de29
S
572 },
573 'params': {
574 'skip_download': True,
575 },
11b56058 576 },
dd27fd17 577 {
2d3d2997 578 'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
4bc3a23e
PH
579 'note': '256k DASH audio (format 141) via DASH manifest',
580 'info_dict': {
581 'id': 'a9LDPn-MO4I',
582 'ext': 'm4a',
583 'upload_date': '20121002',
584 'uploader_id': '8KVIDEO',
ec85ded8 585 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
4bc3a23e
PH
586 'description': '',
587 'uploader': '8KVIDEO',
7caf9830 588 'license': 'Standard YouTube License',
4bc3a23e 589 'title': 'UHDTV TEST 8K VIDEO.mp4'
4919603f 590 },
4bc3a23e
PH
591 'params': {
592 'youtube_include_dash_manifest': True,
593 'format': '141',
4919603f 594 },
de3c7fe0 595 'skip': 'format 141 not served anymore',
dd27fd17 596 },
3489b7d2
JMF
597 # DASH manifest with encrypted signature
598 {
78caa52a
PH
599 'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
600 'info_dict': {
601 'id': 'IB3lcPjvWLA',
602 'ext': 'm4a',
b766eb27 603 'title': 'Afrojack, Spree Wilson - The Spark ft. Spree Wilson',
eb6793ba 604 'description': 'md5:1900ed86ee514927b9e00fbead6969a5',
556dbe7f 605 'duration': 244,
78caa52a
PH
606 'uploader': 'AfrojackVEVO',
607 'uploader_id': 'AfrojackVEVO',
608 'upload_date': '20131011',
7caf9830 609 'license': 'Standard YouTube License',
3489b7d2 610 },
4bc3a23e 611 'params': {
78caa52a 612 'youtube_include_dash_manifest': True,
de3c7fe0 613 'format': '141/bestaudio[ext=m4a]',
3489b7d2
JMF
614 },
615 },
aaeb86f6
S
616 # JS player signature function name containing $
617 {
618 'url': 'https://www.youtube.com/watch?v=nfWlot6h_JM',
619 'info_dict': {
620 'id': 'nfWlot6h_JM',
621 'ext': 'm4a',
622 'title': 'Taylor Swift - Shake It Off',
0cb58b02 623 'alt_title': 'Shake It Off',
f57b7835 624 'description': 'md5:95f66187cd7c8b2c13eb78e1223b63c3',
556dbe7f 625 'duration': 242,
aaeb86f6
S
626 'uploader': 'TaylorSwiftVEVO',
627 'uploader_id': 'TaylorSwiftVEVO',
628 'upload_date': '20140818',
7caf9830 629 'license': 'Standard YouTube License',
0cb58b02 630 'creator': 'Taylor Swift',
aaeb86f6
S
631 },
632 'params': {
633 'youtube_include_dash_manifest': True,
de3c7fe0 634 'format': '141/bestaudio[ext=m4a]',
aaeb86f6
S
635 },
636 },
aa79ac0c
PH
637 # Controversy video
638 {
639 'url': 'https://www.youtube.com/watch?v=T4XJQO3qol8',
640 'info_dict': {
641 'id': 'T4XJQO3qol8',
642 'ext': 'mp4',
556dbe7f 643 'duration': 219,
aa79ac0c 644 'upload_date': '20100909',
eb6793ba 645 'uploader': 'TJ Kirk',
aa79ac0c 646 'uploader_id': 'TheAmazingAtheist',
ec85ded8 647 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheAmazingAtheist',
7caf9830 648 'license': 'Standard YouTube License',
aa79ac0c
PH
649 'title': 'Burning Everyone\'s Koran',
650 'description': 'SUBSCRIBE: http://www.youtube.com/saturninefilms\n\nEven Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html',
651 }
c522adb1
JMF
652 },
653 # Normal age-gate video (No vevo, embed allowed)
654 {
2d3d2997 655 'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
c522adb1
JMF
656 'info_dict': {
657 'id': 'HtVdAasjOgU',
658 'ext': 'mp4',
659 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
ec85ded8 660 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
556dbe7f 661 'duration': 142,
c522adb1
JMF
662 'uploader': 'The Witcher',
663 'uploader_id': 'WitcherGame',
ec85ded8 664 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
c522adb1 665 'upload_date': '20140605',
7caf9830 666 'license': 'Standard YouTube License',
34952f09 667 'age_limit': 18,
c522adb1
JMF
668 },
669 },
fccae2b9
S
670 # Age-gate video with encrypted signature
671 {
2d3d2997 672 'url': 'https://www.youtube.com/watch?v=6kLq3WMV1nU',
fccae2b9
S
673 'info_dict': {
674 'id': '6kLq3WMV1nU',
eb6793ba 675 'ext': 'webm',
fccae2b9
S
676 'title': 'Dedication To My Ex (Miss That) (Lyric Video)',
677 'description': 'md5:33765bb339e1b47e7e72b5490139bb41',
eb6793ba 678 'duration': 246,
fccae2b9
S
679 'uploader': 'LloydVEVO',
680 'uploader_id': 'LloydVEVO',
ec85ded8 681 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/LloydVEVO',
fccae2b9 682 'upload_date': '20110629',
7caf9830 683 'license': 'Standard YouTube License',
34952f09 684 'age_limit': 18,
fccae2b9
S
685 },
686 },
774e208f 687 # video_info is None (https://github.com/rg3/youtube-dl/issues/4421)
7d02dcfa 688 # YouTube Red ad is not captured for creator
774e208f
PH
689 {
690 'url': '__2ABJjxzNo',
691 'info_dict': {
692 'id': '__2ABJjxzNo',
693 'ext': 'mp4',
556dbe7f 694 'duration': 266,
774e208f
PH
695 'upload_date': '20100430',
696 'uploader_id': 'deadmau5',
ec85ded8 697 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
0cb58b02 698 'creator': 'deadmau5',
774e208f
PH
699 'description': 'md5:12c56784b8032162bb936a5f76d55360',
700 'uploader': 'deadmau5',
7caf9830 701 'license': 'Standard YouTube License',
774e208f 702 'title': 'Deadmau5 - Some Chords (HD)',
0cb58b02 703 'alt_title': 'Some Chords',
774e208f
PH
704 },
705 'expected_warnings': [
706 'DASH manifest missing',
707 ]
e52a40ab
PH
708 },
709 # Olympics (https://github.com/rg3/youtube-dl/issues/4431)
710 {
711 'url': 'lqQg6PlCWgI',
712 'info_dict': {
713 'id': 'lqQg6PlCWgI',
714 'ext': 'mp4',
556dbe7f 715 'duration': 6085,
90227264 716 'upload_date': '20150827',
cbe2bd91 717 'uploader_id': 'olympic',
ec85ded8 718 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
7caf9830 719 'license': 'Standard YouTube License',
cbe2bd91 720 'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
be49068d 721 'uploader': 'Olympic',
cbe2bd91
PH
722 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
723 },
724 'params': {
725 'skip_download': 'requires avconv',
e52a40ab 726 }
cbe2bd91 727 },
6271f1ca
PH
728 # Non-square pixels
729 {
730 'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
731 'info_dict': {
732 'id': '_b-2C3KPAM0',
733 'ext': 'mp4',
734 'stretched_ratio': 16 / 9.,
556dbe7f 735 'duration': 85,
6271f1ca
PH
736 'upload_date': '20110310',
737 'uploader_id': 'AllenMeow',
ec85ded8 738 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
6271f1ca 739 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
eb6793ba 740 'uploader': '孫ᄋᄅ',
7caf9830 741 'license': 'Standard YouTube License',
6271f1ca
PH
742 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
743 },
06b491eb
S
744 },
745 # url_encoded_fmt_stream_map is empty string
746 {
747 'url': 'qEJwOuvDf7I',
748 'info_dict': {
749 'id': 'qEJwOuvDf7I',
f57b7835 750 'ext': 'webm',
06b491eb
S
751 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
752 'description': '',
753 'upload_date': '20150404',
754 'uploader_id': 'spbelect',
755 'uploader': 'Наблюдатели Петербурга',
756 },
757 'params': {
758 'skip_download': 'requires avconv',
e323cf3f
S
759 },
760 'skip': 'This live event has ended.',
06b491eb 761 },
da77d856
S
762 # Extraction from multiple DASH manifests (https://github.com/rg3/youtube-dl/pull/6097)
763 {
764 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
765 'info_dict': {
766 'id': 'FIl7x6_3R5Y',
eb6793ba 767 'ext': 'webm',
da77d856
S
768 'title': 'md5:7b81415841e02ecd4313668cde88737a',
769 'description': 'md5:116377fd2963b81ec4ce64b542173306',
556dbe7f 770 'duration': 220,
da77d856
S
771 'upload_date': '20150625',
772 'uploader_id': 'dorappi2000',
ec85ded8 773 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
da77d856 774 'uploader': 'dorappi2000',
7caf9830 775 'license': 'Standard YouTube License',
eb6793ba 776 'formats': 'mincount:31',
da77d856 777 },
eb6793ba 778 'skip': 'not actual anymore',
2ee8f5d8 779 },
8a1a26ce
YCH
780 # DASH manifest with segment_list
781 {
782 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
783 'md5': '8ce563a1d667b599d21064e982ab9e31',
784 'info_dict': {
785 'id': 'CsmdDsKjzN8',
786 'ext': 'mp4',
17ee98e1 787 'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510
8a1a26ce
YCH
788 'uploader': 'Airtek',
789 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
790 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
7caf9830 791 'license': 'Standard YouTube License',
8a1a26ce
YCH
792 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
793 },
794 'params': {
795 'youtube_include_dash_manifest': True,
796 'format': '135', # bestvideo
be49068d
S
797 },
798 'skip': 'This live event has ended.',
2ee8f5d8 799 },
cf7e015f
S
800 {
801 # Multifeed videos (multiple cameras), URL is for Main Camera
802 'url': 'https://www.youtube.com/watch?v=jqWvoWXjCVs',
803 'info_dict': {
804 'id': 'jqWvoWXjCVs',
805 'title': 'teamPGP: Rocket League Noob Stream',
806 'description': 'md5:dc7872fb300e143831327f1bae3af010',
807 },
808 'playlist': [{
809 'info_dict': {
810 'id': 'jqWvoWXjCVs',
811 'ext': 'mp4',
812 'title': 'teamPGP: Rocket League Noob Stream (Main Camera)',
813 'description': 'md5:dc7872fb300e143831327f1bae3af010',
556dbe7f 814 'duration': 7335,
cf7e015f
S
815 'upload_date': '20150721',
816 'uploader': 'Beer Games Beer',
817 'uploader_id': 'beergamesbeer',
ec85ded8 818 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
7caf9830 819 'license': 'Standard YouTube License',
cf7e015f
S
820 },
821 }, {
822 'info_dict': {
823 'id': '6h8e8xoXJzg',
824 'ext': 'mp4',
825 'title': 'teamPGP: Rocket League Noob Stream (kreestuh)',
826 'description': 'md5:dc7872fb300e143831327f1bae3af010',
556dbe7f 827 'duration': 7337,
cf7e015f
S
828 'upload_date': '20150721',
829 'uploader': 'Beer Games Beer',
830 'uploader_id': 'beergamesbeer',
ec85ded8 831 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
7caf9830 832 'license': 'Standard YouTube License',
cf7e015f
S
833 },
834 }, {
835 'info_dict': {
836 'id': 'PUOgX5z9xZw',
837 'ext': 'mp4',
838 'title': 'teamPGP: Rocket League Noob Stream (grizzle)',
839 'description': 'md5:dc7872fb300e143831327f1bae3af010',
556dbe7f 840 'duration': 7337,
cf7e015f
S
841 'upload_date': '20150721',
842 'uploader': 'Beer Games Beer',
843 'uploader_id': 'beergamesbeer',
ec85ded8 844 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
7caf9830 845 'license': 'Standard YouTube License',
cf7e015f
S
846 },
847 }, {
848 'info_dict': {
849 'id': 'teuwxikvS5k',
850 'ext': 'mp4',
851 'title': 'teamPGP: Rocket League Noob Stream (zim)',
852 'description': 'md5:dc7872fb300e143831327f1bae3af010',
556dbe7f 853 'duration': 7334,
cf7e015f
S
854 'upload_date': '20150721',
855 'uploader': 'Beer Games Beer',
856 'uploader_id': 'beergamesbeer',
ec85ded8 857 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
7caf9830 858 'license': 'Standard YouTube License',
cf7e015f
S
859 },
860 }],
861 'params': {
862 'skip_download': True,
863 },
cbaed4bb 864 },
f9f49d87
S
865 {
866 # Multifeed video with comma in title (see https://github.com/rg3/youtube-dl/issues/8536)
867 'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
868 'info_dict': {
869 'id': 'gVfLd0zydlo',
870 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
871 },
872 'playlist_count': 2,
be49068d 873 'skip': 'Not multifeed anymore',
f9f49d87 874 },
cbaed4bb 875 {
2d3d2997 876 'url': 'https://vid.plus/FlRa-iH7PGw',
cbaed4bb 877 'only_matching': True,
0e49d9a6 878 },
6d4fc66b 879 {
2d3d2997 880 'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
6d4fc66b
S
881 'only_matching': True,
882 },
0e49d9a6 883 {
61f92af1 884 # Title with JS-like syntax "};" (see https://github.com/rg3/youtube-dl/issues/7468)
a8776b10
S
885 # Also tests cut-off URL expansion in video description (see
886 # https://github.com/rg3/youtube-dl/issues/1892,
887 # https://github.com/rg3/youtube-dl/issues/8164)
0e49d9a6
LL
888 'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
889 'info_dict': {
890 'id': 'lsguqyKfVQg',
891 'ext': 'mp4',
892 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
eb6793ba 893 'alt_title': 'Dark Walk - Position Music',
0e49d9a6 894 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
556dbe7f 895 'duration': 133,
0e49d9a6
LL
896 'upload_date': '20151119',
897 'uploader_id': 'IronSoulElf',
ec85ded8 898 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
0e49d9a6 899 'uploader': 'IronSoulElf',
7caf9830 900 'license': 'Standard YouTube License',
eb6793ba
S
901 'creator': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
902 'track': 'Dark Walk - Position Music',
903 'artist': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
0e49d9a6
LL
904 },
905 'params': {
906 'skip_download': True,
907 },
908 },
61f92af1
S
909 {
910 # Tags with '};' (see https://github.com/rg3/youtube-dl/issues/7468)
911 'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
912 'only_matching': True,
913 },
313dfc45
LL
914 {
915 # Video with yt:stretch=17:0
916 'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
917 'info_dict': {
918 'id': 'Q39EVAstoRM',
919 'ext': 'mp4',
920 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
921 'description': 'md5:ee18a25c350637c8faff806845bddee9',
922 'upload_date': '20151107',
923 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
924 'uploader': 'CH GAMER DROID',
925 },
926 'params': {
927 'skip_download': True,
928 },
be49068d 929 'skip': 'This video does not exist.',
313dfc45 930 },
7caf9830
S
931 {
932 # Video licensed under Creative Commons
933 'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
934 'info_dict': {
935 'id': 'M4gD1WSo5mA',
936 'ext': 'mp4',
937 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
938 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
556dbe7f 939 'duration': 721,
7caf9830
S
940 'upload_date': '20150127',
941 'uploader_id': 'BerkmanCenter',
ec85ded8 942 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
556dbe7f 943 'uploader': 'The Berkman Klein Center for Internet & Society',
7caf9830
S
944 'license': 'Creative Commons Attribution license (reuse allowed)',
945 },
946 'params': {
947 'skip_download': True,
948 },
949 },
fd050249
S
950 {
951 # Channel-like uploader_url
952 'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
953 'info_dict': {
954 'id': 'eQcmzGIKrzg',
955 'ext': 'mp4',
956 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
957 'description': 'md5:dda0d780d5a6e120758d1711d062a867',
556dbe7f 958 'duration': 4060,
fd050249 959 'upload_date': '20151119',
eb6793ba 960 'uploader': 'Bernie Sanders',
fd050249 961 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
ec85ded8 962 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
fd050249
S
963 'license': 'Creative Commons Attribution license (reuse allowed)',
964 },
965 'params': {
966 'skip_download': True,
967 },
968 },
040ac686
S
969 {
970 'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
971 'only_matching': True,
7f29cf54
S
972 },
973 {
974 # YouTube Red paid video (https://github.com/rg3/youtube-dl/issues/10059)
975 'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
976 'only_matching': True,
6496ccb4
S
977 },
978 {
979 # Rental video preview
980 'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
981 'info_dict': {
982 'id': 'uGpuVWrhIzE',
983 'ext': 'mp4',
984 'title': 'Piku - Trailer',
985 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
986 'upload_date': '20150811',
987 'uploader': 'FlixMatrix',
988 'uploader_id': 'FlixMatrixKaravan',
ec85ded8 989 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
6496ccb4
S
990 'license': 'Standard YouTube License',
991 },
992 'params': {
993 'skip_download': True,
994 },
eb6793ba 995 'skip': 'This video is not available.',
022a5d66 996 },
12afdc2a
S
997 {
998 # YouTube Red video with episode data
999 'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
1000 'info_dict': {
1001 'id': 'iqKdEhx-dD4',
1002 'ext': 'mp4',
1003 'title': 'Isolation - Mind Field (Ep 1)',
eb6793ba 1004 'description': 'md5:25b78d2f64ae81719f5c96319889b736',
556dbe7f 1005 'duration': 2085,
12afdc2a
S
1006 'upload_date': '20170118',
1007 'uploader': 'Vsauce',
1008 'uploader_id': 'Vsauce',
1009 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
1010 'license': 'Standard YouTube License',
1011 'series': 'Mind Field',
1012 'season_number': 1,
1013 'episode_number': 1,
1014 },
1015 'params': {
1016 'skip_download': True,
1017 },
1018 'expected_warnings': [
1019 'Skipping DASH manifest',
1020 ],
1021 },
c7121fa7
S
1022 {
1023 # The following content has been identified by the YouTube community
1024 # as inappropriate or offensive to some audiences.
1025 'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
1026 'info_dict': {
1027 'id': '6SJNVb0GnPI',
1028 'ext': 'mp4',
1029 'title': 'Race Differences in Intelligence',
1030 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
1031 'duration': 965,
1032 'upload_date': '20140124',
1033 'uploader': 'New Century Foundation',
1034 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
1035 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
1036 'license': 'Standard YouTube License',
c7121fa7
S
1037 },
1038 'params': {
1039 'skip_download': True,
1040 },
1041 },
022a5d66
S
1042 {
1043 # itag 212
1044 'url': '1t24XAntNCY',
1045 'only_matching': True,
fd5c4aab
S
1046 },
1047 {
1048 # geo restricted to JP
1049 'url': 'sJL6WA-aGkQ',
1050 'only_matching': True,
1051 },
d0ba5587
S
1052 {
1053 'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
1054 'only_matching': True,
1055 },
2eb88d95
PH
1056 ]
1057
e0df6211
PH
def __init__(self, *args, **kwargs):
    """Initialise the extractor and create an empty per-instance player cache."""
    super(YoutubeIE, self).__init__(*args, **kwargs)
    # Filled by _decrypt_signature: maps (player_url, signature cache id)
    # to the deciphering function extracted for that player
    self._player_cache = dict()
e0df6211 1061
c5e8d7af
PH
def report_video_info_webpage_download(self, video_id):
    """Print a note that the video info webpage is being downloaded."""
    message = '%s: Downloading video info webpage' % video_id
    self.to_screen(message)
c5e8d7af 1065
c5e8d7af
PH
def report_information_extraction(self, video_id):
    """Print a note that video information is being extracted."""
    message = '%s: Extracting video information' % video_id
    self.to_screen(message)
c5e8d7af
PH
1069
def report_unavailable_format(self, video_id, format):
    """Print a note that the requested format is not available for the video."""
    details = (video_id, format)
    self.to_screen('%s: Format %s not available' % details)
c5e8d7af
PH
1073
def report_rtmp_download(self):
    """Print a note that the download will go over the RTMP protocol."""
    message = 'RTMP download detected'
    self.to_screen(message)
c5e8d7af 1077
60064c53
PH
def _signature_cache_id(self, example_sig):
    """Return the lengths of the dot-separated parts of a signature, joined by dots.

    For example a signature shaped like 'abc.de' yields '3.2'.
    """
    part_lengths = [compat_str(len(chunk)) for chunk in example_sig.split('.')]
    return '.'.join(part_lengths)
60064c53
PH
1081
1082 def _extract_signature_function(self, video_id, player_url, example_sig):
cf010131 1083 id_m = re.match(
e31fed95 1084 r'.*?-(?P<id>[a-zA-Z0-9_-]+)(?:/watch_as3|/html5player(?:-new)?|(?:/[a-z]{2}_[A-Z]{2})?/base)?\.(?P<ext>[a-z]+)$',
cf010131 1085 player_url)
c081b35c
PH
1086 if not id_m:
1087 raise ExtractorError('Cannot identify player %r' % player_url)
e0df6211
PH
1088 player_type = id_m.group('ext')
1089 player_id = id_m.group('id')
1090
c4417ddb 1091 # Read from filesystem cache
60064c53
PH
1092 func_id = '%s_%s_%s' % (
1093 player_type, player_id, self._signature_cache_id(example_sig))
c4417ddb 1094 assert os.path.basename(func_id) == func_id
a0e07d31 1095
69ea8ca4 1096 cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
a0e07d31 1097 if cache_spec is not None:
78caa52a 1098 return lambda s: ''.join(s[i] for i in cache_spec)
83799698 1099
6d1a55a5
PH
1100 download_note = (
1101 'Downloading player %s' % player_url
1102 if self._downloader.params.get('verbose') else
1103 'Downloading %s player %s' % (player_type, player_id)
1104 )
e0df6211
PH
1105 if player_type == 'js':
1106 code = self._download_webpage(
1107 player_url, video_id,
6d1a55a5 1108 note=download_note,
69ea8ca4 1109 errnote='Download of %s failed' % player_url)
83799698 1110 res = self._parse_sig_js(code)
c4417ddb 1111 elif player_type == 'swf':
e0df6211
PH
1112 urlh = self._request_webpage(
1113 player_url, video_id,
6d1a55a5 1114 note=download_note,
69ea8ca4 1115 errnote='Download of %s failed' % player_url)
e0df6211 1116 code = urlh.read()
83799698 1117 res = self._parse_sig_swf(code)
e0df6211
PH
1118 else:
1119 assert False, 'Invalid player type %r' % player_type
1120
785521bf
PH
1121 test_string = ''.join(map(compat_chr, range(len(example_sig))))
1122 cache_res = res(test_string)
1123 cache_spec = [ord(c) for c in cache_res]
83799698 1124
69ea8ca4 1125 self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec)
83799698
PH
1126 return res
1127
def _print_sig_code(self, func, example_sig):
    """Print Python code equivalent to the deciphering function func.

    func is run on a probe string built from example_sig's length; the
    resulting index sequence is rendered as a sum of ``s[...]`` index and
    slice expressions, guarded by a check on the signature's part lengths,
    and written out with to_screen.
    """
    def render_slice(first, last, stride):
        # Render the run first..last (inclusive, walking by stride) as a slice
        lower = '' if first == 0 else str(first)
        stop = last + stride
        upper = ':' if stop < 0 else ':%d' % stop
        suffix = '' if stride == 1 else ':%d' % stride
        return 's[%s%s%s]' % (lower, upper, suffix)

    def gen_sig_code(idxs):
        stride = None
        # Quench pyflakes warnings - run_start is always set together with stride
        run_start = '(Never used)'
        for cur, before in zip(idxs[1:], idxs[:-1]):
            delta = cur - before
            if stride is not None:
                if delta != stride:
                    # The current run ended at ``before``: emit it
                    yield render_slice(run_start, before, stride)
                    stride = None
            elif delta in (-1, 1):
                # Two neighbouring indices open a new run
                stride, run_start = delta, before
            else:
                yield 's[%d]' % before
        # Emit whatever is still pending for the last index
        if stride is None:
            yield 's[%d]' % cur
        else:
            yield render_slice(run_start, cur, stride)

    # The i-th probe character has code point i, so ord() of each output
    # character is the input index it came from
    probe = ''.join(map(compat_chr, range(len(example_sig))))
    index_sequence = [ord(ch) for ch in func(probe)]
    expr_code = ' + '.join(gen_sig_code(index_sequence))
    part_lengths = ', '.join(
        compat_str(len(p)) for p in example_sig.split('.'))
    signature_id_tuple = '(%s)' % part_lengths
    # NOTE(review): this file was read from a rendering that may have collapsed
    # runs of spaces; confirm the indent width inside the second literal below
    code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
            ' return %s\n') % (signature_id_tuple, expr_code)
    self.to_screen('Extracted signature function:\n' + code)
edf3e38e 1166
e0df6211
PH
def _parse_sig_js(self, jscode):
    """Return a one-argument deciphering function extracted from JS player code.

    The name of the signature function is located with _search_regex, the
    function is extracted with JSInterpreter, and the returned callable passes
    its argument to it wrapped in a one-element list.
    """
    name_patterns = (
        r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
    )
    funcname = self._search_regex(
        name_patterns, jscode,
        'Initial JS player signature function name', group='sig')

    decipher = JSInterpreter(jscode).extract_function(funcname)
    return lambda s: decipher([s])
1176
def _parse_sig_swf(self, file_contents):
    """Return a one-argument deciphering function extracted from SWF player data.

    The 'decipher' function of the 'SignatureDecipher' class is extracted with
    SWFInterpreter; the returned callable passes its argument to it wrapped in
    a one-element list.
    """
    interpreter = SWFInterpreter(file_contents)
    decipher_class = interpreter.extract_class('SignatureDecipher')
    decipher = interpreter.extract_function(decipher_class, 'decipher')
    return lambda s: decipher([s])
1183
83799698 1184 def _decrypt_signature(self, s, video_id, player_url, age_gate=False):
257a2501 1185 """Turn the encrypted s field into a working signature"""
6b37f0be 1186
c8bf86d5 1187 if player_url is None:
69ea8ca4 1188 raise ExtractorError('Cannot decrypt signature without player_url')
920de7a2 1189
69ea8ca4 1190 if player_url.startswith('//'):
78caa52a 1191 player_url = 'https:' + player_url
3c90cc8b
S
1192 elif not re.match(r'https?://', player_url):
1193 player_url = compat_urlparse.urljoin(
1194 'https://www.youtube.com', player_url)
c8bf86d5 1195 try:
62af3a0e 1196 player_id = (player_url, self._signature_cache_id(s))
c8bf86d5
PH
1197 if player_id not in self._player_cache:
1198 func = self._extract_signature_function(
60064c53 1199 video_id, player_url, s
c8bf86d5
PH
1200 )
1201 self._player_cache[player_id] = func
1202 func = self._player_cache[player_id]
1203 if self._downloader.params.get('youtube_print_sig_code'):
60064c53 1204 self._print_sig_code(func, s)
c8bf86d5
PH
1205 return func(s)
1206 except Exception as e:
1207 tb = traceback.format_exc()
1208 raise ExtractorError(
78caa52a 1209 'Signature extraction failed: ' + tb, cause=e)
e0df6211 1210
def _get_subtitles(self, video_id, webpage):
    """Return the subtitle tracks listed for video_id.

    The result maps each language code to a list of {'url': ..., 'ext': ...}
    dicts, one per entry of self._SUBTITLE_FORMATS. Only the first track of
    each language is used. An empty dict is returned, after a warning, when
    the track list cannot be downloaded or contains no tracks.
    The webpage argument is not used here.
    """
    list_url = 'https://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id
    try:
        subs_doc = self._download_xml(list_url, video_id, note=False)
    except ExtractorError as err:
        self._downloader.report_warning('unable to download video subtitles: %s' % error_to_compat_str(err))
        return {}

    def track_formats(track, lang):
        # One download URL per supported subtitle format
        return [{
            'url': 'https://www.youtube.com/api/timedtext?' + compat_urllib_parse_urlencode({
                'lang': lang,
                'v': video_id,
                'fmt': ext,
                'name': track.attrib['name'].encode('utf-8'),
            }),
            'ext': ext,
        } for ext in self._SUBTITLE_FORMATS]

    subtitles = {}
    for track in subs_doc.findall('track'):
        lang = track.attrib['lang_code']
        if lang not in subtitles:
            subtitles[lang] = track_formats(track, lang)

    if subtitles:
        return subtitles
    self._downloader.report_warning('video doesn\'t have subtitles')
    return {}
1242
a72778d3
S
1243 def _get_ytplayer_config(self, video_id, webpage):
1244 patterns = (
526b3b07
S
1245 # User data may contain arbitrary character sequences that may affect
1246 # JSON extraction with regex, e.g. when '};' is contained the second
1247 # regex won't capture the whole JSON. Yet working around by trying more
1248 # concrete regex first keeping in mind proper quoted string handling
1249 # to be implemented in future that will replace this workaround (see
1250 # https://github.com/rg3/youtube-dl/issues/7468,
1251 # https://github.com/rg3/youtube-dl/pull/7599)
a72778d3
S
1252 r';ytplayer\.config\s*=\s*({.+?});ytplayer',
1253 r';ytplayer\.config\s*=\s*({.+?});',
1254 )
1255 config = self._search_regex(
1256 patterns, webpage, 'ytplayer.config', default=None)
1257 if config:
1258 return self._parse_json(
1259 uppercase_escape(config), video_id, fatal=False)
0e49d9a6 1260
360e1ca5 1261 def _get_automatic_captions(self, video_id, webpage):
de7f3446
JMF
1262 """We need the webpage for getting the captions url, pass it as an
1263 argument to speed up the process."""
69ea8ca4 1264 self.to_screen('%s: Looking for automatic captions' % video_id)
a72778d3 1265 player_config = self._get_ytplayer_config(video_id, webpage)
78caa52a 1266 err_msg = 'Couldn\'t find automatic captions for %s' % video_id
a72778d3 1267 if not player_config:
de7f3446
JMF
1268 self._downloader.report_warning(err_msg)
1269 return {}
de7f3446 1270 try:
0792d563 1271 args = player_config['args']
b78b292f
S
1272 caption_url = args.get('ttsurl')
1273 if caption_url:
1274 timestamp = args['timestamp']
1275 # We get the available subtitles
15707c7e 1276 list_params = compat_urllib_parse_urlencode({
b78b292f
S
1277 'type': 'list',
1278 'tlangs': 1,
1279 'asrs': 1,
1280 })
1281 list_url = caption_url + '&' + list_params
1282 caption_list = self._download_xml(list_url, video_id)
1283 original_lang_node = caption_list.find('track')
1284 if original_lang_node is None:
1285 self._downloader.report_warning('Video doesn\'t have automatic captions')
1286 return {}
1287 original_lang = original_lang_node.attrib['lang_code']
1288 caption_kind = original_lang_node.attrib.get('kind', '')
1289
1290 sub_lang_list = {}
1291 for lang_node in caption_list.findall('target'):
1292 sub_lang = lang_node.attrib['lang_code']
1293 sub_formats = []
1294 for ext in self._SUBTITLE_FORMATS:
15707c7e 1295 params = compat_urllib_parse_urlencode({
b78b292f
S
1296 'lang': original_lang,
1297 'tlang': sub_lang,
1298 'fmt': ext,
1299 'ts': timestamp,
1300 'kind': caption_kind,
1301 })
1302 sub_formats.append({
1303 'url': caption_url + '&' + params,
1304 'ext': ext,
1305 })
1306 sub_lang_list[sub_lang] = sub_formats
1307 return sub_lang_list
1308
ddbb4c5c
S
1309 def make_captions(sub_url, sub_langs):
1310 parsed_sub_url = compat_urllib_parse_urlparse(sub_url)
1311 caption_qs = compat_parse_qs(parsed_sub_url.query)
1312 captions = {}
1313 for sub_lang in sub_langs:
1314 sub_formats = []
1315 for ext in self._SUBTITLE_FORMATS:
1316 caption_qs.update({
1317 'tlang': [sub_lang],
1318 'fmt': [ext],
1319 })
1320 sub_url = compat_urlparse.urlunparse(parsed_sub_url._replace(
1321 query=compat_urllib_parse_urlencode(caption_qs, True)))
1322 sub_formats.append({
1323 'url': sub_url,
1324 'ext': ext,
1325 })
1326 captions[sub_lang] = sub_formats
1327 return captions
1328
1329 # New captions format as of 22.06.2017
1330 player_response = args.get('player_response')
1331 if player_response and isinstance(player_response, compat_str):
1332 player_response = self._parse_json(
1333 player_response, video_id, fatal=False)
1334 if player_response:
1335 renderer = player_response['captions']['playerCaptionsTracklistRenderer']
1336 base_url = renderer['captionTracks'][0]['baseUrl']
1337 sub_lang_list = []
1338 for lang in renderer['translationLanguages']:
1339 lang_code = lang.get('languageCode')
1340 if lang_code:
1341 sub_lang_list.append(lang_code)
1342 return make_captions(base_url, sub_lang_list)
1343
b78b292f
S
1344 # Some videos don't provide ttsurl but rather caption_tracks and
1345 # caption_translation_languages (e.g. 20LmZk1hakA)
ddbb4c5c 1346 # Does not used anymore as of 22.06.2017
b78b292f
S
1347 caption_tracks = args['caption_tracks']
1348 caption_translation_languages = args['caption_translation_languages']
1349 caption_url = compat_parse_qs(caption_tracks.split(',')[0])['u'][0]
ddbb4c5c 1350 sub_lang_list = []
b78b292f
S
1351 for lang in caption_translation_languages.split(','):
1352 lang_qs = compat_parse_qs(compat_urllib_parse_unquote_plus(lang))
1353 sub_lang = lang_qs.get('lc', [None])[0]
ddbb4c5c
S
1354 if sub_lang:
1355 sub_lang_list.append(sub_lang)
1356 return make_captions(caption_url, sub_lang_list)
de7f3446
JMF
1357 # An extractor error can be raise by the download process if there are
1358 # no automatic captions but there are subtitles
ddbb4c5c 1359 except (KeyError, IndexError, ExtractorError):
de7f3446
JMF
1360 self._downloader.report_warning(err_msg)
1361 return {}
1362
d77ab8e2
S
1363 def _mark_watched(self, video_id, video_info):
1364 playback_url = video_info.get('videostats_playback_base_url', [None])[0]
1365 if not playback_url:
1366 return
1367 parsed_playback_url = compat_urlparse.urlparse(playback_url)
1368 qs = compat_urlparse.parse_qs(parsed_playback_url.query)
1369
1370 # cpn generation algorithm is reverse engineered from base.js.
1371 # In fact it works even with dummy cpn.
1372 CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
1373 cpn = ''.join((CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(0, 16)))
1374
1375 qs.update({
1376 'ver': ['2'],
1377 'cpn': [cpn],
1378 })
1379 playback_url = compat_urlparse.urlunparse(
15707c7e 1380 parsed_playback_url._replace(query=compat_urllib_parse_urlencode(qs, True)))
d77ab8e2
S
1381
1382 self._download_webpage(
1383 playback_url, video_id, 'Marking watched',
1384 'Unable to mark watched', fatal=False)
1385
66c9fa36
S
@staticmethod
def _extract_urls(webpage):
    """Return all YouTube embeds found in webpage, as a list.

    Three sources are searched, in this order: quoted player URLs
    (``/embed/``, ``/v/`` or ``/p/`` on youtube.com / youtube-nocookie.com)
    following an iframe/embed/object/data-video-url attribute or a
    SWFObject/embedSWF call, ``data-youtube-id`` values of lazyYT elements,
    and ``data-video_id`` values of "YouTube Video Importer" player divs.
    Entries from the first two sources are HTML-unescaped.
    """
    # Embedded YouTube player
    # The embedSWF alternative used to read ``embedSWF\(?:\s*``, which
    # requires a literal colon and so never matched an ``embedSWF("...")``
    # call; it now matches the opening parenthesis plus optional whitespace.
    entries = [
        unescapeHTML(mobj.group('url'))
        for mobj in re.finditer(r'''(?x)
        (?:
            <iframe[^>]+?src=|
            data-video-url=|
            <embed[^>]+?src=|
            embedSWF\(\s*|
            <object[^>]+data=|
            new\s+SWFObject\(
        )
        (["\'])
        (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
        (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
        \1''', webpage)]

    # lazyYT YouTube embed
    entries.extend(
        unescapeHTML(youtube_id)
        for youtube_id in re.findall(
            r'class="lazyYT" data-youtube-id="([^"]+)"', webpage))

    # Wordpress "YouTube Video Importer" plugin
    matches = re.findall(r'''(?x)<div[^>]+
        class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
        data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
    # Each match is a tuple of all groups; the video id is the last one
    entries.extend(m[-1] for m in matches)

    return entries
1417
@staticmethod
def _extract_url(webpage):
    """Return the first entry found by YoutubeIE._extract_urls, or None if there is none."""
    found = YoutubeIE._extract_urls(webpage)
    if not found:
        return None
    return found[0]
1422
97665381
PH
@classmethod
def extract_id(cls, url):
    """Return group 2 of the match of cls._VALID_URL (verbose mode) against url.

    Raises ExtractorError when url does not match.
    """
    match = re.match(cls._VALID_URL, url, re.VERBOSE)
    if match is None:
        raise ExtractorError('Invalid URL: %s' % url)
    return match.group(2)
1430
1fb07d10
JG
def _extract_annotations(self, video_id):
    """Download the annotations document of a video and return the result of _download_webpage."""
    annotations_url = 'https://www.youtube.com/annotations_invideo?features=1&legacy=1&video_id=%s' % video_id
    return self._download_webpage(
        annotations_url, video_id,
        note='Searching for annotations.',
        errnote='Unable to download video annotations.')
1fb07d10 1434
9cafc3fd
S
@staticmethod
def _extract_chapters(description, duration):
    """Build a chapter list from seek links in an HTML video description.

    Every description line containing a ``seekTo`` link with a time stamp is
    treated as a chapter that lasts until the next such line (the last one
    until duration). Returns a list of dicts with 'start_time', 'end_time'
    and 'title', or None when the description is empty or has no such lines.
    """
    if not description:
        return None
    chapter_lines = re.findall(
        r'(?:^|<br\s*/>)([^<]*<a[^>]+onclick=["\']yt\.www\.watch\.player\.seekTo[^>]+>(\d{1,2}:\d{1,2}(?::\d{1,2})?)</a>[^>]*)(?=$|<br\s*/>)',
        description)
    if not chapter_lines:
        return None

    line_count = len(chapter_lines)
    chapters = []
    for position, (line_html, stamp) in enumerate(chapter_lines):
        start_time = parse_duration(stamp)
        if start_time is None:
            continue
        # A chapter starting after the end of the video stops the scan
        if start_time > duration:
            break
        if position + 1 == line_count:
            end_time = duration
        else:
            end_time = parse_duration(chapter_lines[position + 1][1])
        if end_time is None:
            continue
        if end_time > duration:
            end_time = duration
        if start_time > end_time:
            break
        # Drop the link itself, then tidy separators and whitespace
        title = re.sub(r'<a[^>]+>[^<]+</a>', '', line_html).strip(' \t-')
        title = re.sub(r'\s+', ' ', title)
        chapters.append({
            'start_time': start_time,
            'end_time': end_time,
            'title': title,
        })
    return chapters
1469
c5e8d7af 1470 def _real_extract(self, url):
cf7e015f
S
1471 url, smuggled_data = unsmuggle_url(url, {})
1472
7e8c0af0 1473 proto = (
78caa52a
PH
1474 'http' if self._downloader.params.get('prefer_insecure', False)
1475 else 'https')
7e8c0af0 1476
7c80519c 1477 start_time = None
297a564b 1478 end_time = None
7c80519c
JMF
1479 parsed_url = compat_urllib_parse_urlparse(url)
1480 for component in [parsed_url.fragment, parsed_url.query]:
1481 query = compat_parse_qs(component)
297a564b 1482 if start_time is None and 't' in query:
7c80519c 1483 start_time = parse_duration(query['t'][0])
2929fa0e
JMF
1484 if start_time is None and 'start' in query:
1485 start_time = parse_duration(query['start'][0])
297a564b
JMF
1486 if end_time is None and 'end' in query:
1487 end_time = parse_duration(query['end'][0])
7c80519c 1488
c5e8d7af
PH
1489 # Extract original video URL from URL with redirection, like age verification, using next_url parameter
1490 mobj = re.search(self._NEXT_URL_RE, url)
1491 if mobj:
7fd002c0 1492 url = proto + '://www.youtube.com/' + compat_urllib_parse_unquote(mobj.group(1)).lstrip('/')
97665381 1493 video_id = self.extract_id(url)
c5e8d7af
PH
1494
1495 # Get video webpage
aa79ac0c 1496 url = proto + '://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1&bpctr=9999999999' % video_id
a1f934b1 1497 video_webpage = self._download_webpage(url, video_id)
c5e8d7af
PH
1498
1499 # Attempt to extract SWF player URL
e0df6211 1500 mobj = re.search(r'swfConfig.*?"(https?:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage)
c5e8d7af
PH
1501 if mobj is not None:
1502 player_url = re.sub(r'\\(.)', r'\1', mobj.group(1))
1503 else:
1504 player_url = None
1505
d8d24a92
S
1506 dash_mpds = []
1507
1508 def add_dash_mpd(video_info):
1509 dash_mpd = video_info.get('dashmpd')
1510 if dash_mpd and dash_mpd[0] not in dash_mpds:
1511 dash_mpds.append(dash_mpd[0])
1512
c7121fa7
S
1513 is_live = None
1514 view_count = None
1515
1516 def extract_view_count(v_info):
1517 return int_or_none(try_get(v_info, lambda x: x['view_count'][0]))
1518
c5e8d7af 1519 # Get video info
6449cd80 1520 embed_webpage = None
c108eb73 1521 if re.search(r'player-age-gate-content">', video_webpage) is not None:
c108eb73
JMF
1522 age_gate = True
1523 # We simulate the access to the video from www.youtube.com/v/{video_id}
1524 # this can be viewed without login into Youtube
beb95e77
CL
1525 url = proto + '://www.youtube.com/embed/%s' % video_id
1526 embed_webpage = self._download_webpage(url, video_id, 'Downloading embed webpage')
15707c7e 1527 data = compat_urllib_parse_urlencode({
2c57c7fa
JMF
1528 'video_id': video_id,
1529 'eurl': 'https://youtube.googleapis.com/v/' + video_id,
c084c934 1530 'sts': self._search_regex(
beb95e77 1531 r'"sts"\s*:\s*(\d+)', embed_webpage, 'sts', default=''),
2c57c7fa 1532 })
7e8c0af0 1533 video_info_url = proto + '://www.youtube.com/get_video_info?' + data
94bd3613
PH
1534 video_info_webpage = self._download_webpage(
1535 video_info_url, video_id,
20436c30 1536 note='Refetching age-gated info webpage',
94bd3613 1537 errnote='unable to download video info webpage')
c5e8d7af 1538 video_info = compat_parse_qs(video_info_webpage)
d8d24a92 1539 add_dash_mpd(video_info)
c108eb73
JMF
1540 else:
1541 age_gate = False
bc93bdb5 1542 video_info = None
dc4e4f90 1543 sts = None
d8d24a92 1544 # Try looking directly into the video webpage
a72778d3
S
1545 ytplayer_config = self._get_ytplayer_config(video_id, video_webpage)
1546 if ytplayer_config:
4e62ebe2 1547 args = ytplayer_config['args']
4c76aa06 1548 if args.get('url_encoded_fmt_stream_map') or args.get('hlsvp'):
d8d24a92
S
1549 # Convert to the same format returned by compat_parse_qs
1550 video_info = dict((k, [v]) for k, v in args.items())
1551 add_dash_mpd(video_info)
6496ccb4
S
1552 # Rental video is not rented but preview is available (e.g.
1553 # https://www.youtube.com/watch?v=yYr8q0y5Jfg,
1554 # https://github.com/rg3/youtube-dl/issues/10532)
1555 if not video_info and args.get('ypc_vid'):
1556 return self.url_result(
1557 args['ypc_vid'], YoutubeIE.ie_key(), video_id=args['ypc_vid'])
2fe1ff85
JMF
1558 if args.get('livestream') == '1' or args.get('live_playback') == 1:
1559 is_live = True
dc4e4f90 1560 sts = ytplayer_config.get('sts')
0a3cf9ad
S
1561 if not video_info or self._downloader.params.get('youtube_include_dash_manifest', True):
1562 # We also try looking in get_video_info since it may contain different dashmpd
1563 # URL that points to a DASH manifest with possibly different itag set (some itags
1564 # are missing from DASH manifest pointed by webpage's dashmpd, some - from DASH
1565 # manifest pointed by get_video_info's dashmpd).
1566 # The general idea is to take a union of itags of both DASH manifests (for example
1567 # video with such 'manifest behavior' see https://github.com/rg3/youtube-dl/issues/6093)
4e62ebe2 1568 self.report_video_info_webpage_download(video_id)
dc4e4f90
S
1569 for el in ('info', 'embedded', 'detailpage', 'vevo', ''):
1570 query = {
1571 'video_id': video_id,
1572 'ps': 'default',
1573 'eurl': '',
1574 'gl': 'US',
1575 'hl': 'en',
1576 }
1577 if el:
1578 query['el'] = el
1579 if sts:
1580 query['sts'] = sts
810fb84d 1581 video_info_webpage = self._download_webpage(
dc4e4f90 1582 '%s://www.youtube.com/get_video_info' % proto,
4e62ebe2 1583 video_id, note=False,
dc4e4f90
S
1584 errnote='unable to download video info webpage',
1585 fatal=False, query=query)
1586 if not video_info_webpage:
1587 continue
0a3cf9ad 1588 get_video_info = compat_parse_qs(video_info_webpage)
fd545fc6 1589 add_dash_mpd(get_video_info)
c7121fa7
S
1590 if view_count is None:
1591 view_count = extract_view_count(get_video_info)
0a3cf9ad
S
1592 if not video_info:
1593 video_info = get_video_info
1594 if 'token' in get_video_info:
89ea063e
S
1595 # Different get_video_info requests may report different results, e.g.
1596 # some may report video unavailability, but some may serve it without
1597 # any complaint (see https://github.com/rg3/youtube-dl/issues/7362,
1598 # the original webpage as well as el=info and el=embedded get_video_info
1599 # requests report video unavailability due to geo restriction while
1600 # el=detailpage succeeds and returns valid data). This is probably
1601 # due to YouTube measures against IP ranges of hosting providers.
1602 # Working around by preferring the first succeeded video_info containing
1603 # the token if no such video_info yet was found.
44b2264f
S
1604 if 'token' not in video_info:
1605 video_info = get_video_info
4e62ebe2 1606 break
bbb7c3f7
YCH
1607
1608 def extract_unavailable_message():
1609 return self._html_search_regex(
1610 r'(?s)<h1[^>]+id="unavailable-message"[^>]*>(.+?)</h1>',
1611 video_webpage, 'unavailable message', default=None)
1612
c5e8d7af
PH
1613 if 'token' not in video_info:
1614 if 'reason' in video_info:
af214c3a 1615 if 'The uploader has not made this video available in your country.' in video_info['reason']:
fd5c4aab
S
1616 regions_allowed = self._html_search_meta(
1617 'regionsAllowed', video_webpage, default=None)
1618 countries = regions_allowed.split(',') if regions_allowed else None
1619 self.raise_geo_restricted(
1620 msg=video_info['reason'][0], countries=countries)
bbb7c3f7
YCH
1621 reason = video_info['reason'][0]
1622 if 'Invalid parameters' in reason:
1623 unavailable_message = extract_unavailable_message()
1624 if unavailable_message:
1625 reason = unavailable_message
d11271dd 1626 raise ExtractorError(
bbb7c3f7 1627 'YouTube said: %s' % reason,
d11271dd 1628 expected=True, video_id=video_id)
c5e8d7af 1629 else:
d11271dd 1630 raise ExtractorError(
78caa52a 1631 '"token" parameter not in video info for unknown reason',
d11271dd 1632 video_id=video_id)
c5e8d7af 1633
cf7e015f
S
1634 # title
1635 if 'title' in video_info:
1636 video_title = video_info['title'][0]
1637 else:
1638 self._downloader.report_warning('Unable to extract video title')
1639 video_title = '_'
1640
1641 # description
9cafc3fd 1642 description_original = video_description = get_element_by_id("eow-description", video_webpage)
cf7e015f 1643 if video_description:
fa4bc6e7
RA
1644
1645 def replace_url(m):
1646 redir_url = compat_urlparse.urljoin(url, m.group(1))
1647 parsed_redir_url = compat_urllib_parse_urlparse(redir_url)
1648 if re.search(r'^(?:www\.)?(?:youtube(?:-nocookie)?\.com|youtu\.be)$', parsed_redir_url.netloc) and parsed_redir_url.path == '/redirect':
1649 qs = compat_parse_qs(parsed_redir_url.query)
1650 q = qs.get('q')
1651 if q and q[0]:
1652 return q[0]
1653 return redir_url
1654
9cafc3fd 1655 description_original = video_description = re.sub(r'''(?x)
cf7e015f 1656 <a\s+
25cb7a0e 1657 (?:[a-zA-Z-]+="[^"]*"\s+)*?
23f13e97 1658 (?:title|href)="([^"]+)"\s+
25cb7a0e 1659 (?:[a-zA-Z-]+="[^"]*"\s+)*?
525cedb9 1660 class="[^"]*"[^>]*>
23f13e97 1661 [^<]+\.{3}\s*
cf7e015f 1662 </a>
fa4bc6e7 1663 ''', replace_url, video_description)
cf7e015f
S
1664 video_description = clean_html(video_description)
1665 else:
1666 fd_mobj = re.search(r'<meta name="description" content="([^"]+)"', video_webpage)
1667 if fd_mobj:
1668 video_description = unescapeHTML(fd_mobj.group(1))
1669 else:
1670 video_description = ''
1671
5e1eddb9
S
1672 if 'multifeed_metadata_list' in video_info and not smuggled_data.get('force_singlefeed', False):
1673 if not self._downloader.params.get('noplaylist'):
1674 entries = []
1675 feed_ids = []
6863631c 1676 multifeed_metadata_list = video_info['multifeed_metadata_list'][0]
5e1eddb9 1677 for feed in multifeed_metadata_list.split(','):
6863631c
S
1678 # Unquote should take place before split on comma (,) since textual
1679 # fields may contain comma as well (see
1680 # https://github.com/rg3/youtube-dl/issues/8536)
1681 feed_data = compat_parse_qs(compat_urllib_parse_unquote_plus(feed))
5e1eddb9
S
1682 entries.append({
1683 '_type': 'url_transparent',
1684 'ie_key': 'Youtube',
1685 'url': smuggle_url(
1686 '%s://www.youtube.com/watch?v=%s' % (proto, feed_data['id'][0]),
1687 {'force_singlefeed': True}),
1688 'title': '%s (%s)' % (video_title, feed_data['title'][0]),
1689 })
1690 feed_ids.append(feed_data['id'][0])
1691 self.to_screen(
1692 'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
1693 % (', '.join(feed_ids), video_id))
1694 return self.playlist_result(entries, video_id, video_title, video_description)
1695 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
cf7e015f 1696
c7121fa7 1697 if view_count is None:
1c9c8de2 1698 view_count = extract_view_count(video_info)
1d699755 1699
c5e8d7af
PH
1700 # Check for "rental" videos
1701 if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info:
c9612c04 1702 raise ExtractorError('"rental" videos not supported. See https://github.com/rg3/youtube-dl/issues/359 for more information.', expected=True)
c5e8d7af 1703
c63ca0ee
S
1704 def _extract_filesize(media_url):
1705 return int_or_none(self._search_regex(
1706 r'\bclen[=/](\d+)', media_url, 'filesize', default=None))
1707
c5e8d7af
PH
1708 if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
1709 self.report_rtmp_download()
dd27fd17
PH
1710 formats = [{
1711 'format_id': '_rtmp',
1712 'protocol': 'rtmp',
1713 'url': video_info['conn'][0],
1714 'player_url': player_url,
1715 }]
391dd6f0 1716 elif not is_live and (len(video_info.get('url_encoded_fmt_stream_map', [''])[0]) >= 1 or len(video_info.get('adaptive_fmts', [''])[0]) >= 1):
5f6a1245 1717 encoded_url_map = video_info.get('url_encoded_fmt_stream_map', [''])[0] + ',' + video_info.get('adaptive_fmts', [''])[0]
00fe14fc 1718 if 'rtmpe%3Dyes' in encoded_url_map:
a7055eb9 1719 raise ExtractorError('rtmpe downloads are not supported, see https://github.com/rg3/youtube-dl/issues/343 for more information.', expected=True)
3318832e 1720 formats_spec = {}
82156fdb 1721 fmt_list = video_info.get('fmt_list', [''])[0]
1722 if fmt_list:
1723 for fmt in fmt_list.split(','):
1724 spec = fmt.split('/')
3318832e 1725 if len(spec) > 1:
1726 width_height = spec[1].split('x')
1727 if len(width_height) == 2:
1728 formats_spec[spec[0]] = {
1729 'resolution': spec[1],
1730 'width': int_or_none(width_height[0]),
1731 'height': int_or_none(width_height[1]),
1732 }
54fc90aa 1733 q = qualities(['small', 'medium', 'hd720'])
c9afb51c 1734 formats = []
00fe14fc 1735 for url_data_str in encoded_url_map.split(','):
c5e8d7af 1736 url_data = compat_parse_qs(url_data_str)
201e9eaa
PH
1737 if 'itag' not in url_data or 'url' not in url_data:
1738 continue
1739 format_id = url_data['itag'][0]
1740 url = url_data['url'][0]
1741
a49eccdf 1742 if 's' in url_data or self._downloader.params.get('youtube_include_dash_manifest', True):
6449cd80 1743 ASSETS_RE = r'"assets":.+?"js":\s*("[^"]+")'
beb95e77 1744 jsplayer_url_json = self._search_regex(
6449cd80
PH
1745 ASSETS_RE,
1746 embed_webpage if age_gate else video_webpage,
1747 'JS player URL (1)', default=None)
1748 if not jsplayer_url_json and not age_gate:
1749 # We need the embed website after all
1750 if embed_webpage is None:
1751 embed_url = proto + '://www.youtube.com/embed/%s' % video_id
1752 embed_webpage = self._download_webpage(
1753 embed_url, video_id, 'Downloading embed webpage')
1754 jsplayer_url_json = self._search_regex(
1755 ASSETS_RE, embed_webpage, 'JS player URL')
1756
beb95e77 1757 player_url = json.loads(jsplayer_url_json)
201e9eaa
PH
1758 if player_url is None:
1759 player_url_json = self._search_regex(
1760 r'ytplayer\.config.*?"url"\s*:\s*("[^"]+")',
78caa52a 1761 video_webpage, 'age gate player URL')
201e9eaa
PH
1762 player_url = json.loads(player_url_json)
1763
a49eccdf
YCH
1764 if 'sig' in url_data:
1765 url += '&signature=' + url_data['sig'][0]
1766 elif 's' in url_data:
1767 encrypted_sig = url_data['s'][0]
1768
201e9eaa 1769 if self._downloader.params.get('verbose'):
cf010131 1770 if player_url is None:
201e9eaa
PH
1771 player_version = 'unknown'
1772 player_desc = 'unknown'
1773 else:
1774 if player_url.endswith('swf'):
1775 player_version = self._search_regex(
1776 r'-(.+?)(?:/watch_as3)?\.swf$', player_url,
78caa52a 1777 'flash player', fatal=False)
201e9eaa 1778 player_desc = 'flash player %s' % player_version
cf010131 1779 else:
201e9eaa 1780 player_version = self._search_regex(
b62985a9
YCH
1781 [r'html5player-([^/]+?)(?:/html5player(?:-new)?)?\.js',
1782 r'(?:www|player)-([^/]+)(?:/[a-z]{2}_[A-Z]{2})?/base\.js'],
201e9eaa
PH
1783 player_url,
1784 'html5 player', fatal=False)
78caa52a 1785 player_desc = 'html5 player %s' % player_version
201e9eaa 1786
60064c53 1787 parts_sizes = self._signature_cache_id(encrypted_sig)
69ea8ca4 1788 self.to_screen('{%s} signature length %s, %s' %
9e1a5b84 1789 (format_id, parts_sizes, player_desc))
201e9eaa
PH
1790
1791 signature = self._decrypt_signature(
1792 encrypted_sig, video_id, player_url, age_gate)
1793 url += '&signature=' + signature
1794 if 'ratebypass' not in url:
1795 url += '&ratebypass=yes'
c9afb51c 1796
94278f72
YCH
1797 dct = {
1798 'format_id': format_id,
1799 'url': url,
1800 'player_url': player_url,
1801 }
1802 if format_id in self._formats:
1803 dct.update(self._formats[format_id])
3318832e 1804 if format_id in formats_spec:
1805 dct.update(formats_spec[format_id])
94278f72 1806
aabc2be6
S
1807 # Some itags are not included in DASH manifest thus corresponding formats will
1808 # lack metadata (see https://github.com/rg3/youtube-dl/pull/5993).
1809 # Trying to extract metadata from url_encoded_fmt_stream_map entry.
1810 mobj = re.search(r'^(?P<width>\d+)[xX](?P<height>\d+)$', url_data.get('size', [''])[0])
1811 width, height = (int(mobj.group('width')), int(mobj.group('height'))) if mobj else (None, None)
94278f72 1812
c63ca0ee
S
1813 filesize = int_or_none(url_data.get(
1814 'clen', [None])[0]) or _extract_filesize(url)
1815
54fc90aa
RA
1816 quality = url_data.get('quality_label', [None])[0] or url_data.get('quality', [None])[0]
1817
94278f72 1818 more_fields = {
c63ca0ee 1819 'filesize': filesize,
aabc2be6 1820 'tbr': float_or_none(url_data.get('bitrate', [None])[0], 1000),
c9afb51c
AH
1821 'width': width,
1822 'height': height,
1823 'fps': int_or_none(url_data.get('fps', [None])[0]),
54fc90aa
RA
1824 'format_note': quality,
1825 'quality': q(quality),
c9afb51c 1826 }
94278f72
YCH
1827 for key, value in more_fields.items():
1828 if value:
1829 dct[key] = value
aabc2be6
S
1830 type_ = url_data.get('type', [None])[0]
1831 if type_:
1832 type_split = type_.split(';')
1833 kind_ext = type_split[0].split('/')
1834 if len(kind_ext) == 2:
94278f72
YCH
1835 kind, _ = kind_ext
1836 dct['ext'] = mimetype2ext(type_split[0])
aabc2be6
S
1837 if kind in ('audio', 'video'):
1838 codecs = None
1839 for mobj in re.finditer(
1840 r'(?P<key>[a-zA-Z_-]+)=(?P<quote>["\']?)(?P<val>.+?)(?P=quote)(?:;|$)', type_):
1841 if mobj.group('key') == 'codecs':
1842 codecs = mobj.group('val')
1843 break
1844 if codecs:
6310acf5 1845 dct.update(parse_codecs(codecs))
e4a60912
S
1846 if dct.get('acodec') == 'none' or dct.get('vcodec') == 'none':
1847 dct['downloader_options'] = {
1848 # Youtube throttles chunks >~10M
1849 'http_chunk_size': 10485760,
1850 }
aabc2be6 1851 formats.append(dct)
1d043b93
JMF
1852 elif video_info.get('hlsvp'):
1853 manifest_url = video_info['hlsvp'][0]
89beedd3
RA
1854 formats = []
1855 m3u8_formats = self._extract_m3u8_formats(
1856 manifest_url, video_id, 'mp4', fatal=False)
1857 for a_format in m3u8_formats:
1858 itag = self._search_regex(
1859 r'/itag/(\d+)/', a_format['url'], 'itag', default=None)
1860 if itag:
1861 a_format['format_id'] = itag
1862 if itag in self._formats:
1863 dct = self._formats[itag].copy()
1864 dct.update(a_format)
1865 a_format = dct
1866 a_format['player_url'] = player_url
1867 # Accept-Encoding header causes failures in live streams on Youtube and Youtube Gaming
049d71d8 1868 a_format.setdefault('http_headers', {})['Youtubedl-no-compression'] = 'True'
89beedd3 1869 formats.append(a_format)
c5e8d7af 1870 else:
4c76aa06
RA
1871 error_message = clean_html(video_info.get('reason', [None])[0])
1872 if not error_message:
1873 error_message = extract_unavailable_message()
1874 if error_message:
1875 raise ExtractorError(error_message, expected=True)
69ea8ca4 1876 raise ExtractorError('no conn, hlsvp or url_encoded_fmt_stream_map information found in video info')
c5e8d7af 1877
7e72694b
S
1878 # uploader
1879 video_uploader = try_get(video_info, lambda x: x['author'][0], compat_str)
1880 if video_uploader:
1881 video_uploader = compat_urllib_parse_unquote_plus(video_uploader)
1882 else:
1883 self._downloader.report_warning('unable to extract uploader name')
1884
1885 # uploader_id
1886 video_uploader_id = None
1887 video_uploader_url = None
1888 mobj = re.search(
1889 r'<link itemprop="url" href="(?P<uploader_url>https?://www\.youtube\.com/(?:user|channel)/(?P<uploader_id>[^"]+))">',
1890 video_webpage)
1891 if mobj is not None:
1892 video_uploader_id = mobj.group('uploader_id')
1893 video_uploader_url = mobj.group('uploader_url')
1894 else:
1895 self._downloader.report_warning('unable to extract uploader nickname')
1896
1897 # thumbnail image
1898 # We try first to get a high quality image:
1899 m_thumb = re.search(r'<span itemprop="thumbnail".*?href="(.*?)">',
1900 video_webpage, re.DOTALL)
1901 if m_thumb is not None:
1902 video_thumbnail = m_thumb.group(1)
1903 elif 'thumbnail_url' not in video_info:
1904 self._downloader.report_warning('unable to extract video thumbnail')
1905 video_thumbnail = None
1906 else: # don't panic if we can't find it
1907 video_thumbnail = compat_urllib_parse_unquote_plus(video_info['thumbnail_url'][0])
1908
1909 # upload date
1910 upload_date = self._html_search_meta(
1911 'datePublished', video_webpage, 'upload date', default=None)
1912 if not upload_date:
1913 upload_date = self._search_regex(
1914 [r'(?s)id="eow-date.*?>(.*?)</span>',
1915 r'(?:id="watch-uploader-info".*?>.*?|["\']simpleText["\']\s*:\s*["\'])(?:Published|Uploaded|Streamed live|Started) on (.+?)[<"\']'],
1916 video_webpage, 'upload date', default=None)
1917 upload_date = unified_strdate(upload_date)
1918
1919 video_license = self._html_search_regex(
1920 r'<h4[^>]+class="title"[^>]*>\s*License\s*</h4>\s*<ul[^>]*>\s*<li>(.+?)</li',
1921 video_webpage, 'license', default=None)
1922
1923 m_music = re.search(
1924 r'''(?x)
1925 <h4[^>]+class="title"[^>]*>\s*Music\s*</h4>\s*
1926 <ul[^>]*>\s*
1927 <li>(?P<title>.+?)
1928 by (?P<creator>.+?)
1929 (?:
1930 \(.+?\)|
1931 <a[^>]*
1932 (?:
1933 \bhref=["\']/red[^>]*>| # drop possible
1934 >\s*Listen ad-free with YouTube Red # YouTube Red ad
1935 )
1936 .*?
1937 )?</li
1938 ''',
1939 video_webpage)
1940 if m_music:
1941 video_alt_title = remove_quotes(unescapeHTML(m_music.group('title')))
1942 video_creator = clean_html(m_music.group('creator'))
1943 else:
1944 video_alt_title = video_creator = None
1945
1946 def extract_meta(field):
1947 return self._html_search_regex(
1948 r'<h4[^>]+class="title"[^>]*>\s*%s\s*</h4>\s*<ul[^>]*>\s*<li>(.+?)</li>\s*' % field,
1949 video_webpage, field, default=None)
1950
1951 track = extract_meta('Song')
1952 artist = extract_meta('Artist')
1953
1954 m_episode = re.search(
1955 r'<div[^>]+id="watch7-headline"[^>]*>\s*<span[^>]*>.*?>(?P<series>[^<]+)</a></b>\s*S(?P<season>\d+)\s*•\s*E(?P<episode>\d+)</span>',
1956 video_webpage)
1957 if m_episode:
1958 series = m_episode.group('series')
1959 season_number = int(m_episode.group('season'))
1960 episode_number = int(m_episode.group('episode'))
1961 else:
1962 series = season_number = episode_number = None
1963
1964 m_cat_container = self._search_regex(
1965 r'(?s)<h4[^>]*>\s*Category\s*</h4>\s*<ul[^>]*>(.*?)</ul>',
1966 video_webpage, 'categories', default=None)
1967 if m_cat_container:
1968 category = self._html_search_regex(
1969 r'(?s)<a[^<]+>(.*?)</a>', m_cat_container, 'category',
1970 default=None)
1971 video_categories = None if category is None else [category]
1972 else:
1973 video_categories = None
1974
1975 video_tags = [
1976 unescapeHTML(m.group('content'))
1977 for m in re.finditer(self._meta_regex('og:video:tag'), video_webpage)]
1978
1979 def _extract_count(count_name):
1980 return str_to_int(self._search_regex(
1981 r'-%s-button[^>]+><span[^>]+class="yt-uix-button-content"[^>]*>([\d,]+)</span>'
1982 % re.escape(count_name),
1983 video_webpage, count_name, default=None))
1984
1985 like_count = _extract_count('like')
1986 dislike_count = _extract_count('dislike')
1987
1988 # subtitles
1989 video_subtitles = self.extract_subtitles(video_id, video_webpage)
1990 automatic_captions = self.extract_automatic_captions(video_id, video_webpage)
1991
1992 video_duration = try_get(
1993 video_info, lambda x: int_or_none(x['length_seconds'][0]))
1994 if not video_duration:
1995 video_duration = parse_duration(self._html_search_meta(
1996 'duration', video_webpage, 'video duration'))
1997
1998 # annotations
1999 video_annotations = None
2000 if self._downloader.params.get('writeannotations', False):
2001 video_annotations = self._extract_annotations(video_id)
2002
2003 chapters = self._extract_chapters(description_original, video_duration)
2004
dd27fd17 2005 # Look for the DASH manifest
203fb43f 2006 if self._downloader.params.get('youtube_include_dash_manifest', True):
77c6fb5b 2007 dash_mpd_fatal = True
8ff648e4 2008 for mpd_url in dash_mpds:
d8d24a92 2009 dash_formats = {}
774e208f 2010 try:
05d0d131
YCH
2011 def decrypt_sig(mobj):
2012 s = mobj.group(1)
2013 dec_s = self._decrypt_signature(s, video_id, player_url, age_gate)
2014 return '/signature/%s' % dec_s
2015
8ff648e4 2016 mpd_url = re.sub(r'/s/([a-fA-F0-9\.]+)', decrypt_sig, mpd_url)
2d2fa82d 2017
8ff648e4 2018 for df in self._extract_mpd_formats(
2019 mpd_url, video_id, fatal=dash_mpd_fatal,
2020 formats_dict=self._formats):
c63ca0ee
S
2021 if not df.get('filesize'):
2022 df['filesize'] = _extract_filesize(df['url'])
d8d24a92
S
2023 # Do not overwrite DASH format found in some previous DASH manifest
2024 if df['format_id'] not in dash_formats:
2025 dash_formats[df['format_id']] = df
77c6fb5b
S
2026 # Additional DASH manifests may end up in HTTP Error 403 therefore
2027 # allow them to fail without bug report message if we already have
2028 # some DASH manifest succeeded. This is temporary workaround to reduce
2029 # burst of bug reports until we figure out the reason and whether it
2030 # can be fixed at all.
2031 dash_mpd_fatal = False
774e208f
PH
2032 except (ExtractorError, KeyError) as e:
2033 self.report_warning(
2034 'Skipping DASH manifest: %r' % e, video_id)
d8d24a92 2035 if dash_formats:
04b3b3df
JMF
2036 # Remove the formats we found through non-DASH, they
2037 # contain less info and it can be wrong, because we use
2038 # fixed values (for example the resolution). See
2039 # https://github.com/rg3/youtube-dl/issues/5774 for an
2040 # example.
d80265cc 2041 formats = [f for f in formats if f['format_id'] not in dash_formats.keys()]
d8d24a92 2042 formats.extend(dash_formats.values())
d80044c2 2043
6271f1ca
PH
2044 # Check for malformed aspect ratio
2045 stretched_m = re.search(
2046 r'<meta\s+property="og:video:tag".*?content="yt:stretch=(?P<w>[0-9]+):(?P<h>[0-9]+)">',
2047 video_webpage)
2048 if stretched_m:
313dfc45
LL
2049 w = float(stretched_m.group('w'))
2050 h = float(stretched_m.group('h'))
5faf9fed
S
2051 # yt:stretch may hold invalid ratio data (e.g. for Q39EVAstoRM ratio is 17:0).
2052 # We will only process correct ratios.
313dfc45 2053 if w > 0 and h > 0:
41f24c32 2054 ratio = w / h
313dfc45
LL
2055 for f in formats:
2056 if f.get('vcodec') != 'none':
2057 f['stretched_ratio'] = ratio
6271f1ca 2058
4bcc7bd1 2059 self._sort_formats(formats)
4ea3be0a 2060
d77ab8e2
S
2061 self.mark_watched(video_id, video_info)
2062
4ea3be0a 2063 return {
8bcc8756
JW
2064 'id': video_id,
2065 'uploader': video_uploader,
2066 'uploader_id': video_uploader_id,
fd050249 2067 'uploader_url': video_uploader_url,
8bcc8756 2068 'upload_date': upload_date,
7caf9830 2069 'license': video_license,
936784b2 2070 'creator': video_creator or artist,
8bcc8756 2071 'title': video_title,
936784b2 2072 'alt_title': video_alt_title or track,
8bcc8756
JW
2073 'thumbnail': video_thumbnail,
2074 'description': video_description,
2075 'categories': video_categories,
000b6b5a 2076 'tags': video_tags,
8bcc8756 2077 'subtitles': video_subtitles,
360e1ca5 2078 'automatic_captions': automatic_captions,
8bcc8756
JW
2079 'duration': video_duration,
2080 'age_limit': 18 if age_gate else 0,
2081 'annotations': video_annotations,
9cafc3fd 2082 'chapters': chapters,
7e8c0af0 2083 'webpage_url': proto + '://www.youtube.com/watch?v=%s' % video_id,
8bcc8756 2084 'view_count': view_count,
4ea3be0a 2085 'like_count': like_count,
2086 'dislike_count': dislike_count,
2d30521a 2087 'average_rating': float_or_none(video_info.get('avg_rating', [None])[0]),
8bcc8756 2088 'formats': formats,
2fe1ff85 2089 'is_live': is_live,
7c80519c 2090 'start_time': start_time,
297a564b 2091 'end_time': end_time,
12afdc2a
S
2092 'series': series,
2093 'season_number': season_number,
2094 'episode_number': episode_number,
936784b2
S
2095 'track': track,
2096 'artist': artist,
4ea3be0a 2097 }
c5e8d7af 2098
5f6a1245 2099
8e7aad20 2100class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
78caa52a 2101 IE_DESC = 'YouTube.com playlists'
d67cc9fa 2102 _VALID_URL = r"""(?x)(?:
c5e8d7af
PH
2103 (?:https?://)?
2104 (?:\w+\.)?
c5e8d7af 2105 (?:
feaa5ad7
S
2106 youtube\.com/
2107 (?:
87dadd45 2108 (?:course|view_play_list|my_playlists|artist|playlist|watch|embed/(?:videoseries|[0-9A-Za-z_-]{11}))
feaa5ad7
S
2109 \? (?:.*?[&;])*? (?:p|a|list)=
2110 | p/
2111 )|
2112 youtu\.be/[0-9A-Za-z_-]{11}\?.*?\blist=
c5e8d7af 2113 )
d67cc9fa 2114 (
a6857510 2115 (?:PL|LL|EC|UU|FL|RD|UL|TL)?[0-9A-Za-z-_]{10,}
5f6a1245 2116 # Top tracks, they can also include dots
d67cc9fa
JMF
2117 |(?:MC)[\w\.]*
2118 )
c5e8d7af
PH
2119 .*
2120 |
d0ba5587
S
2121 (%(playlist_id)s)
2122 )""" % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
8d81f3e3 2123 _TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s'
648e6a1f 2124 _VIDEO_RE = r'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})&amp;[^"]*?index=(?P<index>\d+)(?:[^>]+>(?P<title>[^<]+))?'
78caa52a 2125 IE_NAME = 'youtube:playlist'
81127aa5
PH
2126 _TESTS = [{
2127 'url': 'https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re',
2128 'info_dict': {
2129 'title': 'ytdl test PL',
a1cf99d0 2130 'id': 'PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re',
81127aa5
PH
2131 },
2132 'playlist_count': 3,
9291475f
PH
2133 }, {
2134 'url': 'https://www.youtube.com/playlist?list=PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx',
2135 'info_dict': {
acf757f4 2136 'id': 'PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx',
9291475f
PH
2137 'title': 'YDL_Empty_List',
2138 },
2139 'playlist_count': 0,
4201ba13 2140 'skip': 'This playlist is private',
9291475f
PH
2141 }, {
2142 'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
2143 'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
2144 'info_dict': {
2145 'title': '29C3: Not my department',
acf757f4 2146 'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
9291475f
PH
2147 },
2148 'playlist_count': 95,
2149 }, {
2150 'note': 'issue #673',
2151 'url': 'PLBB231211A4F62143',
2152 'info_dict': {
f46a8702 2153 'title': '[OLD]Team Fortress 2 (Class-based LP)',
acf757f4 2154 'id': 'PLBB231211A4F62143',
9291475f
PH
2155 },
2156 'playlist_mincount': 26,
2157 }, {
2158 'note': 'Large playlist',
2159 'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
2160 'info_dict': {
2161 'title': 'Uploads from Cauchemar',
acf757f4 2162 'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
9291475f
PH
2163 },
2164 'playlist_mincount': 799,
2165 }, {
2166 'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
2167 'info_dict': {
2168 'title': 'YDL_safe_search',
acf757f4 2169 'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
9291475f
PH
2170 },
2171 'playlist_count': 2,
4201ba13 2172 'skip': 'This playlist is private',
ac7553d0
PH
2173 }, {
2174 'note': 'embedded',
2d3d2997 2175 'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
ac7553d0
PH
2176 'playlist_count': 4,
2177 'info_dict': {
2178 'title': 'JODA15',
acf757f4 2179 'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
ac7553d0 2180 }
87dadd45
S
2181 }, {
2182 'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
2183 'playlist_mincount': 485,
2184 'info_dict': {
2185 'title': '2017 華語最新單曲 (2/24更新)',
2186 'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
2187 }
6b08cdf6
PH
2188 }, {
2189 'note': 'Embedded SWF player',
2d3d2997 2190 'url': 'https://www.youtube.com/p/YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ?hl=en_US&fs=1&rel=0',
6b08cdf6
PH
2191 'playlist_count': 4,
2192 'info_dict': {
2193 'title': 'JODA7',
acf757f4 2194 'id': 'YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ',
6b08cdf6 2195 }
4b7df0d3
JMF
2196 }, {
2197 'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
2198 'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
2199 'info_dict': {
acf757f4
PH
2200 'title': 'Uploads from Interstellar Movie',
2201 'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
4b7df0d3 2202 },
481cc733 2203 'playlist_mincount': 21,
dacb3a86
S
2204 }, {
2205 # Playlist URL that does not actually serve a playlist
2206 'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
2207 'info_dict': {
2208 'id': 'FqZTN594JQw',
2209 'ext': 'webm',
2210 'title': "Smiley's People 01 detective, Adventure Series, Action",
2211 'uploader': 'STREEM',
2212 'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
ec85ded8 2213 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
dacb3a86
S
2214 'upload_date': '20150526',
2215 'license': 'Standard YouTube License',
2216 'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
2217 'categories': ['People & Blogs'],
2218 'tags': list,
2219 'like_count': int,
2220 'dislike_count': int,
2221 },
2222 'params': {
2223 'skip_download': True,
2224 },
2225 'add_ie': [YoutubeIE.ie_key()],
481cc733
S
2226 }, {
2227 'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
2228 'info_dict': {
2229 'id': 'yeWKywCrFtk',
2230 'ext': 'mp4',
2231 'title': 'Small Scale Baler and Braiding Rugs',
2232 'uploader': 'Backus-Page House Museum',
2233 'uploader_id': 'backuspagemuseum',
ec85ded8 2234 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
481cc733
S
2235 'upload_date': '20161008',
2236 'license': 'Standard YouTube License',
2237 'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
2238 'categories': ['Nonprofits & Activism'],
2239 'tags': list,
2240 'like_count': int,
2241 'dislike_count': int,
2242 },
2243 'params': {
2244 'noplaylist': True,
2245 'skip_download': True,
2246 },
feaa5ad7
S
2247 }, {
2248 'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
2249 'only_matching': True,
a6857510
S
2250 }, {
2251 'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
2252 'only_matching': True,
81127aa5 2253 }]
c5e8d7af 2254
880e1c52
JMF
def _real_initialize(self):
    """Initialization hook for this extractor: delegates to ``self._login()``."""
    self._login()
2257
def _extract_mix(self, playlist_id):
    """Build a playlist result for a YouTube mix.

    Watch pages of the mix are downloaded one after another, each one
    requested with the last video id collected so far, until a page
    yields no video id that has not already been seen.  The playlist
    title is read from the last downloaded page.
    """
    # The mixes are generated from a single video; the id of the
    # playlist is just 'RD' + video_id, so its trailing 11 characters
    # are used as the first video to request.
    collected_ids = []
    current_id = playlist_id[-11:]
    page_number = 0
    while True:
        page_number += 1
        mix_url = 'https://youtube.com/watch?v=%s&list=%s' % (current_id, playlist_id)
        note = 'Downloading page {0} of Youtube mix'.format(page_number)
        webpage = self._download_webpage(mix_url, playlist_id, note)
        link_re = r'''(?xs)data-video-username=".*?".*?
                   href="/watch\?v=([0-9A-Za-z_-]{11})&amp;[^"]*?list=%s''' % re.escape(playlist_id)
        # Fetch new pages until all the videos are repeated, it seems that
        # there are always 51 unique videos.
        unseen_ids = [
            video_id
            for video_id in orderedSet(re.findall(link_re, webpage))
            if video_id not in collected_ids]
        if not unseen_ids:
            break
        collected_ids += unseen_ids
        current_id = collected_ids[-1]

    entries = self._ids_to_results(collected_ids)

    # Take the first truthy match among the candidate title elements;
    # if none is truthy, the result of the last lookup is kept.
    title_span = None
    for class_name in ('playlist-title', 'title long-title', 'title'):
        title_span = get_element_by_attribute('class', class_name, webpage)
        if title_span:
            break
    mix_title = clean_html(title_span)

    return self.playlist_result(entries, playlist_id, mix_title)
2289
def _extract_playlist(self, playlist_id):
    """Download a playlist page and turn it into a playlist result.

    Returns a ``(has_videos, playlist)`` tuple. ``has_videos`` is False only
    when no playlist title was found and ``self._entries`` yields nothing
    for the page. ``playlist`` is the ``playlist_result`` dict extended with
    ``uploader``, ``uploader_id`` and ``uploader_url``.

    Raises ExtractorError (expected) when an alert on the page says the
    playlist does not exist, is private, or that the parameters are invalid.
    """
    url = self._TEMPLATE_URL % playlist_id
    page = self._download_webpage(url, playlist_id)

    # the yt-alert-message now has tabindex attribute (see https://github.com/rg3/youtube-dl/issues/11604)
    for alert in re.findall(r'<div class="yt-alert-message"[^>]*>([^<]+)</div>', page):
        alert = alert.strip()
        # Check if the playlist exists or is private
        unavailable = re.match(
            r'[^<]*(?:The|This) playlist (?P<reason>does not exist|is private)[^<]*', alert)
        if unavailable:
            reason = unavailable.group('reason')
            message = 'This playlist %s' % reason
            if 'private' in reason:
                message += ', use --username or --netrc to access it'
            message += '.'
            raise ExtractorError(message, expected=True)
        if re.match(r'[^<]*Invalid parameters[^<]*', alert):
            raise ExtractorError(
                'Invalid parameters. Maybe URL is incorrect.',
                expected=True)
        if re.match(r'[^<]*Choose your language[^<]*', alert):
            # The language chooser is not an error; ignore it silently.
            continue
        self.report_warning('Youtube gives an alert message: ' + alert)

    playlist_title = self._html_search_regex(
        r'(?s)<h1 class="pl-header-title[^"]*"[^>]*>\s*(.*?)\s*</h1>',
        page, 'title', default=None)

    # Common prefix of both uploader patterns: the first link in the
    # playlist header details list.
    uploader_anchor = r'class=["\']pl-header-details[^>]+>\s*<li>\s*<a[^>]+\bhref='
    uploader = self._search_regex(
        r'%s["\']/(?:user|channel)/[^>]+>([^<]+)' % uploader_anchor,
        page, 'uploader', default=None)
    uploader_match = re.search(
        r'%s(["\'])(?P<path>/(?:user|channel)/(?P<uploader_id>.+?))\1' % uploader_anchor,
        page)
    uploader_id = uploader_url = None
    if uploader_match:
        uploader_id = uploader_match.group('uploader_id')
        uploader_url = compat_urlparse.urljoin(url, uploader_match.group('path'))

    has_videos = True

    if not playlist_title:
        try:
            # Some playlist URLs don't actually serve a playlist (e.g.
            # https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4)
            next(self._entries(page, playlist_id))
        except StopIteration:
            has_videos = False

    playlist = self.playlist_result(
        self._entries(page, playlist_id), playlist_id, playlist_title)
    playlist['uploader'] = uploader
    playlist['uploader_id'] = uploader_id
    playlist['uploader_url'] = uploader_url

    return has_videos, playlist
c5e8d7af 2351
def _check_download_just_video(self, url, playlist_id):
    """Decide whether ``url`` should be handled as a single video.

    Returns a ``(video_id, result)`` tuple:
    - ``(None, None)`` when the URL carries no video id;
    - ``(video_id, url_result)`` when a video id is present and the
      ``noplaylist`` downloader param is set;
    - ``(video_id, None)`` when a video id is present but the playlist
      should be downloaded.
    """
    # Check if it's a video-specific URL
    query_dict = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
    video_id = query_dict.get('v', [None])[0]
    if not video_id:
        # No ``v`` query parameter: look for a youtu.be or /embed/ style id.
        video_id = self._search_regex(
            r'(?:(?:^|//)youtu\.be/|youtube\.com/embed/(?!videoseries))([0-9A-Za-z_-]{11})', url,
            'video id', default=None)
    if not video_id:
        return None, None

    if self._downloader.params.get('noplaylist'):
        self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
        return video_id, self.url_result(video_id, 'Youtube', video_id=video_id)

    self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))
    return video_id, None
448830ce 2366
ebf1b291
S
def _real_extract(self, url):
    """Extract a playlist, a mix, or a single video depending on ``url``.

    Raises ExtractorError when ``url`` does not match ``_VALID_URL``.
    """
    # Extract playlist id
    url_match = re.match(self._VALID_URL, url)
    if url_match is None:
        raise ExtractorError('Invalid URL: %s' % url)
    playlist_id = url_match.group(1) or url_match.group(2)

    video_id, video = self._check_download_just_video(url, playlist_id)
    if video:
        return video

    # Mixes require a custom extraction process
    if playlist_id.startswith(('RD', 'UL', 'PU')):
        return self._extract_mix(playlist_id)

    has_videos, playlist = self._extract_playlist(playlist_id)
    if video_id and not has_videos:
        # Some playlist URLs don't actually serve a playlist (see
        # https://github.com/rg3/youtube-dl/issues/10537).
        # Fallback to plain video extraction if there is a video id
        # along with playlist id.
        return self.url_result(video_id, 'Youtube', video_id=video_id)

    return playlist
448830ce 2391
c5e8d7af 2392
class YoutubeChannelIE(YoutubePlaylistBaseInfoExtractor):
    """Extractor for ``/channel/<id>`` URLs.

    Prefers delegating to the channel's "uploads" playlist (``UU...``) and
    falls back to scraping the channel's videos page.
    """
    IE_NAME = 'youtube:channel'
    IE_DESC = 'YouTube.com channels'
    _VALID_URL = r'https?://(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie)?\.com)/channel/(?P<id>[0-9A-Za-z_-]+)'
    _TEMPLATE_URL = 'https://www.youtube.com/channel/%s/videos'
    _VIDEO_RE = r'(?:title="(?P<title>[^"]+)"[^>]+)?href="/watch\?v=(?P<id>[0-9A-Za-z_-]+)&?'
    _TESTS = [{
        'note': 'paginated channel',
        'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
        'playlist_mincount': 91,
        'info_dict': {
            'id': 'UUKfVa3S1e4PHvxWcwyMMg8w',
            'title': 'Uploads from lex will',
        }
    }, {
        'note': 'Age restricted channel',
        # from https://www.youtube.com/user/DeusExOfficial
        'url': 'https://www.youtube.com/channel/UCs0ifCMCm1icqRbqhUINa0w',
        'playlist_mincount': 64,
        'info_dict': {
            'id': 'UUs0ifCMCm1icqRbqhUINa0w',
            'title': 'Uploads from Deus Ex',
        },
    }]

    @classmethod
    def suitable(cls, url):
        """Reject URLs claimed by the playlists or live extractors."""
        if YoutubePlaylistsIE.suitable(url) or YoutubeLiveIE.suitable(url):
            return False
        return super(YoutubeChannelIE, cls).suitable(url)

    def _build_template_url(self, url, channel_id):
        """Return the videos-page URL for ``channel_id`` (``url`` is unused here)."""
        return self._TEMPLATE_URL % channel_id

    def _real_extract(self, url):
        """Return a playlist for the channel, or a redirect to its uploads playlist."""
        channel_id = self._match_id(url)

        url = self._build_template_url(url, channel_id)

        # Channel by page listing is restricted to 35 pages of 30 items, i.e. 1050 videos total (see #5778)
        # Workaround by extracting as a playlist if managed to obtain channel playlist URL
        # otherwise fallback on channel by page extraction
        listing_page = self._download_webpage(
            url + '?view=57', channel_id,
            'Downloading channel page', fatal=False)
        channel_playlist_id = False
        if listing_page is not False:
            channel_playlist_id = self._html_search_meta(
                'channelId', listing_page, 'channel id', default=None)
            if not channel_playlist_id:
                # No channelId meta tag: try to recover the id from the
                # app deep-link meta tags.
                channel_url = self._html_search_meta(
                    ('al:ios:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad'),
                    listing_page, 'channel url', default=None)
                if channel_url:
                    channel_playlist_id = self._search_regex(
                        r'vnd\.youtube://user/([0-9A-Za-z_-]+)',
                        channel_url, 'channel id', default=None)
        if channel_playlist_id and channel_playlist_id.startswith('UC'):
            # Swap the 'UC' prefix for 'UU' to address the uploads playlist.
            playlist_id = 'UU' + channel_playlist_id[2:]
            playlist_url = compat_urlparse.urljoin(url, '/playlist?list=%s' % playlist_id)
            return self.url_result(playlist_url, 'YoutubePlaylist')

        channel_page = self._download_webpage(url, channel_id, 'Downloading page #1')
        autogenerated_match = re.search(r'''(?x)
            class="[^"]*?(?:
                channel-header-autogenerated-label|
                yt-channel-title-autogenerated
            )[^"]*"''', channel_page)

        if autogenerated_match is not None:
            # The videos are contained in a single page
            # the ajax pages can't be used, they are empty
            entries = []
            for video_id, video_title in self.extract_videos_from_page(channel_page):
                entries.append(self.url_result(
                    video_id, 'Youtube', video_id=video_id,
                    video_title=video_title))
            return self.playlist_result(entries, channel_id)

        try:
            # Probe for at least one entry; an empty channel page may carry
            # an alert explaining why.
            next(self._entries(channel_page, channel_id))
        except StopIteration:
            alert_message = self._html_search_regex(
                r'(?s)<div[^>]+class=(["\']).*?\byt-alert-message\b.*?\1[^>]*>(?P<alert>[^<]+)</div>',
                channel_page, 'alert', default=None, group='alert')
            if alert_message:
                raise ExtractorError('Youtube said: %s' % alert_message, expected=True)

        return self.playlist_result(self._entries(channel_page, channel_id), channel_id)
c5e8d7af
PH
2482
2483
class YoutubeUserIE(YoutubeChannelIE):
    """Extractor for ``/user/<name>``, ``/c/<name>``, bare ``/<name>`` and ``ytuser:<name>``."""
    IE_NAME = 'youtube:user'
    IE_DESC = 'YouTube.com user videos (URL or "ytuser" keyword)'
    _VALID_URL = r'(?:(?:https?://(?:\w+\.)?youtube\.com/(?:(?P<user>user|c)/)?(?!(?:attribution_link|watch|results|shared)(?:$|[^a-z_A-Z0-9-])))|ytuser:)(?!feed/)(?P<id>[A-Za-z0-9_-]+)'
    _TEMPLATE_URL = 'https://www.youtube.com/%s/%s/videos'

    _TESTS = [{
        'url': 'https://www.youtube.com/user/TheLinuxFoundation',
        'playlist_mincount': 320,
        'info_dict': {
            'id': 'UUfX55Sx5hEFjoC3cNs6mCUQ',
            'title': 'Uploads from The Linux Foundation',
        }
    }, {
        # Only available via https://www.youtube.com/c/12minuteathlete/videos
        # but not https://www.youtube.com/user/12minuteathlete/videos
        'url': 'https://www.youtube.com/c/12minuteathlete/videos',
        'playlist_mincount': 249,
        'info_dict': {
            'id': 'UUVjM-zV6_opMDx7WYxnjZiQ',
            'title': 'Uploads from 12 Minute Athlete',
        }
    }, {
        'url': 'ytuser:phihag',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/c/gametrailers',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/gametrailers',
        'only_matching': True,
    }, {
        # This channel is not available, geo restricted to JP
        'url': 'https://www.youtube.com/user/kananishinoSMEJ/videos',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Accept ``url`` only if no other ``Youtube*IE`` in this module does."""
        # This extractor's regex is very permissive and would also match
        # URLs meant for the other youtube extractors, so give every other
        # Youtube*IE defined in this module the first chance.
        for name, klass in globals().items():
            if not (name.startswith('Youtube') and name.endswith('IE')):
                continue
            if klass is cls:
                continue
            if klass.suitable(url):
                return False
        return super(YoutubeUserIE, cls).suitable(url)

    def _build_template_url(self, url, channel_id):
        """Return the videos-page URL, keeping the ``user``/``c`` path segment of ``url``."""
        url_match = re.match(self._VALID_URL, url)
        # URLs without an explicit segment (bare name or ytuser:) use 'user'.
        path_kind = url_match.group('user') or 'user'
        return self._TEMPLATE_URL % (path_kind, url_match.group('id'))
2534
b05654f0 2535
f07e276a
S
class YoutubeLiveIE(YoutubeBaseInfoExtractor):
    """Extractor for ``.../live`` URLs of a user or channel."""
    IE_NAME = 'youtube:live'
    IE_DESC = 'YouTube.com live streams'
    _VALID_URL = r'(?P<base_url>https?://(?:\w+\.)?youtube\.com/(?:(?:user|channel|c)/)?(?P<id>[^/]+))/live'

    _TESTS = [{
        'url': 'https://www.youtube.com/user/TheYoungTurks/live',
        'info_dict': {
            'id': 'a48o2S1cPoo',
            'ext': 'mp4',
            'title': 'The Young Turks - Live Main Show',
            'uploader': 'The Young Turks',
            'uploader_id': 'TheYoungTurks',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
            'upload_date': '20150715',
            'license': 'Standard YouTube License',
            'description': 'md5:438179573adcdff3c97ebb1ee632b891',
            'categories': ['News & Politics'],
            'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/TheYoungTurks/live',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return the live video if the page exposes one, else the base channel URL."""
        url_match = re.match(self._VALID_URL, url)
        channel_id = url_match.group('id')
        base_url = url_match.group('base_url')

        webpage = self._download_webpage(url, channel_id, fatal=False)
        if not webpage:
            return self.url_result(base_url)

        page_type = self._og_search_property(
            'type', webpage, 'page type', default='')
        video_id = self._html_search_meta(
            'videoId', webpage, 'video id', default=None)
        # Only trust the page when it is a video page with an 11-character id.
        is_video_page = page_type.startswith('video')
        if is_video_page and video_id and re.match(r'^[0-9A-Za-z_-]{11}$', video_id):
            return self.url_result(video_id, YoutubeIE.ie_key())

        return self.url_result(base_url)
2586
2587
e462474e
S
class YoutubePlaylistsIE(YoutubePlaylistsBaseInfoExtractor):
    """Extractor for the ``/playlists`` page of a user or channel.

    Purely declarative: extraction is inherited from the base class.
    """
    IE_NAME = 'youtube:playlists'
    IE_DESC = 'YouTube.com user/channel playlists'
    _VALID_URL = r'https?://(?:\w+\.)?youtube\.com/(?:user|channel)/(?P<id>[^/]+)/playlists'

    _TESTS = [{
        'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
        'playlist_mincount': 4,
        'info_dict': {
            'id': 'ThirstForScience',
            'title': 'Thirst for Science',
        },
    }, {
        # with "Load more" button
        'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
        'playlist_mincount': 70,
        'info_dict': {
            'id': 'igorkle1',
            'title': 'Игорь Клейнер',
        },
    }, {
        'url': 'https://www.youtube.com/channel/UCiU1dHvZObB2iP6xkJ__Icw/playlists',
        'playlist_mincount': 17,
        'info_dict': {
            'id': 'UCiU1dHvZObB2iP6xkJ__Icw',
            'title': 'Chem Player',
        },
    }]
0c148415
S
2616
2617
870f3bfc
S
class YoutubeSearchBaseInfoExtractor(YoutubePlaylistBaseInfoExtractor):
    """Shared base for search extractors; only overrides the video link pattern."""
    # Matches a watch link with an 11-character id and, optionally, the
    # title attribute that follows it.
    _VIDEO_RE = r'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})(?:[^"]*"[^>]+\btitle="(?P<title>[^"]+))?'
2620
2621
class YoutubeSearchIE(SearchInfoExtractor, YoutubeSearchBaseInfoExtractor):
    """Extractor for ``ytsearch`` keyword queries, paging through result pages."""
    IE_DESC = 'YouTube.com searches'
    # there doesn't appear to be a real limit, for example if you search for
    # 'python' you get more than 8.000.000 results
    _MAX_RESULTS = float('inf')
    IE_NAME = 'youtube:search'
    _SEARCH_KEY = 'ytsearch'
    # Extra query-string arguments merged into the first results URL;
    # subclasses override this to change e.g. the sort order.
    _EXTRA_QUERY_ARGS = {}
    _TESTS = []

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query

        Result pages are downloaded one after another, following the "Next"
        link, until ``n`` videos have been collected, a page yields no
        videos, or no "Next" link is present. At most ``n`` entries are
        returned via ``self.playlist_result``.

        Raises ExtractorError (expected) when a downloaded page contains a
        ``search-message`` element.
        """

        videos = []

        url_query = {
            'search_query': query.encode('utf-8'),
        }
        url_query.update(self._EXTRA_QUERY_ARGS)
        result_url = 'https://www.youtube.com/results?' + compat_urllib_parse_urlencode(url_query)

        for pagenum in itertools.count(1):
            data = self._download_json(
                result_url, video_id='query "%s"' % query,
                note='Downloading page %s' % pagenum,
                errnote='Unable to download API page',
                query={'spf': 'navigate'})
            html_content = data[1]['body']['content']

            if 'class="search-message' in html_content:
                raise ExtractorError(
                    '[youtube] No video results', expected=True)

            new_videos = list(self._process_page(html_content))
            videos += new_videos
            # Stop as soon as enough results are available. Using ``>=``
            # (rather than ``>``) avoids downloading one more page when
            # exactly ``n`` results have already been collected.
            if not new_videos or len(videos) >= n:
                break
            next_link = self._html_search_regex(
                r'href="(/results\?[^"]*\bsp=[^"]+)"[^>]*>\s*<span[^>]+class="[^"]*\byt-uix-button-content\b[^"]*"[^>]*>Next',
                html_content, 'next link', default=None)
            if next_link is None:
                break
            result_url = compat_urlparse.urljoin('https://www.youtube.com/', next_link)

        # The last page may have pushed the total past the requested count.
        if len(videos) > n:
            videos = videos[:n]
        return self.playlist_result(videos, query)
75dff0ee 2670
c9ae7b95 2671
class YoutubeSearchDateIE(YoutubeSearchIE):
    """Variant of the search extractor that adds a ``search_sort`` query argument."""
    IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
    IE_DESC = 'YouTube.com searches, newest videos first'
    _SEARCH_KEY = 'ytsearchdate'
    _EXTRA_QUERY_ARGS = {'search_sort': 'video_date_uploaded'}
75dff0ee 2677
c9ae7b95 2678
class YoutubeSearchURLIE(YoutubeSearchBaseInfoExtractor):
    """Extractor for ``/results?search_query=...`` (or ``q=...``) URLs."""
    IE_NAME = 'youtube:search_url'
    IE_DESC = 'YouTube.com search URLs'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?P<query>[^&]+)(?:[&]|$)'
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'title': 'youtube-dl test video',
        }
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Download the results page at ``url`` and return its videos, titled by the query."""
        raw_query = re.match(self._VALID_URL, url).group('query')
        # The query is URL-encoded with '+' for spaces; decode it for display.
        query = compat_urllib_parse_unquote_plus(raw_query)
        webpage = self._download_webpage(url, query)
        entries = self._process_page(webpage)
        return self.playlist_result(entries, playlist_title=query)
c9ae7b95
PH
2699
2700
class YoutubeShowIE(YoutubePlaylistsBaseInfoExtractor):
    """Extractor for ``/show/<id>`` URLs, handled through the show's playlists page."""
    IE_NAME = 'youtube:show'
    IE_DESC = 'YouTube.com (multi-season) shows'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/show/(?P<id>[^?#]*)'
    _TESTS = [{
        'url': 'https://www.youtube.com/show/airdisasters',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'airdisasters',
            'title': 'Air Disasters',
        }
    }]

    def _real_extract(self, url):
        """Delegate to the base extractor using the show's ``/playlists`` URL."""
        show_id = self._match_id(url)
        playlists_url = 'https://www.youtube.com/show/%s/playlists' % show_id
        return super(YoutubeShowIE, self)._real_extract(playlists_url)
04cc9617
JMF
2718
2719
class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
    """
    Base class for feed extractors
    Subclasses must define the _FEED_NAME and _PLAYLIST_TITLE properties.
    """
    _LOGIN_REQUIRED = True

    @property
    def IE_NAME(self):
        """Extractor name derived from the feed name (``youtube:<feed>``)."""
        return 'youtube:%s' % self._FEED_NAME

    def _real_initialize(self):
        """Run the login step when the extractor is initialized."""
        self._login()

    def _entries(self, page):
        """Yield url results for the feed, following "load more" continuations."""
        # The extraction process is the same as for playlists, but the regex
        # for the video ids doesn't contain an index
        seen_ids = []
        content_html = page
        more_widget_html = page
        for page_num in itertools.count(1):
            matches = re.findall(r'href="\s*/watch\?v=([0-9A-Za-z_-]{11})', content_html)

            # 'recommended' feed has infinite 'load more' and each new portion spins
            # the same videos in (sometimes) slightly different order, so we'll check
            # for unicity and break when portion has no new videos
            fresh_ids = [
                video_id for video_id in orderedSet(matches)
                if video_id not in seen_ids]
            if not fresh_ids:
                break

            seen_ids.extend(fresh_ids)

            for entry in self._ids_to_results(fresh_ids):
                yield entry

            more_match = re.search(
                r'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html)
            if not more_match:
                break

            continuation_url = 'https://youtube.com/%s' % more_match.group('more')
            more = self._download_json(
                continuation_url, self._PLAYLIST_TITLE,
                'Downloading page #%s' % page_num,
                transform_source=uppercase_escape)
            content_html = more['content_html']
            more_widget_html = more['load_more_widget_html']

    def _real_extract(self, url):
        """Download the feed page for ``_FEED_NAME`` and return it as a playlist."""
        feed_url = 'https://www.youtube.com/feed/%s' % self._FEED_NAME
        page = self._download_webpage(feed_url, self._PLAYLIST_TITLE)
        return self.playlist_result(
            self._entries(page), playlist_title=self._PLAYLIST_TITLE)
25f14e9f
S
2771
2772
class YoutubeWatchLaterIE(YoutubePlaylistIE):
    """Extractor for the "watch later" list, i.e. the playlist with id ``WL``."""
    IE_NAME = 'youtube:watchlater'
    IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/(?:feed/watch_later|(?:playlist|watch)\?(?:.+&)?list=WL)|:ytwatchlater'

    _TESTS = [{
        'url': 'https://www.youtube.com/playlist?list=WL',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?v=bCNU9TrbiRk&index=1&list=WL',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return the single video when one is requested, otherwise the ``WL`` playlist."""
        # Only the result half of each (id/flag, result) pair is needed here.
        single_video = self._check_download_just_video(url, 'WL')[1]
        if single_video:
            return single_video
        return self._extract_playlist('WL')[1]
f459d170 2792
5f6a1245 2793
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    """Extractor that resolves the favourites page to its backing playlist."""
    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube.com favourite videos, ":ytfav" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/my_favorites|:ytfav(?:ou?rites)?'
    _LOGIN_REQUIRED = True

    def _real_extract(self, url):
        """Find the playlist id on the favourites page and hand it to the playlist extractor."""
        favourites_page = self._download_webpage(
            'https://www.youtube.com/my_favorites', 'Youtube Favourites videos')
        playlist_id = self._search_regex(
            r'list=(.+?)["&]', favourites_page, 'favourites playlist id')
        return self.url_result(playlist_id, 'YoutubePlaylist')
15870e90
PH
2804
2805
25f14e9f
S
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    """Feed extractor configured for the ``recommended`` feed."""
    _FEED_NAME = 'recommended'
    _PLAYLIST_TITLE = 'Youtube Recommended videos'
    IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/recommended|:ytrec(?:ommended)?'
1ed5b5c9 2811
1ed5b5c9 2812
25f14e9f
S
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    """Feed extractor configured for the ``subscriptions`` feed."""
    _FEED_NAME = 'subscriptions'
    _PLAYLIST_TITLE = 'Youtube Subscriptions'
    IE_DESC = 'YouTube.com subscriptions feed, "ytsubs" keyword (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/subscriptions|:ytsubs(?:criptions)?'
1ed5b5c9 2818
1ed5b5c9 2819
25f14e9f
S
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    """Feed extractor configured for the ``history`` feed."""
    _FEED_NAME = 'history'
    _PLAYLIST_TITLE = 'Youtube History'
    IE_DESC = 'Youtube watch history, ":ythistory" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/history|:ythistory'
1ed5b5c9
JMF
2825
2826
15870e90
PH
class YoutubeTruncatedURLIE(InfoExtractor):
    """Catch watch/attribution URLs that lack a video id and explain the likely cause.

    Extraction always fails with an expected ExtractorError whose message
    suggests that the URL was not quoted in the shell.
    """
    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise: a URL matching this extractor has no video id to extract."""
        hint = (
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like  youtube-dl '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply  youtube-dl BaW_jenozKc  .')
        raise ExtractorError(hint, expected=True)
772fd5cc
PH
2874
2875
class YoutubeTruncatedIDIE(InfoExtractor):
    """Catch watch URLs whose ``v`` parameter is shorter than 11 characters.

    Extraction always fails with an expected ExtractorError naming the
    incomplete id.
    """
    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise: the matched id is too short to be a complete video id."""
        partial_id = self._match_id(url)
        message = 'Incomplete YouTube ID %s. URL %s looks truncated.' % (partial_id, url)
        raise ExtractorError(message, expected=True)