]> jfr.im git - yt-dlp.git/blame - youtube_dl/extractor/youtube.py
[youtube] Add support for invidious tor instances (#22268)
[yt-dlp.git] / youtube_dl / extractor / youtube.py
CommitLineData
c5e8d7af 1# coding: utf-8
c5e8d7af 2
78caa52a
PH
3from __future__ import unicode_literals
4
5
0ca96d48 6import itertools
c5e8d7af 7import json
c4417ddb 8import os.path
d77ab8e2 9import random
c5e8d7af 10import re
42939b61 11import time
e0df6211 12import traceback
c5e8d7af 13
b05654f0 14from .common import InfoExtractor, SearchInfoExtractor
2b25cb5d 15from ..jsinterp import JSInterpreter
54256267 16from ..swfinterp import SWFInterpreter
4bb4a188 17from ..compat import (
edf3e38e 18 compat_chr,
f8c55c66 19 compat_HTTPError,
8d81f3e3 20 compat_kwargs,
c5e8d7af 21 compat_parse_qs,
7fd002c0
S
22 compat_urllib_parse_unquote,
23 compat_urllib_parse_unquote_plus,
15707c7e 24 compat_urllib_parse_urlencode,
7c80519c 25 compat_urllib_parse_urlparse,
7c61bd36 26 compat_urlparse,
c5e8d7af 27 compat_str,
4bb4a188
PH
28)
29from ..utils import (
27019dbb 30 bool_or_none,
c5e8d7af 31 clean_html,
026fbedc 32 dict_get,
9b9c5355 33 error_to_compat_str,
351f37c0 34 extract_attributes,
c5e8d7af 35 ExtractorError,
2d30521a 36 float_or_none,
4bb4a188
PH
37 get_element_by_attribute,
38 get_element_by_id,
dd27fd17 39 int_or_none,
94278f72 40 mimetype2ext,
4bb4a188 41 orderedSet,
6310acf5 42 parse_codecs,
7c80519c 43 parse_duration,
54fc90aa 44 qualities,
0cb58b02 45 remove_quotes,
3995d37d 46 remove_start,
cf7e015f 47 smuggle_url,
dbdaaa23 48 str_or_none,
c93d53f5 49 str_to_int,
556dbe7f 50 try_get,
c5e8d7af
PH
51 unescapeHTML,
52 unified_strdate,
cf7e015f 53 unsmuggle_url,
81c2f20b 54 uppercase_escape,
21c340b8 55 url_or_none,
6e6bc8da 56 urlencode_postdata,
c5e8d7af
PH
57)
58
5f6a1245 59
class YoutubeBaseInfoExtractor(InfoExtractor):
    """Provide base functions for Youtube extractors"""
    _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
    _TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge'

    # Endpoints used by the multi-step sign-in flow implemented in _login()
    _LOOKUP_URL = 'https://accounts.google.com/_/signin/sl/lookup'
    _CHALLENGE_URL = 'https://accounts.google.com/_/signin/sl/challenge'
    # {0} is filled with the "TL" token extracted from the challenge response
    _TFA_URL = 'https://accounts.google.com/_/signin/challenge?hl=en&TL={0}'

    _NETRC_MACHINE = 'youtube'
    # If True it will raise an error if no login info is provided
    _LOGIN_REQUIRED = False

    _PLAYLIST_ID_RE = r'(?:PL|LL|EC|UU|FL|RD|UL|TL|OLAK5uy_)[0-9A-Za-z-_]{10,}'

    def _set_language(self):
        """Set the PREF cookie on .youtube.com, requesting hl=en."""
        self._set_cookie(
            '.youtube.com', 'PREF', 'f1=50000000&hl=en',
            # YouTube sets the expire time to about two months
            expire_time=time.time() + 2 * 30 * 24 * 3600)

    def _ids_to_results(self, ids):
        """Return a list with one 'Youtube' url_result per video id in ids."""
        return [
            self.url_result(vid_id, 'Youtube', video_id=vid_id)
            for vid_id in ids]

    def _login(self):
        """
        Attempt to log in to YouTube.
        True is returned if successful or skipped.
        False is returned if login failed.

        If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.

        Every failure path returns False (never None) so the result matches
        the contract above; callers that only test truthiness are unaffected.
        """
        username, password = self._get_login_info()
        # No authentication to be performed
        if username is None:
            if self._LOGIN_REQUIRED and self._downloader.params.get('cookiefile') is None:
                raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
            return True

        login_page = self._download_webpage(
            self._LOGIN_URL, None,
            note='Downloading login page',
            errnote='unable to fetch login page', fatal=False)
        if login_page is False:
            return False

        login_form = self._hidden_inputs(login_page)

        def req(url, f_req, note, errnote):
            # POST the hidden login form fields plus the JSON-encoded f_req
            # payload to url; yields the parsed JSON, or False on failure
            # because the download is non-fatal.
            data = login_form.copy()
            data.update({
                'pstMsg': 1,
                'checkConnection': 'youtube',
                'checkedDomains': 'youtube',
                'hl': 'en',
                'deviceinfo': '[null,null,null,[],null,"US",null,null,[],"GlifWebSignIn",null,[null,null,[]]]',
                'f.req': json.dumps(f_req),
                'flowName': 'GlifWebSignIn',
                'flowEntry': 'ServiceLogin',
                # TODO: reverse actual botguard identifier generation algo
                'bgRequest': '["identifier",""]',
            })
            return self._download_json(
                url, None, note=note, errnote=errnote,
                # Drop everything preceding the first '[' before JSON parsing
                transform_source=lambda s: re.sub(r'^[^[]*', '', s),
                fatal=False,
                data=urlencode_postdata(data), headers={
                    'Content-Type': 'application/x-www-form-urlencoded;charset=utf-8',
                    'Google-Accounts-XSRF': 1,
                })

        def warn(message):
            self._downloader.report_warning(message)

        lookup_req = [
            username,
            None, [], None, 'US', None, None, 2, False, True,
            [
                None, None,
                [2, 1, None, 1,
                 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn',
                 None, [], 4],
                1, [None, None, []], None, None, None, True
            ],
            username,
        ]

        lookup_results = req(
            self._LOOKUP_URL, lookup_req,
            'Looking up account info', 'Unable to look up account info')

        if lookup_results is False:
            return False

        user_hash = try_get(lookup_results, lambda x: x[0][2], compat_str)
        if not user_hash:
            warn('Unable to extract user hash')
            return False

        challenge_req = [
            user_hash,
            None, 1, None, [1, None, None, None, [password, None, True]],
            [
                None, None, [2, 1, None, 1, 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn', None, [], 4],
                1, [None, None, []], None, None, None, True
            ]]

        challenge_results = req(
            self._CHALLENGE_URL, challenge_req,
            'Logging in', 'Unable to log in')

        if challenge_results is False:
            return False

        login_res = try_get(challenge_results, lambda x: x[0][5], list)
        if login_res:
            login_msg = try_get(login_res, lambda x: x[5], compat_str)
            # The conditional must be parenthesised: '%' binds tighter than
            # 'if/else', so without the parentheses any other message was
            # reported without the 'Unable to login' prefix.
            warn('Unable to login: %s' % (
                'Invalid password'
                if login_msg == 'INCORRECT_ANSWER_ENTERED' else login_msg))
            return False

        res = try_get(challenge_results, lambda x: x[0][-1], list)
        if not res:
            warn('Unable to extract result entry')
            return False

        login_challenge = try_get(res, lambda x: x[0][0], list)
        if login_challenge:
            challenge_str = try_get(login_challenge, lambda x: x[2], compat_str)
            if challenge_str == 'TWO_STEP_VERIFICATION':
                # SEND_SUCCESS - TFA code has been successfully sent to phone
                # QUOTA_EXCEEDED - reached the limit of TFA codes
                status = try_get(login_challenge, lambda x: x[5], compat_str)
                if status == 'QUOTA_EXCEEDED':
                    warn('Exceeded the limit of TFA codes, try later')
                    return False

                tl = try_get(challenge_results, lambda x: x[1][2], compat_str)
                if not tl:
                    warn('Unable to extract TL')
                    return False

                tfa_code = self._get_tfa_info('2-step verification code')

                if not tfa_code:
                    warn(
                        'Two-factor authentication required. Provide it either interactively or with --twofactor <code> '
                        '(Note that only TOTP (Google Authenticator App) codes work at this time.)')
                    return False

                # Codes may be entered with a leading 'G-'; submit only the rest
                tfa_code = remove_start(tfa_code, 'G-')

                tfa_req = [
                    user_hash, None, 2, None,
                    [
                        9, None, None, None, None, None, None, None,
                        [None, tfa_code, True, 2]
                    ]]

                tfa_results = req(
                    self._TFA_URL.format(tl), tfa_req,
                    'Submitting TFA code', 'Unable to submit TFA code')

                if tfa_results is False:
                    return False

                tfa_res = try_get(tfa_results, lambda x: x[0][5], list)
                if tfa_res:
                    tfa_msg = try_get(tfa_res, lambda x: x[5], compat_str)
                    # Parenthesised for the same precedence reason as above
                    warn('Unable to finish TFA: %s' % (
                        'Invalid TFA code'
                        if tfa_msg == 'INCORRECT_ANSWER_ENTERED' else tfa_msg))
                    return False

                check_cookie_url = try_get(
                    tfa_results, lambda x: x[0][-1][2], compat_str)
            else:
                CHALLENGES = {
                    'LOGIN_CHALLENGE': "This device isn't recognized. For your security, Google wants to make sure it's really you.",
                    'USERNAME_RECOVERY': 'Please provide additional information to aid in the recovery process.',
                    'REAUTH': "There is something unusual about your activity. For your security, Google wants to make sure it's really you.",
                }
                challenge = CHALLENGES.get(
                    challenge_str,
                    '%s returned error %s.' % (self.IE_NAME, challenge_str))
                warn('%s\nGo to https://accounts.google.com/, login and solve a challenge.' % challenge)
                return False
        else:
            check_cookie_url = try_get(res, lambda x: x[2], compat_str)

        if not check_cookie_url:
            warn('Unable to extract CheckCookie URL')
            return False

        check_cookie_results = self._download_webpage(
            check_cookie_url, None, 'Checking cookie', fatal=False)

        if check_cookie_results is False:
            return False

        # The login counts as successful only if the CheckCookie response
        # contains the myaccount URL
        if 'https://myaccount.google.com/' not in check_cookie_results:
            warn('Unable to log in')
            return False

        return True

    def _download_webpage_handle(self, *args, **kwargs):
        """Delegate to the parent implementation with disable_polymer=true added to the query.

        The caller's 'query' dict is copied first, so it is not mutated.
        """
        query = kwargs.get('query', {}).copy()
        query['disable_polymer'] = 'true'
        kwargs['query'] = query
        return super(YoutubeBaseInfoExtractor, self)._download_webpage_handle(
            *args, **compat_kwargs(kwargs))

    def _real_initialize(self):
        """Set the language cookie and attempt to log in; does nothing without a downloader."""
        if self._downloader is None:
            return
        self._set_language()
        if not self._login():
            return
c5e8d7af 282
8377574c 283
class YoutubeEntryListBaseInfoExtractor(YoutubeBaseInfoExtractor):
    # Extract entries from page with "Load more" button
    def _entries(self, page, playlist_id):
        """Yield the entries of page and of every follow-up "Load more" page.

        Each chunk of HTML is passed to self._process_page(). The next chunk
        is located through the data-uix-load-more-href attribute of the most
        recently received widget HTML; iteration ends when no such attribute
        is found or when the returned content is blank.
        """
        max_retries = 3
        content_html = page
        more_widget_html = page
        for page_num in itertools.count(1):
            for entry in self._process_page(content_html):
                yield entry

            load_more = re.search(
                r'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html)
            if not load_more:
                break

            more_url = 'https://youtube.com/%s' % load_more.group('more')
            for attempt in range(max_retries + 1):
                retry_note = ' (retry #%d)' % attempt if attempt else ''
                try:
                    # Downloading page may result in intermittent 5xx HTTP error
                    # that is usually worked around with a retry
                    more = self._download_json(
                        more_url, playlist_id,
                        'Downloading page #%s%s' % (page_num, retry_note),
                        transform_source=uppercase_escape)
                except ExtractorError as e:
                    transient = (
                        isinstance(e.cause, compat_HTTPError)
                        and e.cause.code in (500, 503))
                    if transient and attempt < max_retries:
                        continue
                    # Either not a 500/503 error or the retries are used up
                    raise
                else:
                    break

            content_html = more['content_html']
            if not content_html.strip():
                # Some webpages show a "Load more" button but they don't
                # have more videos
                break
            more_widget_html = more['load_more_widget_html']
321
061a75ed
S
322
class YoutubePlaylistBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
    def _process_page(self, content):
        """Yield one 'Youtube' url_result per (id, title) pair found in content."""
        for video_id, video_title in self.extract_videos_from_page(content):
            yield self.url_result(video_id, 'Youtube', video_id, video_title)

    def extract_videos_from_page_impl(self, video_re, page, ids_in_page, titles_in_page):
        """Collect video ids and titles matched by video_re in page.

        ids_in_page and titles_in_page are parallel lists that are extended
        in place. An id that is already present is not appended again; its
        title is only filled in if the stored one is empty.
        """
        for mobj in re.finditer(video_re, page):
            named = mobj.groupdict()
            # The link with index 0 is not the first video of the playlist (not sure if still actual)
            # NOTE(review): the comparison reads the 'id' group although the
            # guard tests for an 'index' group - confirm which was intended.
            if 'index' in named and mobj.group('id') == '0':
                continue
            video_id = mobj.group('id')
            if 'title' in named:
                video_title = unescapeHTML(mobj.group('title'))
            else:
                video_title = None
            if video_title:
                video_title = video_title.strip()
            if video_title == '► Play all':
                video_title = None
            try:
                known_at = ids_in_page.index(video_id)
            except ValueError:
                # First occurrence of this id
                ids_in_page.append(video_id)
                titles_in_page.append(video_title)
            else:
                if video_title and not titles_in_page[known_at]:
                    titles_in_page[known_at] = video_title

    def extract_videos_from_page(self, page):
        """Return zipped (id, title) pairs matched by self._VIDEO_RE in page."""
        found_ids, found_titles = [], []
        self.extract_videos_from_page_impl(
            self._VIDEO_RE, page, found_ids, found_titles)
        return zip(found_ids, found_titles)
354
355
061a75ed
S
class YoutubePlaylistsBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
    def _process_page(self, content):
        """Yield a 'YoutubePlaylist' url_result for each distinct playlist id linked in content."""
        linked_ids = re.findall(
            r'<h3[^>]+class="[^"]*yt-lockup-title[^"]*"[^>]*><a[^>]+href="/?playlist\?list=([0-9A-Za-z-_]{10,})"',
            content)
        # orderedSet is applied to drop repeated ids
        for playlist_id in orderedSet(linked_ids):
            playlist_url = 'https://www.youtube.com/playlist?list=%s' % playlist_id
            yield self.url_result(playlist_url, 'YoutubePlaylist')

    def _real_extract(self, url):
        """Download url and return a playlist result built from its entries."""
        playlist_id = self._match_id(url)
        webpage = self._download_webpage(url, playlist_id)
        # A missing title is tolerated (fatal=False)
        title = self._og_search_title(webpage, fatal=False)
        entries = self._entries(webpage, playlist_id)
        return self.playlist_result(entries, playlist_id, title)
0c148415
S
369
370
360e1ca5 371class YoutubeIE(YoutubeBaseInfoExtractor):
78caa52a 372 IE_DESC = 'YouTube.com'
cb7dfeea 373 _VALID_URL = r"""(?x)^
c5e8d7af 374 (
edb53e2d 375 (?:https?://|//) # http(s):// or protocol-independent URL
cb7dfeea 376 (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/|
484aaeb2 377 (?:www\.)?deturl\.com/www\.youtube\.com/|
e70dc1d1 378 (?:www\.)?pwnyoutube\.com/|
8b561bfc 379 (?:www\.)?hooktube\.com/|
f7000f3a 380 (?:www\.)?yourepeat\.com/|
e69ae5b9 381 tube\.majestyc\.net/|
ba036333 382 # Invidious instances taken from https://github.com/omarroth/invidious/wiki/Invidious-Instances
77d95677 383 (?:(?:www|dev)\.)?invidio\.us/|
ba036333 384 (?:(?:www|no)\.)?invidiou\.sh/|
385 (?:(?:www|fi|de)\.)?invidious\.snopyta\.org/|
8ae113ca 386 (?:www\.)?invidious\.kabi\.tk/|
ba036333 387 (?:www\.)?invidious\.enkirton\.net/|
388 (?:www\.)?invidious\.13ad\.de/|
791d2e81 389 (?:www\.)?invidious\.mastodon\.host/|
494d664e 390 (?:www\.)?invidious\.nixnet\.xyz/|
ba036333 391 (?:www\.)?tube\.poal\.co/|
8ae113ca 392 (?:www\.)?vid\.wxzm\.sx/|
494d664e 393 (?:www\.)?yt\.elukerio\.org/|
bff90fc5 394 (?:www\.)?kgg2m7yk5aybusll\.onion/|
395 (?:www\.)?qklhadlycap4cnod\.onion/|
396 (?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion/|
397 (?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion/|
398 (?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion/|
399 (?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion/|
e69ae5b9 400 youtube\.googleapis\.com/) # the various hostnames, with wildcard subdomains
c5e8d7af
PH
401 (?:.*?\#/)? # handle anchor (#/) redirect urls
402 (?: # the various things that can precede the ID:
ac7553d0 403 (?:(?:v|embed|e)/(?!videoseries)) # v/ or embed/ or e/
c5e8d7af 404 |(?: # or the v= param in all its forms
f7000f3a 405 (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
c5e8d7af 406 (?:\?|\#!?) # the params delimiter ? or # or #!
040ac686 407 (?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
c5e8d7af
PH
408 v=
409 )
f4b05232 410 ))
cbaed4bb
S
411 |(?:
412 youtu\.be| # just youtu.be/xxxx
6d4fc66b
S
413 vid\.plus| # or vid.plus/xxxx
414 zwearz\.com/watch| # or zwearz.com/watch/xxxx
cbaed4bb 415 )/
edb53e2d 416 |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
f4b05232 417 )
c5e8d7af 418 )? # all until now is optional -> you can pass the naked ID
8963d9c2 419 ([0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
d0ba5587
S
420 (?!.*?\blist=
421 (?:
422 %(playlist_id)s| # combined list/video URLs are handled by the playlist IE
423 WL # WL are handled by the watch later IE
424 )
425 )
c5e8d7af 426 (?(1).+)? # if we found the ID, everything can follow
d0ba5587 427 $""" % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
c5e8d7af 428 _NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
2c62dc26 429 _formats = {
c2d3cb4c 430 '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
431 '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
432 '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
433 '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
434 '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
435 '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
436 '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
437 '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
3834d3e3 438 # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
c2d3cb4c 439 '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
440 '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
441 '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
442 '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
443 '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
444 '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
e1a0bfdf 445 '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
c2d3cb4c 446 '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
447 '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
e1a0bfdf 448
449
450 # 3D videos
c2d3cb4c 451 '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
452 '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
453 '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
454 '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
e1a0bfdf 455 '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
456 '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
457 '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
836a086c 458
96fb5605 459 # Apple HTTP Live Streaming
11f12195 460 '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
c2d3cb4c 461 '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
462 '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
463 '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
464 '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
465 '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
e1a0bfdf 466 '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
467 '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
2c62dc26
PH
468
469 # DASH mp4 video
d23028a8
S
470 '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
471 '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
472 '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
473 '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
474 '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
067aa17e 475 '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
d23028a8
S
476 '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
477 '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
478 '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
479 '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
480 '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
481 '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},
836a086c 482
f6f1fc92 483 # Dash mp4 audio
d23028a8
S
484 '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
485 '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
486 '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
487 '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
488 '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
489 '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
490 '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},
836a086c
AZ
491
492 # Dash webm
d23028a8
S
493 '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
494 '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
495 '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
496 '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
497 '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
498 '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
499 '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
500 '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
501 '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
502 '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
503 '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
504 '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
505 '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
506 '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
507 '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
4c6b4764 508 # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
d23028a8
S
509 '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
510 '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
511 '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
512 '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
513 '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
514 '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
2c62dc26
PH
515
516 # Dash webm audio
d23028a8
S
517 '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
518 '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},
ce6b9a2d 519
0857baad 520 # Dash webm audio with opus inside
d23028a8
S
521 '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
522 '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
523 '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},
0857baad 524
ce6b9a2d
PH
525 # RTMP (unnamed)
526 '_rtmp': {'protocol': 'rtmp'},
b85eae0f
S
527
528 # av01 video only formats sometimes served with "unknown" codecs
529 '394': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
530 '395': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
531 '396': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
532 '397': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
c5e8d7af 533 }
19041a38 534 _SUBTITLE_FORMATS = ('srv1', 'srv2', 'srv3', 'ttml', 'vtt')
836a086c 535
fd5c4aab
S
536 _GEO_BYPASS = False
537
78caa52a 538 IE_NAME = 'youtube'
2eb88d95
PH
539 _TESTS = [
540 {
2d3d2997 541 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
4bc3a23e
PH
542 'info_dict': {
543 'id': 'BaW_jenozKc',
544 'ext': 'mp4',
545 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
546 'uploader': 'Philipp Hagemeister',
547 'uploader_id': 'phihag',
ec85ded8 548 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
dd4c4492
S
549 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
550 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
4bc3a23e
PH
551 'upload_date': '20121002',
552 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
553 'categories': ['Science & Technology'],
000b6b5a 554 'tags': ['youtube-dl'],
556dbe7f 555 'duration': 10,
dbdaaa23 556 'view_count': int,
3e7c1224
PH
557 'like_count': int,
558 'dislike_count': int,
7c80519c 559 'start_time': 1,
297a564b 560 'end_time': 9,
2eb88d95 561 }
0e853ca4 562 },
0e853ca4 563 {
2d3d2997 564 'url': 'https://www.youtube.com/watch?v=UxxajLWwzqY',
4bc3a23e
PH
565 'note': 'Test generic use_cipher_signature video (#897)',
566 'info_dict': {
567 'id': 'UxxajLWwzqY',
568 'ext': 'mp4',
569 'upload_date': '20120506',
570 'title': 'Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]',
0cb58b02 571 'alt_title': 'I Love It (feat. Charli XCX)',
7caf9830 572 'description': 'md5:f3ceb5ef83a08d95b9d146f973157cc8',
000b6b5a
S
573 'tags': ['Icona Pop i love it', 'sweden', 'pop music', 'big beat records', 'big beat', 'charli',
574 'xcx', 'charli xcx', 'girls', 'hbo', 'i love it', "i don't care", 'icona', 'pop',
575 'iconic ep', 'iconic', 'love', 'it'],
556dbe7f 576 'duration': 180,
4bc3a23e
PH
577 'uploader': 'Icona Pop',
578 'uploader_id': 'IconaPop',
ec85ded8 579 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IconaPop',
0cb58b02 580 'creator': 'Icona Pop',
936784b2
S
581 'track': 'I Love It (feat. Charli XCX)',
582 'artist': 'Icona Pop',
2eb88d95 583 }
c108eb73
JMF
584 },
585 {
4bc3a23e
PH
586 'url': 'https://www.youtube.com/watch?v=07FYdnEawAQ',
587 'note': 'Test VEVO video with age protection (#956)',
588 'info_dict': {
589 'id': '07FYdnEawAQ',
590 'ext': 'mp4',
591 'upload_date': '20130703',
4fe54c12 592 'title': 'Justin Timberlake - Tunnel Vision (Official Music Video) (Explicit)',
0cb58b02 593 'alt_title': 'Tunnel Vision',
4fe54c12 594 'description': 'md5:07dab3356cde4199048e4c7cd93471e1',
556dbe7f 595 'duration': 419,
4bc3a23e
PH
596 'uploader': 'justintimberlakeVEVO',
597 'uploader_id': 'justintimberlakeVEVO',
ec85ded8 598 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/justintimberlakeVEVO',
0cb58b02 599 'creator': 'Justin Timberlake',
7e72694b 600 'track': 'Tunnel Vision',
936784b2 601 'artist': 'Justin Timberlake',
34952f09 602 'age_limit': 18,
c108eb73
JMF
603 }
604 },
fccd3771 605 {
4bc3a23e
PH
606 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
607 'note': 'Embed-only video (#1746)',
608 'info_dict': {
609 'id': 'yZIXLfi8CZQ',
610 'ext': 'mp4',
611 'upload_date': '20120608',
612 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
613 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
614 'uploader': 'SET India',
94bfcd23 615 'uploader_id': 'setindia',
ec85ded8 616 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
94bfcd23 617 'age_limit': 18,
fccd3771
PH
618 }
619 },
11b56058 620 {
2d3d2997 621 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=UxxajLWwzqY',
11b56058
PM
622 'note': 'Use the first video ID in the URL',
623 'info_dict': {
624 'id': 'BaW_jenozKc',
625 'ext': 'mp4',
626 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
627 'uploader': 'Philipp Hagemeister',
628 'uploader_id': 'phihag',
ec85ded8 629 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
11b56058
PM
630 'upload_date': '20121002',
631 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
632 'categories': ['Science & Technology'],
633 'tags': ['youtube-dl'],
556dbe7f 634 'duration': 10,
dbdaaa23 635 'view_count': int,
11b56058
PM
636 'like_count': int,
637 'dislike_count': int,
34a7de29
S
638 },
639 'params': {
640 'skip_download': True,
641 },
11b56058 642 },
dd27fd17 643 {
2d3d2997 644 'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
4bc3a23e
PH
645 'note': '256k DASH audio (format 141) via DASH manifest',
646 'info_dict': {
647 'id': 'a9LDPn-MO4I',
648 'ext': 'm4a',
649 'upload_date': '20121002',
650 'uploader_id': '8KVIDEO',
ec85ded8 651 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
4bc3a23e
PH
652 'description': '',
653 'uploader': '8KVIDEO',
654 'title': 'UHDTV TEST 8K VIDEO.mp4'
4919603f 655 },
4bc3a23e
PH
656 'params': {
657 'youtube_include_dash_manifest': True,
658 'format': '141',
4919603f 659 },
de3c7fe0 660 'skip': 'format 141 not served anymore',
dd27fd17 661 },
3489b7d2
JMF
662 # DASH manifest with encrypted signature
663 {
78caa52a
PH
664 'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
665 'info_dict': {
666 'id': 'IB3lcPjvWLA',
667 'ext': 'm4a',
4fe54c12
S
668 'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
669 'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
556dbe7f 670 'duration': 244,
78caa52a
PH
671 'uploader': 'AfrojackVEVO',
672 'uploader_id': 'AfrojackVEVO',
673 'upload_date': '20131011',
3489b7d2 674 },
4bc3a23e 675 'params': {
78caa52a 676 'youtube_include_dash_manifest': True,
de3c7fe0 677 'format': '141/bestaudio[ext=m4a]',
3489b7d2
JMF
678 },
679 },
aaeb86f6
S
680 # JS player signature function name containing $
681 {
682 'url': 'https://www.youtube.com/watch?v=nfWlot6h_JM',
683 'info_dict': {
684 'id': 'nfWlot6h_JM',
685 'ext': 'm4a',
686 'title': 'Taylor Swift - Shake It Off',
4fe54c12 687 'description': 'md5:bec2185232c05479482cb5a9b82719bf',
556dbe7f 688 'duration': 242,
aaeb86f6
S
689 'uploader': 'TaylorSwiftVEVO',
690 'uploader_id': 'TaylorSwiftVEVO',
691 'upload_date': '20140818',
0cb58b02 692 'creator': 'Taylor Swift',
aaeb86f6
S
693 },
694 'params': {
695 'youtube_include_dash_manifest': True,
de3c7fe0 696 'format': '141/bestaudio[ext=m4a]',
aaeb86f6
S
697 },
698 },
aa79ac0c
PH
699 # Controversy video
700 {
701 'url': 'https://www.youtube.com/watch?v=T4XJQO3qol8',
702 'info_dict': {
703 'id': 'T4XJQO3qol8',
704 'ext': 'mp4',
556dbe7f 705 'duration': 219,
aa79ac0c 706 'upload_date': '20100909',
4fe54c12 707 'uploader': 'Amazing Atheist',
aa79ac0c 708 'uploader_id': 'TheAmazingAtheist',
ec85ded8 709 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheAmazingAtheist',
aa79ac0c
PH
710 'title': 'Burning Everyone\'s Koran',
711 'description': 'SUBSCRIBE: http://www.youtube.com/saturninefilms\n\nEven Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html',
712 }
c522adb1
JMF
713 },
714 # Normal age-gate video (No vevo, embed allowed)
715 {
2d3d2997 716 'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
c522adb1
JMF
717 'info_dict': {
718 'id': 'HtVdAasjOgU',
719 'ext': 'mp4',
720 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
ec85ded8 721 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
556dbe7f 722 'duration': 142,
c522adb1
JMF
723 'uploader': 'The Witcher',
724 'uploader_id': 'WitcherGame',
ec85ded8 725 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
c522adb1 726 'upload_date': '20140605',
34952f09 727 'age_limit': 18,
c522adb1
JMF
728 },
729 },
fccae2b9
S
730 # Age-gate video with encrypted signature
731 {
2d3d2997 732 'url': 'https://www.youtube.com/watch?v=6kLq3WMV1nU',
fccae2b9
S
733 'info_dict': {
734 'id': '6kLq3WMV1nU',
4fe54c12 735 'ext': 'mp4',
fccae2b9
S
736 'title': 'Dedication To My Ex (Miss That) (Lyric Video)',
737 'description': 'md5:33765bb339e1b47e7e72b5490139bb41',
eb6793ba 738 'duration': 246,
fccae2b9
S
739 'uploader': 'LloydVEVO',
740 'uploader_id': 'LloydVEVO',
ec85ded8 741 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/LloydVEVO',
fccae2b9 742 'upload_date': '20110629',
34952f09 743 'age_limit': 18,
fccae2b9
S
744 },
745 },
067aa17e 746 # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
7d02dcfa 747 # YouTube Red ad is not captured for creator
774e208f
PH
748 {
749 'url': '__2ABJjxzNo',
750 'info_dict': {
751 'id': '__2ABJjxzNo',
752 'ext': 'mp4',
556dbe7f 753 'duration': 266,
774e208f
PH
754 'upload_date': '20100430',
755 'uploader_id': 'deadmau5',
ec85ded8 756 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
0cb58b02 757 'creator': 'deadmau5',
774e208f
PH
758 'description': 'md5:12c56784b8032162bb936a5f76d55360',
759 'uploader': 'deadmau5',
760 'title': 'Deadmau5 - Some Chords (HD)',
0cb58b02 761 'alt_title': 'Some Chords',
774e208f
PH
762 },
763 'expected_warnings': [
764 'DASH manifest missing',
765 ]
e52a40ab 766 },
067aa17e 767 # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
e52a40ab
PH
768 {
769 'url': 'lqQg6PlCWgI',
770 'info_dict': {
771 'id': 'lqQg6PlCWgI',
772 'ext': 'mp4',
556dbe7f 773 'duration': 6085,
90227264 774 'upload_date': '20150827',
cbe2bd91 775 'uploader_id': 'olympic',
ec85ded8 776 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
cbe2bd91 777 'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
be49068d 778 'uploader': 'Olympic',
cbe2bd91
PH
779 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
780 },
781 'params': {
782 'skip_download': 'requires avconv',
e52a40ab 783 }
cbe2bd91 784 },
6271f1ca
PH
785 # Non-square pixels
786 {
787 'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
788 'info_dict': {
789 'id': '_b-2C3KPAM0',
790 'ext': 'mp4',
791 'stretched_ratio': 16 / 9.,
556dbe7f 792 'duration': 85,
6271f1ca
PH
793 'upload_date': '20110310',
794 'uploader_id': 'AllenMeow',
ec85ded8 795 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
6271f1ca 796 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
eb6793ba 797 'uploader': '孫ᄋᄅ',
6271f1ca
PH
798 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
799 },
06b491eb
S
800 },
801 # url_encoded_fmt_stream_map is empty string
802 {
803 'url': 'qEJwOuvDf7I',
804 'info_dict': {
805 'id': 'qEJwOuvDf7I',
f57b7835 806 'ext': 'webm',
06b491eb
S
807 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
808 'description': '',
809 'upload_date': '20150404',
810 'uploader_id': 'spbelect',
811 'uploader': 'Наблюдатели Петербурга',
812 },
813 'params': {
814 'skip_download': 'requires avconv',
e323cf3f
S
815 },
816 'skip': 'This live event has ended.',
06b491eb 817 },
067aa17e 818 # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
da77d856
S
819 {
820 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
821 'info_dict': {
822 'id': 'FIl7x6_3R5Y',
eb6793ba 823 'ext': 'webm',
da77d856
S
824 'title': 'md5:7b81415841e02ecd4313668cde88737a',
825 'description': 'md5:116377fd2963b81ec4ce64b542173306',
556dbe7f 826 'duration': 220,
da77d856
S
827 'upload_date': '20150625',
828 'uploader_id': 'dorappi2000',
ec85ded8 829 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
da77d856 830 'uploader': 'dorappi2000',
eb6793ba 831 'formats': 'mincount:31',
da77d856 832 },
eb6793ba 833 'skip': 'not actual anymore',
2ee8f5d8 834 },
8a1a26ce
YCH
835 # DASH manifest with segment_list
836 {
837 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
838 'md5': '8ce563a1d667b599d21064e982ab9e31',
839 'info_dict': {
840 'id': 'CsmdDsKjzN8',
841 'ext': 'mp4',
17ee98e1 842 'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510
8a1a26ce
YCH
843 'uploader': 'Airtek',
844 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
845 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
846 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
847 },
848 'params': {
849 'youtube_include_dash_manifest': True,
850 'format': '135', # bestvideo
be49068d
S
851 },
852 'skip': 'This live event has ended.',
2ee8f5d8 853 },
cf7e015f
S
854 {
855 # Multifeed videos (multiple cameras), URL is for Main Camera
856 'url': 'https://www.youtube.com/watch?v=jqWvoWXjCVs',
857 'info_dict': {
858 'id': 'jqWvoWXjCVs',
859 'title': 'teamPGP: Rocket League Noob Stream',
860 'description': 'md5:dc7872fb300e143831327f1bae3af010',
861 },
862 'playlist': [{
863 'info_dict': {
864 'id': 'jqWvoWXjCVs',
865 'ext': 'mp4',
866 'title': 'teamPGP: Rocket League Noob Stream (Main Camera)',
867 'description': 'md5:dc7872fb300e143831327f1bae3af010',
556dbe7f 868 'duration': 7335,
cf7e015f
S
869 'upload_date': '20150721',
870 'uploader': 'Beer Games Beer',
871 'uploader_id': 'beergamesbeer',
ec85ded8 872 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
7caf9830 873 'license': 'Standard YouTube License',
cf7e015f
S
874 },
875 }, {
876 'info_dict': {
877 'id': '6h8e8xoXJzg',
878 'ext': 'mp4',
879 'title': 'teamPGP: Rocket League Noob Stream (kreestuh)',
880 'description': 'md5:dc7872fb300e143831327f1bae3af010',
556dbe7f 881 'duration': 7337,
cf7e015f
S
882 'upload_date': '20150721',
883 'uploader': 'Beer Games Beer',
884 'uploader_id': 'beergamesbeer',
ec85ded8 885 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
7caf9830 886 'license': 'Standard YouTube License',
cf7e015f
S
887 },
888 }, {
889 'info_dict': {
890 'id': 'PUOgX5z9xZw',
891 'ext': 'mp4',
892 'title': 'teamPGP: Rocket League Noob Stream (grizzle)',
893 'description': 'md5:dc7872fb300e143831327f1bae3af010',
556dbe7f 894 'duration': 7337,
cf7e015f
S
895 'upload_date': '20150721',
896 'uploader': 'Beer Games Beer',
897 'uploader_id': 'beergamesbeer',
ec85ded8 898 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
7caf9830 899 'license': 'Standard YouTube License',
cf7e015f
S
900 },
901 }, {
902 'info_dict': {
903 'id': 'teuwxikvS5k',
904 'ext': 'mp4',
905 'title': 'teamPGP: Rocket League Noob Stream (zim)',
906 'description': 'md5:dc7872fb300e143831327f1bae3af010',
556dbe7f 907 'duration': 7334,
cf7e015f
S
908 'upload_date': '20150721',
909 'uploader': 'Beer Games Beer',
910 'uploader_id': 'beergamesbeer',
ec85ded8 911 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
7caf9830 912 'license': 'Standard YouTube License',
cf7e015f
S
913 },
914 }],
915 'params': {
916 'skip_download': True,
917 },
4fe54c12 918 'skip': 'This video is not available.',
cbaed4bb 919 },
f9f49d87 920 {
067aa17e 921 # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
f9f49d87
S
922 'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
923 'info_dict': {
924 'id': 'gVfLd0zydlo',
925 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
926 },
927 'playlist_count': 2,
be49068d 928 'skip': 'Not multifeed anymore',
f9f49d87 929 },
cbaed4bb 930 {
2d3d2997 931 'url': 'https://vid.plus/FlRa-iH7PGw',
cbaed4bb 932 'only_matching': True,
0e49d9a6 933 },
6d4fc66b 934 {
2d3d2997 935 'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
6d4fc66b
S
936 'only_matching': True,
937 },
0e49d9a6 938 {
067aa17e 939 # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
a8776b10 940 # Also tests cut-off URL expansion in video description (see
067aa17e
S
941 # https://github.com/ytdl-org/youtube-dl/issues/1892,
942 # https://github.com/ytdl-org/youtube-dl/issues/8164)
0e49d9a6
LL
943 'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
944 'info_dict': {
945 'id': 'lsguqyKfVQg',
946 'ext': 'mp4',
947 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
eb6793ba 948 'alt_title': 'Dark Walk - Position Music',
0e49d9a6 949 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
556dbe7f 950 'duration': 133,
0e49d9a6
LL
951 'upload_date': '20151119',
952 'uploader_id': 'IronSoulElf',
ec85ded8 953 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
0e49d9a6 954 'uploader': 'IronSoulElf',
eb6793ba
S
955 'creator': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
956 'track': 'Dark Walk - Position Music',
957 'artist': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
92bc97d3 958 'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
0e49d9a6
LL
959 },
960 'params': {
961 'skip_download': True,
962 },
963 },
61f92af1 964 {
067aa17e 965 # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
61f92af1
S
966 'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
967 'only_matching': True,
968 },
313dfc45
LL
969 {
970 # Video with yt:stretch=17:0
971 'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
972 'info_dict': {
973 'id': 'Q39EVAstoRM',
974 'ext': 'mp4',
975 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
976 'description': 'md5:ee18a25c350637c8faff806845bddee9',
977 'upload_date': '20151107',
978 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
979 'uploader': 'CH GAMER DROID',
980 },
981 'params': {
982 'skip_download': True,
983 },
be49068d 984 'skip': 'This video does not exist.',
313dfc45 985 },
7caf9830
S
986 {
987 # Video licensed under Creative Commons
988 'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
989 'info_dict': {
990 'id': 'M4gD1WSo5mA',
991 'ext': 'mp4',
992 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
993 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
556dbe7f 994 'duration': 721,
7caf9830
S
995 'upload_date': '20150127',
996 'uploader_id': 'BerkmanCenter',
ec85ded8 997 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
556dbe7f 998 'uploader': 'The Berkman Klein Center for Internet & Society',
7caf9830
S
999 'license': 'Creative Commons Attribution license (reuse allowed)',
1000 },
1001 'params': {
1002 'skip_download': True,
1003 },
1004 },
fd050249
S
1005 {
1006 # Channel-like uploader_url
1007 'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
1008 'info_dict': {
1009 'id': 'eQcmzGIKrzg',
1010 'ext': 'mp4',
1011 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
1012 'description': 'md5:dda0d780d5a6e120758d1711d062a867',
556dbe7f 1013 'duration': 4060,
fd050249 1014 'upload_date': '20151119',
eb6793ba 1015 'uploader': 'Bernie Sanders',
fd050249 1016 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
ec85ded8 1017 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
fd050249
S
1018 'license': 'Creative Commons Attribution license (reuse allowed)',
1019 },
1020 'params': {
1021 'skip_download': True,
1022 },
1023 },
040ac686
S
1024 {
1025 'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
1026 'only_matching': True,
7f29cf54
S
1027 },
1028 {
067aa17e 1029 # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
7f29cf54
S
1030 'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
1031 'only_matching': True,
6496ccb4
S
1032 },
1033 {
1034 # Rental video preview
1035 'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
1036 'info_dict': {
1037 'id': 'uGpuVWrhIzE',
1038 'ext': 'mp4',
1039 'title': 'Piku - Trailer',
1040 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
1041 'upload_date': '20150811',
1042 'uploader': 'FlixMatrix',
1043 'uploader_id': 'FlixMatrixKaravan',
ec85ded8 1044 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
6496ccb4
S
1045 'license': 'Standard YouTube License',
1046 },
1047 'params': {
1048 'skip_download': True,
1049 },
eb6793ba 1050 'skip': 'This video is not available.',
022a5d66 1051 },
12afdc2a
S
1052 {
1053 # YouTube Red video with episode data
1054 'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
1055 'info_dict': {
1056 'id': 'iqKdEhx-dD4',
1057 'ext': 'mp4',
1058 'title': 'Isolation - Mind Field (Ep 1)',
4fe54c12 1059 'description': 'md5:46a29be4ceffa65b92d277b93f463c0f',
556dbe7f 1060 'duration': 2085,
12afdc2a
S
1061 'upload_date': '20170118',
1062 'uploader': 'Vsauce',
1063 'uploader_id': 'Vsauce',
1064 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
12afdc2a
S
1065 'series': 'Mind Field',
1066 'season_number': 1,
1067 'episode_number': 1,
1068 },
1069 'params': {
1070 'skip_download': True,
1071 },
1072 'expected_warnings': [
1073 'Skipping DASH manifest',
1074 ],
1075 },
c7121fa7
S
1076 {
1077 # The following content has been identified by the YouTube community
1078 # as inappropriate or offensive to some audiences.
1079 'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
1080 'info_dict': {
1081 'id': '6SJNVb0GnPI',
1082 'ext': 'mp4',
1083 'title': 'Race Differences in Intelligence',
1084 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
1085 'duration': 965,
1086 'upload_date': '20140124',
1087 'uploader': 'New Century Foundation',
1088 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
1089 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
c7121fa7
S
1090 },
1091 'params': {
1092 'skip_download': True,
1093 },
1094 },
022a5d66
S
1095 {
1096 # itag 212
1097 'url': '1t24XAntNCY',
1098 'only_matching': True,
fd5c4aab
S
1099 },
1100 {
1101 # geo restricted to JP
1102 'url': 'sJL6WA-aGkQ',
1103 'only_matching': True,
1104 },
d0ba5587
S
1105 {
1106 'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
1107 'only_matching': True,
1108 },
cd5a74a2
S
1109 {
1110 'url': 'https://invidio.us/watch?v=BaW_jenozKc',
1111 'only_matching': True,
1112 },
825cd268
RA
1113 {
1114 # DRM protected
1115 'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
1116 'only_matching': True,
4fe54c12
S
1117 },
1118 {
1119 # Video with unsupported adaptive stream type formats
1120 'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
1121 'info_dict': {
1122 'id': 'Z4Vy8R84T1U',
1123 'ext': 'mp4',
1124 'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
1125 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
1126 'duration': 433,
1127 'upload_date': '20130923',
1128 'uploader': 'Amelia Putri Harwita',
1129 'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',
1130 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',
1131 'formats': 'maxcount:10',
1132 },
1133 'params': {
1134 'skip_download': True,
1135 'youtube_include_dash_manifest': False,
1136 },
5caabd3c 1137 },
1138 {
822b9d9c 1139 # Youtube Music Auto-generated description
5caabd3c 1140 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1141 'info_dict': {
1142 'id': 'MgNrAu2pzNs',
1143 'ext': 'mp4',
1144 'title': 'Voyeur Girl',
1145 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
1146 'upload_date': '20190312',
1147 'uploader': 'Various Artists - Topic',
1148 'uploader_id': 'UCVWKBi1ELZn0QX2CBLSkiyw',
1149 'artist': 'Stephen',
1150 'track': 'Voyeur Girl',
1151 'album': 'it\'s too much love to know my dear',
1152 'release_date': '20190313',
1153 'release_year': 2019,
1154 },
1155 'params': {
1156 'skip_download': True,
1157 },
1158 },
1159 {
822b9d9c 1160 # Youtube Music Auto-generated description
5caabd3c 1161 # Retrieve 'artist' field from 'Artist:' in video description
1162 # when it is present on youtube music video
5caabd3c 1163 'url': 'https://www.youtube.com/watch?v=k0jLE7tTwjY',
1164 'info_dict': {
1165 'id': 'k0jLE7tTwjY',
1166 'ext': 'mp4',
1167 'title': 'Latch Feat. Sam Smith',
1168 'description': 'md5:3cb1e8101a7c85fcba9b4fb41b951335',
1169 'upload_date': '20150110',
1170 'uploader': 'Various Artists - Topic',
1171 'uploader_id': 'UCNkEcmYdjrH4RqtNgh7BZ9w',
1172 'artist': 'Disclosure',
1173 'track': 'Latch Feat. Sam Smith',
1174 'album': 'Latch Featuring Sam Smith',
1175 'release_date': '20121008',
1176 'release_year': 2012,
1177 },
1178 'params': {
1179 'skip_download': True,
1180 },
1181 },
1182 {
822b9d9c 1183 # Youtube Music Auto-generated description
5caabd3c 1184 # handle multiple artists on youtube music video
1185 'url': 'https://www.youtube.com/watch?v=74qn0eJSjpA',
1186 'info_dict': {
1187 'id': '74qn0eJSjpA',
1188 'ext': 'mp4',
1189 'title': 'Eastside',
1190 'description': 'md5:290516bb73dcbfab0dcc4efe6c3de5f2',
1191 'upload_date': '20180710',
1192 'uploader': 'Benny Blanco - Topic',
1193 'uploader_id': 'UCzqz_ksRu_WkIzmivMdIS7A',
1194 'artist': 'benny blanco, Halsey, Khalid',
1195 'track': 'Eastside',
1196 'album': 'Eastside',
1197 'release_date': '20180713',
1198 'release_year': 2018,
1199 },
1200 'params': {
1201 'skip_download': True,
1202 },
1203 },
1204 {
822b9d9c 1205 # Youtube Music Auto-generated description
5caabd3c 1206 # handle youtube music video with release_year and no release_date
1207 'url': 'https://www.youtube.com/watch?v=-hcAI0g-f5M',
1208 'info_dict': {
1209 'id': '-hcAI0g-f5M',
1210 'ext': 'mp4',
1211 'title': 'Put It On Me',
1212 'description': 'md5:93c55acc682ae7b0c668f2e34e1c069e',
1213 'upload_date': '20180426',
1214 'uploader': 'Matt Maeson - Topic',
1215 'uploader_id': 'UCnEkIGqtGcQMLk73Kp-Q5LQ',
1216 'artist': 'Matt Maeson',
1217 'track': 'Put It On Me',
1218 'album': 'The Hearse',
1219 'release_date': None,
1220 'release_year': 2018,
1221 },
1222 'params': {
1223 'skip_download': True,
1224 },
1225 },
2eb88d95
PH
1226 ]
1227
e0df6211
PH
def __init__(self, *args, **kwargs):
    """Initialise the extractor with an empty signature-function cache."""
    super(YoutubeIE, self).__init__(*args, **kwargs)
    # Filled lazily by _decrypt_signature, keyed by
    # (player_url, signature cache id).
    self._player_cache = dict()
e0df6211 1231
c5e8d7af
PH
def report_video_info_webpage_download(self, video_id):
    """Print a note that the video info webpage of video_id is being downloaded."""
    message = '%s: Downloading video info webpage' % video_id
    self.to_screen(message)
c5e8d7af 1235
c5e8d7af
PH
def report_information_extraction(self, video_id):
    """Print a note that information for video_id is being extracted."""
    message = '%s: Extracting video information' % video_id
    self.to_screen(message)
c5e8d7af
PH
1239
def report_unavailable_format(self, video_id, format):
    """Print a note that the requested format is not available for video_id."""
    details = (video_id, format)
    self.to_screen('%s: Format %s not available' % details)
c5e8d7af
PH
1243
def report_rtmp_download(self):
    """Print a note that the download will go over the RTMP protocol."""
    message = 'RTMP download detected'
    self.to_screen(message)
c5e8d7af 1247
60064c53
PH
def _signature_cache_id(self, example_sig):
    """Return the lengths of the dot-separated parts of example_sig, joined by dots."""
    part_lengths = [compat_str(len(chunk)) for chunk in example_sig.split('.')]
    return '.'.join(part_lengths)
60064c53
PH
1251
def _extract_signature_function(self, video_id, player_url, example_sig):
    """Return a callable that deciphers signatures for the given player.

    The player id and type (the URL's file extension, 'js' or 'swf') are
    parsed out of player_url.  If the 'youtube-sigfuncs' cache section
    already holds an index permutation for this player and signature
    shape, a function applying that permutation is returned.  Otherwise
    the player is downloaded, its signature routine is extracted, and the
    permutation it applies to a signature as long as example_sig is
    stored in the cache before the routine is returned.

    Raises ExtractorError if the player cannot be identified from its URL
    or is of an unsupported type.
    """
    id_m = re.match(
        r'.*?-(?P<id>[a-zA-Z0-9_-]+)(?:/watch_as3|/html5player(?:-new)?|(?:/[a-z]{2,3}_[A-Z]{2})?/base)?\.(?P<ext>[a-z]+)$',
        player_url)
    if not id_m:
        raise ExtractorError('Cannot identify player %r' % player_url)
    player_type = id_m.group('ext')
    player_id = id_m.group('id')

    # Read from filesystem cache
    func_id = '%s_%s_%s' % (
        player_type, player_id, self._signature_cache_id(example_sig))
    # Invariant check: the cache key must not contain a directory part
    assert os.path.basename(func_id) == func_id

    cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
    if cache_spec is not None:
        return lambda s: ''.join(s[i] for i in cache_spec)

    download_note = (
        'Downloading player %s' % player_url
        if self._downloader.params.get('verbose') else
        'Downloading %s player %s' % (player_type, player_id)
    )
    if player_type == 'js':
        code = self._download_webpage(
            player_url, video_id,
            note=download_note,
            errnote='Download of %s failed' % player_url)
        res = self._parse_sig_js(code)
    elif player_type == 'swf':
        urlh = self._request_webpage(
            player_url, video_id,
            note=download_note,
            errnote='Download of %s failed' % player_url)
        code = urlh.read()
        res = self._parse_sig_swf(code)
    else:
        # Raise explicitly instead of `assert False`: asserts are removed
        # under `python -O`, which would leave `res` unbound below.
        raise ExtractorError('Invalid player type %r' % player_type)

    # Feed the function a probe string whose characters are the code
    # points 0..len-1; the output then lists, per position, the input
    # index that ends up there.
    test_string = ''.join(map(compat_chr, range(len(example_sig))))
    cache_res = res(test_string)
    cache_spec = [ord(c) for c in cache_res]

    self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec)
    return res
1297
def _print_sig_code(self, func, example_sig):
    """Print Python code equivalent to the signature function func.

    func is applied to a probe string whose characters are the code
    points 0..len(example_sig)-1, so the result lists which input index
    lands at each output position.  That index list is rendered as a sum
    of index and slice expressions on `s`, guarded by a check on the
    lengths of the dot-separated signature parts, and sent to the screen.
    """
    def gen_sig_code(idxs):
        """Yield index/slice expressions on `s` that reproduce idxs."""
        def _genslice(start, end, step):
            starts = '' if start == 0 else str(start)
            ends = (':%d' % (end + step)) if end + step >= 0 else ':'
            steps = '' if step == 1 else (':%d' % step)
            return 's[%s%s%s]' % (starts, ends, steps)

        if not idxs:
            # Nothing to describe for an empty permutation
            return

        step = None
        # Quelch pyflakes warnings - start will be set when step is set
        start = '(Never used)'
        # Seed i so a single-element list (where the loop body never runs)
        # still yields its only index instead of hitting an unbound name.
        i = idxs[0]
        for i, prev in zip(idxs[1:], idxs[:-1]):
            if step is not None:
                if i - prev == step:
                    continue
                # The run of consecutive indices ended at prev
                yield _genslice(start, prev, step)
                step = None
                continue
            if i - prev in [-1, 1]:
                # Start of an ascending or descending run
                step = i - prev
                start = prev
                continue
            else:
                yield 's[%d]' % prev
        if step is None:
            yield 's[%d]' % i
        else:
            yield _genslice(start, i, step)

    test_string = ''.join(map(compat_chr, range(len(example_sig))))
    cache_res = func(test_string)
    cache_spec = [ord(c) for c in cache_res]
    expr_code = ' + '.join(gen_sig_code(cache_spec))
    signature_id_tuple = '(%s)' % (
        ', '.join(compat_str(len(p)) for p in example_sig.split('.')))
    code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
            ' return %s\n') % (signature_id_tuple, expr_code)
    self.to_screen('Extracted signature function:\n' + code)
edf3e38e 1336
e0df6211
PH
def _parse_sig_js(self, jscode):
    """Return a callable running the signature function found in jscode.

    The function name is located with the regexes below (tried in order,
    capturing group 'sig'); the function is then extracted with
    JSInterpreter and wrapped so that it takes the signature string as
    its single argument.
    """
    name_patterns = (
        r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
        # Obsolete patterns
        r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
        r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        # NOTE: the two patterns below are identical; both are kept as found
        r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
    )
    funcname = self._search_regex(
        name_patterns, jscode,
        'Initial JS player signature function name', group='sig')

    interpreter = JSInterpreter(jscode)
    decipher = interpreter.extract_function(funcname)

    def run_decipher(s):
        # The extracted function takes its arguments as a list
        return decipher([s])

    return run_decipher
1356
def _parse_sig_swf(self, file_contents):
    """Return a callable running the SWF player's signature function.

    Extracts the 'decipher' function of class 'SignatureDecipher' from
    file_contents with SWFInterpreter and wraps it so that it takes the
    signature string as its single argument.
    """
    TARGET_CLASSNAME = 'SignatureDecipher'
    interpreter = SWFInterpreter(file_contents)
    decipher_class = interpreter.extract_class(TARGET_CLASSNAME)
    decipher = interpreter.extract_function(decipher_class, 'decipher')

    def run_decipher(s):
        # The extracted function takes its arguments as a list
        return decipher([s])

    return run_decipher
1363
def _decrypt_signature(self, s, video_id, player_url, age_gate=False):
    """Turn the encrypted s field into a working signature.

    player_url is made absolute (protocol-relative URLs get 'https:',
    other non-http(s) URLs are joined onto https://www.youtube.com).  The
    signature function is taken from self._player_cache, or extracted and
    cached there on first use.  When the 'youtube_print_sig_code' param
    is set, the function's code is printed as well.  age_gate is not used
    by this method.

    Raises ExtractorError if player_url is None, or wrapping (with the
    traceback in the message) any exception raised while extracting or
    running the signature function.
    """
    if player_url is None:
        raise ExtractorError('Cannot decrypt signature without player_url')

    if player_url.startswith('//'):
        player_url = 'https:' + player_url
    elif not re.match(r'https?://', player_url):
        player_url = compat_urlparse.urljoin(
            'https://www.youtube.com', player_url)

    try:
        cache_key = (player_url, self._signature_cache_id(s))
        if cache_key in self._player_cache:
            decipher = self._player_cache[cache_key]
        else:
            decipher = self._extract_signature_function(
                video_id, player_url, s)
            self._player_cache[cache_key] = decipher
        if self._downloader.params.get('youtube_print_sig_code'):
            self._print_sig_code(decipher, s)
        return decipher(s)
    except Exception as e:
        raise ExtractorError(
            'Signature extraction failed: ' + traceback.format_exc(), cause=e)
e0df6211 1390
def _get_subtitles(self, video_id, webpage):
    """Return the subtitle formats of video_id, keyed by language code.

    The track list is fetched from the timedtext 'list' endpoint; for
    every language (first track wins) one entry per format in
    self._SUBTITLE_FORMATS is built, each a dict with 'url' and 'ext'.
    An empty dict is returned, after a warning, when the list cannot be
    downloaded or contains no tracks.  webpage is not used here.
    """
    list_url = 'https://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id
    try:
        subs_doc = self._download_xml(list_url, video_id, note=False)
    except ExtractorError as err:
        self._downloader.report_warning('unable to download video subtitles: %s' % error_to_compat_str(err))
        return {}

    subtitles = {}
    for track in subs_doc.findall('track'):
        lang = track.attrib['lang_code']
        if lang in subtitles:
            # Keep only the first track listed for a language
            continue
        subtitles[lang] = [{
            'url': 'https://www.youtube.com/api/timedtext?' + compat_urllib_parse_urlencode({
                'lang': lang,
                'v': video_id,
                'fmt': ext,
                'name': track.attrib['name'].encode('utf-8'),
            }),
            'ext': ext,
        } for ext in self._SUBTITLE_FORMATS]

    if subtitles:
        return subtitles
    self._downloader.report_warning('video doesn\'t have subtitles')
    return {}
1422
a72778d3
S
def _get_ytplayer_config(self, video_id, webpage):
    """Return the parsed ytplayer.config object from webpage, or None.

    None is returned when no config is found; JSON parsing is requested
    as non-fatal.
    """
    # User data may contain arbitrary character sequences that can break
    # regex-based JSON extraction: e.g. when '};' is present, the second
    # regex won't capture the whole JSON. As a workaround the more
    # concrete regex is tried first, until proper quoted string handling
    # is implemented to replace it (see
    # https://github.com/ytdl-org/youtube-dl/issues/7468,
    # https://github.com/ytdl-org/youtube-dl/pull/7599)
    config_patterns = (
        r';ytplayer\.config\s*=\s*({.+?});ytplayer',
        r';ytplayer\.config\s*=\s*({.+?});',
    )
    raw_config = self._search_regex(
        config_patterns, webpage, 'ytplayer.config', default=None)
    if not raw_config:
        return None
    return self._parse_json(
        uppercase_escape(raw_config), video_id, fatal=False)
0e49d9a6 1440
360e1ca5 1441 def _get_automatic_captions(self, video_id, webpage):
de7f3446
JMF
1442 """We need the webpage for getting the captions url, pass it as an
1443 argument to speed up the process."""
69ea8ca4 1444 self.to_screen('%s: Looking for automatic captions' % video_id)
a72778d3 1445 player_config = self._get_ytplayer_config(video_id, webpage)
78caa52a 1446 err_msg = 'Couldn\'t find automatic captions for %s' % video_id
a72778d3 1447 if not player_config:
de7f3446
JMF
1448 self._downloader.report_warning(err_msg)
1449 return {}
de7f3446 1450 try:
0792d563 1451 args = player_config['args']
b78b292f
S
1452 caption_url = args.get('ttsurl')
1453 if caption_url:
1454 timestamp = args['timestamp']
1455 # We get the available subtitles
15707c7e 1456 list_params = compat_urllib_parse_urlencode({
b78b292f
S
1457 'type': 'list',
1458 'tlangs': 1,
1459 'asrs': 1,
1460 })
1461 list_url = caption_url + '&' + list_params
1462 caption_list = self._download_xml(list_url, video_id)
1463 original_lang_node = caption_list.find('track')
1464 if original_lang_node is None:
1465 self._downloader.report_warning('Video doesn\'t have automatic captions')
1466 return {}
1467 original_lang = original_lang_node.attrib['lang_code']
1468 caption_kind = original_lang_node.attrib.get('kind', '')
1469
1470 sub_lang_list = {}
1471 for lang_node in caption_list.findall('target'):
1472 sub_lang = lang_node.attrib['lang_code']
1473 sub_formats = []
1474 for ext in self._SUBTITLE_FORMATS:
15707c7e 1475 params = compat_urllib_parse_urlencode({
b78b292f
S
1476 'lang': original_lang,
1477 'tlang': sub_lang,
1478 'fmt': ext,
1479 'ts': timestamp,
1480 'kind': caption_kind,
1481 })
1482 sub_formats.append({
1483 'url': caption_url + '&' + params,
1484 'ext': ext,
1485 })
1486 sub_lang_list[sub_lang] = sub_formats
1487 return sub_lang_list
1488
ddbb4c5c
S
1489 def make_captions(sub_url, sub_langs):
1490 parsed_sub_url = compat_urllib_parse_urlparse(sub_url)
1491 caption_qs = compat_parse_qs(parsed_sub_url.query)
1492 captions = {}
1493 for sub_lang in sub_langs:
1494 sub_formats = []
1495 for ext in self._SUBTITLE_FORMATS:
1496 caption_qs.update({
1497 'tlang': [sub_lang],
1498 'fmt': [ext],
1499 })
1500 sub_url = compat_urlparse.urlunparse(parsed_sub_url._replace(
1501 query=compat_urllib_parse_urlencode(caption_qs, True)))
1502 sub_formats.append({
1503 'url': sub_url,
1504 'ext': ext,
1505 })
1506 captions[sub_lang] = sub_formats
1507 return captions
1508
1509 # New captions format as of 22.06.2017
1510 player_response = args.get('player_response')
1511 if player_response and isinstance(player_response, compat_str):
1512 player_response = self._parse_json(
1513 player_response, video_id, fatal=False)
1514 if player_response:
1515 renderer = player_response['captions']['playerCaptionsTracklistRenderer']
1516 base_url = renderer['captionTracks'][0]['baseUrl']
1517 sub_lang_list = []
1518 for lang in renderer['translationLanguages']:
1519 lang_code = lang.get('languageCode')
1520 if lang_code:
1521 sub_lang_list.append(lang_code)
1522 return make_captions(base_url, sub_lang_list)
1523
b78b292f
S
1524 # Some videos don't provide ttsurl but rather caption_tracks and
1525 # caption_translation_languages (e.g. 20LmZk1hakA)
ddbb4c5c 1526 # Not used anymore as of 22.06.2017
b78b292f
S
1527 caption_tracks = args['caption_tracks']
1528 caption_translation_languages = args['caption_translation_languages']
1529 caption_url = compat_parse_qs(caption_tracks.split(',')[0])['u'][0]
ddbb4c5c 1530 sub_lang_list = []
b78b292f
S
1531 for lang in caption_translation_languages.split(','):
1532 lang_qs = compat_parse_qs(compat_urllib_parse_unquote_plus(lang))
1533 sub_lang = lang_qs.get('lc', [None])[0]
ddbb4c5c
S
1534 if sub_lang:
1535 sub_lang_list.append(sub_lang)
1536 return make_captions(caption_url, sub_lang_list)
de7f3446
JMF
1537 # An extractor error can be raised by the download process if there are
1538 # no automatic captions but there are subtitles
ddbb4c5c 1539 except (KeyError, IndexError, ExtractorError):
de7f3446
JMF
1540 self._downloader.report_warning(err_msg)
1541 return {}
1542
21c340b8
S
def _mark_watched(self, video_id, video_info, player_response):
    """Request the playback-stats URL of a video (reported as 'Marking watched').

    The base URL is taken from ``player_response`` (playbackTracking ->
    videostatsPlaybackUrl -> baseUrl) and, failing that, from the first
    ``videostats_playback_base_url`` entry of ``video_info``.  Nothing is
    done when neither yields a valid URL.  The download is non-fatal.
    """
    base_url = try_get(
        player_response,
        lambda x: x['playbackTracking']['videostatsPlaybackUrl']['baseUrl'])
    if not base_url:
        base_url = try_get(
            video_info, lambda x: x['videostats_playback_base_url'][0])
    stats_url = url_or_none(base_url)
    if not stats_url:
        return

    url_parts = compat_urlparse.urlparse(stats_url)
    query = compat_urlparse.parse_qs(url_parts.query)

    # cpn generation algorithm is reverse engineered from base.js.
    # In fact it works even with dummy cpn.
    alphabet = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
    cpn_chars = [alphabet[random.randint(0, 256) & 63] for _ in range(0, 16)]

    # Values are lists because parse_qs yields lists and urlencode is
    # called with doseq=True below.
    query['ver'] = ['2']
    query['cpn'] = [''.join(cpn_chars)]
    encoded_query = compat_urllib_parse_urlencode(query, True)
    stats_url = compat_urlparse.urlunparse(
        url_parts._replace(query=encoded_query))

    self._download_webpage(
        stats_url, video_id, 'Marking watched',
        'Unable to mark watched', fatal=False)
1568
66c9fa36
S
@staticmethod
def _extract_urls(webpage):
    """Collect YouTube embeds referenced by the HTML of a web page.

    Three kinds of embeds are recognised:
      * player URLs (``/embed/``, ``/v/`` or ``/p/`` followed by an
        11-character id) used as ``src``/``data``/``data-video-url``
        attributes or passed to ``embedSWF(`` / ``new SWFObject(``;
      * ``lazyYT`` elements carrying a ``data-youtube-id`` attribute;
      * the Wordpress "YouTube Video Importer" plugin
        (``yvii_single_video_player`` divs with ``data-video_id``).

    Returns a list of strings in the order above; entries of the first
    kind are URLs, the other two are the raw attribute values.
    """
    # Embedded YouTube player.
    # NOTE: the embedSWF alternative used to read ``embedSWF\(?:\s*`` which
    # demands a literal ':' and therefore never matched ``embedSWF("...")``.
    entries = [
        unescapeHTML(mobj.group('url'))
        for mobj in re.finditer(r'''(?x)
        (?:
            <iframe[^>]+?src=|
            data-video-url=|
            <embed[^>]+?src=|
            embedSWF\(\s*|
            <object[^>]+data=|
            new\s+SWFObject\(
        )
        (["\'])
        (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
            (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
        \1''', webpage)]

    # lazyYT YouTube embed
    entries.extend(
        unescapeHTML(youtube_id)
        for youtube_id in re.findall(
            r'class="lazyYT" data-youtube-id="([^"]+)"', webpage))

    # Wordpress "YouTube Video Importer" plugin
    matches = re.findall(r'''(?x)<div[^>]+
        class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
        data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
    # findall yields (q1, q2, video_id) tuples; only the id is of interest.
    entries.extend(m[-1] for m in matches)

    return entries
1600
@staticmethod
def _extract_url(webpage):
    """Return the first entry found by ``YoutubeIE._extract_urls`` or None."""
    found = YoutubeIE._extract_urls(webpage)
    if not found:
        return None
    return found[0]
1605
97665381
PH
@classmethod
def extract_id(cls, url):
    """Return the second capture group of ``cls._VALID_URL`` matched on *url*.

    The pattern is applied with ``re.VERBOSE`` from the start of the string.
    Raises ExtractorError when *url* does not match.
    """
    match = re.match(cls._VALID_URL, url, re.VERBOSE)
    if match is not None:
        return match.group(2)
    raise ExtractorError('Invalid URL: %s' % url)
1613
9cafc3fd
S
1614 @staticmethod
1615 def _extract_chapters(description, duration):
1616 if not description:
1617 return None
1618 chapter_lines = re.findall(
1619 r'(?:^|<br\s*/>)([^<]*<a[^>]+onclick=["\']yt\.www\.watch\.player\.seekTo[^>]+>(\d{1,2}:\d{1,2}(?::\d{1,2})?)</a>[^>]*)(?=$|<br\s*/>)',
1620 description)
1621 if not chapter_lines:
1622 return None
1623 chapters = []
1624 for next_num, (chapter_line, time_point) in enumerate(
1625 chapter_lines, start=1):
1626 start_time = parse_duration(time_point)
1627 if start_time is None:
1628 continue
39d4c1be
S
1629 if start_time > duration:
1630 break
9cafc3fd
S
1631 end_time = (duration if next_num == len(chapter_lines)
1632 else parse_duration(chapter_lines[next_num][1]))
1633 if end_time is None:
1634 continue
39d4c1be
S
1635 if end_time > duration:
1636 end_time = duration
1637 if start_time > end_time:
1638 break
9cafc3fd
S
1639 chapter_title = re.sub(
1640 r'<a[^>]+>[^<]+</a>', '', chapter_line).strip(' \t-')
1641 chapter_title = re.sub(r'\s+', ' ', chapter_title)
1642 chapters.append({
1643 'start_time': start_time,
1644 'end_time': end_time,
1645 'title': chapter_title,
1646 })
1647 return chapters
1648
c5e8d7af 1649 def _real_extract(self, url):
cf7e015f
S
1650 url, smuggled_data = unsmuggle_url(url, {})
1651
7e8c0af0 1652 proto = (
78caa52a
PH
1653 'http' if self._downloader.params.get('prefer_insecure', False)
1654 else 'https')
7e8c0af0 1655
7c80519c 1656 start_time = None
297a564b 1657 end_time = None
7c80519c
JMF
1658 parsed_url = compat_urllib_parse_urlparse(url)
1659 for component in [parsed_url.fragment, parsed_url.query]:
1660 query = compat_parse_qs(component)
297a564b 1661 if start_time is None and 't' in query:
7c80519c 1662 start_time = parse_duration(query['t'][0])
2929fa0e
JMF
1663 if start_time is None and 'start' in query:
1664 start_time = parse_duration(query['start'][0])
297a564b
JMF
1665 if end_time is None and 'end' in query:
1666 end_time = parse_duration(query['end'][0])
7c80519c 1667
c5e8d7af
PH
1668 # Extract original video URL from URL with redirection, like age verification, using next_url parameter
1669 mobj = re.search(self._NEXT_URL_RE, url)
1670 if mobj:
7fd002c0 1671 url = proto + '://www.youtube.com/' + compat_urllib_parse_unquote(mobj.group(1)).lstrip('/')
97665381 1672 video_id = self.extract_id(url)
c5e8d7af
PH
1673
1674 # Get video webpage
aa79ac0c 1675 url = proto + '://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1&bpctr=9999999999' % video_id
a1f934b1 1676 video_webpage = self._download_webpage(url, video_id)
c5e8d7af
PH
1677
1678 # Attempt to extract SWF player URL
e0df6211 1679 mobj = re.search(r'swfConfig.*?"(https?:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage)
c5e8d7af
PH
1680 if mobj is not None:
1681 player_url = re.sub(r'\\(.)', r'\1', mobj.group(1))
1682 else:
1683 player_url = None
1684
d8d24a92
S
1685 dash_mpds = []
1686
1687 def add_dash_mpd(video_info):
1688 dash_mpd = video_info.get('dashmpd')
1689 if dash_mpd and dash_mpd[0] not in dash_mpds:
1690 dash_mpds.append(dash_mpd[0])
1691
561b456e
S
1692 def add_dash_mpd_pr(pl_response):
1693 dash_mpd = url_or_none(try_get(
1694 pl_response, lambda x: x['streamingData']['dashManifestUrl'],
1695 compat_str))
1696 if dash_mpd and dash_mpd not in dash_mpds:
1697 dash_mpds.append(dash_mpd)
1698
c7121fa7
S
1699 is_live = None
1700 view_count = None
1701
1702 def extract_view_count(v_info):
1703 return int_or_none(try_get(v_info, lambda x: x['view_count'][0]))
1704
026fbedc
S
1705 def extract_token(v_info):
1706 return dict_get(v_info, ('account_playback_token', 'accountPlaybackToken', 'token'))
1707
c2d125d9
S
1708 def extract_player_response(player_response, video_id):
1709 pl_response = str_or_none(player_response)
1710 if not pl_response:
1711 return
1712 pl_response = self._parse_json(pl_response, video_id, fatal=False)
1713 if isinstance(pl_response, dict):
1714 add_dash_mpd_pr(pl_response)
1715 return pl_response
1716
dbdaaa23
S
1717 player_response = {}
1718
c5e8d7af 1719 # Get video info
6449cd80 1720 embed_webpage = None
c108eb73 1721 if re.search(r'player-age-gate-content">', video_webpage) is not None:
c108eb73
JMF
1722 age_gate = True
1723 # We simulate the access to the video from www.youtube.com/v/{video_id}
1724 # this can be viewed without login into Youtube
beb95e77
CL
1725 url = proto + '://www.youtube.com/embed/%s' % video_id
1726 embed_webpage = self._download_webpage(url, video_id, 'Downloading embed webpage')
15707c7e 1727 data = compat_urllib_parse_urlencode({
2c57c7fa
JMF
1728 'video_id': video_id,
1729 'eurl': 'https://youtube.googleapis.com/v/' + video_id,
c084c934 1730 'sts': self._search_regex(
beb95e77 1731 r'"sts"\s*:\s*(\d+)', embed_webpage, 'sts', default=''),
2c57c7fa 1732 })
7e8c0af0 1733 video_info_url = proto + '://www.youtube.com/get_video_info?' + data
94bd3613
PH
1734 video_info_webpage = self._download_webpage(
1735 video_info_url, video_id,
20436c30 1736 note='Refetching age-gated info webpage',
94bd3613 1737 errnote='unable to download video info webpage')
c5e8d7af 1738 video_info = compat_parse_qs(video_info_webpage)
c2d125d9
S
1739 pl_response = video_info.get('player_response', [None])[0]
1740 player_response = extract_player_response(pl_response, video_id)
d8d24a92 1741 add_dash_mpd(video_info)
c2d125d9 1742 view_count = extract_view_count(video_info)
c108eb73
JMF
1743 else:
1744 age_gate = False
bc93bdb5 1745 video_info = None
dc4e4f90 1746 sts = None
d8d24a92 1747 # Try looking directly into the video webpage
a72778d3
S
1748 ytplayer_config = self._get_ytplayer_config(video_id, video_webpage)
1749 if ytplayer_config:
4e62ebe2 1750 args = ytplayer_config['args']
4c76aa06 1751 if args.get('url_encoded_fmt_stream_map') or args.get('hlsvp'):
d8d24a92
S
1752 # Convert to the same format returned by compat_parse_qs
1753 video_info = dict((k, [v]) for k, v in args.items())
1754 add_dash_mpd(video_info)
6496ccb4
S
1755 # Rental video is not rented but preview is available (e.g.
1756 # https://www.youtube.com/watch?v=yYr8q0y5Jfg,
067aa17e 1757 # https://github.com/ytdl-org/youtube-dl/issues/10532)
6496ccb4
S
1758 if not video_info and args.get('ypc_vid'):
1759 return self.url_result(
1760 args['ypc_vid'], YoutubeIE.ie_key(), video_id=args['ypc_vid'])
2fe1ff85
JMF
1761 if args.get('livestream') == '1' or args.get('live_playback') == 1:
1762 is_live = True
dc4e4f90 1763 sts = ytplayer_config.get('sts')
dbdaaa23 1764 if not player_response:
c2d125d9 1765 player_response = extract_player_response(args.get('player_response'), video_id)
0a3cf9ad 1766 if not video_info or self._downloader.params.get('youtube_include_dash_manifest', True):
561b456e 1767 add_dash_mpd_pr(player_response)
0a3cf9ad
S
1768 # We also try looking in get_video_info since it may contain different dashmpd
1769 # URL that points to a DASH manifest with possibly different itag set (some itags
1770 # are missing from DASH manifest pointed by webpage's dashmpd, some - from DASH
1771 # manifest pointed by get_video_info's dashmpd).
1772 # The general idea is to take a union of itags of both DASH manifests (for example
067aa17e 1773 # video with such 'manifest behavior' see https://github.com/ytdl-org/youtube-dl/issues/6093)
4e62ebe2 1774 self.report_video_info_webpage_download(video_id)
a61ce714 1775 for el in ('embedded', 'detailpage', 'vevo', ''):
dc4e4f90
S
1776 query = {
1777 'video_id': video_id,
1778 'ps': 'default',
1779 'eurl': '',
1780 'gl': 'US',
1781 'hl': 'en',
1782 }
1783 if el:
1784 query['el'] = el
1785 if sts:
1786 query['sts'] = sts
810fb84d 1787 video_info_webpage = self._download_webpage(
dc4e4f90 1788 '%s://www.youtube.com/get_video_info' % proto,
4e62ebe2 1789 video_id, note=False,
dc4e4f90
S
1790 errnote='unable to download video info webpage',
1791 fatal=False, query=query)
1792 if not video_info_webpage:
1793 continue
0a3cf9ad 1794 get_video_info = compat_parse_qs(video_info_webpage)
dbdaaa23
S
1795 if not player_response:
1796 pl_response = get_video_info.get('player_response', [None])[0]
c2d125d9 1797 player_response = extract_player_response(pl_response, video_id)
fd545fc6 1798 add_dash_mpd(get_video_info)
c7121fa7
S
1799 if view_count is None:
1800 view_count = extract_view_count(get_video_info)
0a3cf9ad
S
1801 if not video_info:
1802 video_info = get_video_info
026fbedc 1803 get_token = extract_token(get_video_info)
56667d62 1804 if get_token:
89ea063e
S
1805 # Different get_video_info requests may report different results, e.g.
1806 # some may report video unavailability, but some may serve it without
067aa17e 1807 # any complaint (see https://github.com/ytdl-org/youtube-dl/issues/7362,
89ea063e
S
1808 # the original webpage as well as el=info and el=embedded get_video_info
1809 # requests report video unavailability due to geo restriction while
1810 # el=detailpage succeeds and returns valid data). This is probably
1811 # due to YouTube measures against IP ranges of hosting providers.
1812 # Working around by preferring the first succeeded video_info containing
1813 # the token if no such video_info yet was found.
026fbedc 1814 token = extract_token(video_info)
56667d62 1815 if not token:
44b2264f 1816 video_info = get_video_info
4e62ebe2 1817 break
bbb7c3f7
YCH
1818
1819 def extract_unavailable_message():
0add33ab
S
1820 messages = []
1821 for tag, kind in (('h1', 'message'), ('div', 'submessage')):
1822 msg = self._html_search_regex(
1823 r'(?s)<{tag}[^>]+id=["\']unavailable-{kind}["\'][^>]*>(.+?)</{tag}>'.format(tag=tag, kind=kind),
1824 video_webpage, 'unavailable %s' % kind, default=None)
1825 if msg:
1826 messages.append(msg)
1827 if messages:
1828 return '\n'.join(messages)
bbb7c3f7 1829
15be3eb5
RA
1830 if not video_info:
1831 unavailable_message = extract_unavailable_message()
1832 if not unavailable_message:
1833 unavailable_message = 'Unable to extract video data'
1834 raise ExtractorError(
1835 'YouTube said: %s' % unavailable_message, expected=True, video_id=video_id)
1836
dbdaaa23
S
1837 video_details = try_get(
1838 player_response, lambda x: x['videoDetails'], dict) or {}
1839
8dbf751a
RA
1840 video_title = video_info.get('title', [None])[0] or video_details.get('title')
1841 if not video_title:
cf7e015f
S
1842 self._downloader.report_warning('Unable to extract video title')
1843 video_title = '_'
1844
9cafc3fd 1845 description_original = video_description = get_element_by_id("eow-description", video_webpage)
cf7e015f 1846 if video_description:
fa4bc6e7
RA
1847
1848 def replace_url(m):
1849 redir_url = compat_urlparse.urljoin(url, m.group(1))
1850 parsed_redir_url = compat_urllib_parse_urlparse(redir_url)
1851 if re.search(r'^(?:www\.)?(?:youtube(?:-nocookie)?\.com|youtu\.be)$', parsed_redir_url.netloc) and parsed_redir_url.path == '/redirect':
1852 qs = compat_parse_qs(parsed_redir_url.query)
1853 q = qs.get('q')
1854 if q and q[0]:
1855 return q[0]
1856 return redir_url
1857
9cafc3fd 1858 description_original = video_description = re.sub(r'''(?x)
cf7e015f 1859 <a\s+
25cb7a0e 1860 (?:[a-zA-Z-]+="[^"]*"\s+)*?
23f13e97 1861 (?:title|href)="([^"]+)"\s+
25cb7a0e 1862 (?:[a-zA-Z-]+="[^"]*"\s+)*?
525cedb9 1863 class="[^"]*"[^>]*>
23f13e97 1864 [^<]+\.{3}\s*
cf7e015f 1865 </a>
fa4bc6e7 1866 ''', replace_url, video_description)
cf7e015f
S
1867 video_description = clean_html(video_description)
1868 else:
8dbf751a 1869 video_description = self._html_search_meta('description', video_webpage) or video_details.get('shortDescription')
cf7e015f 1870
8fe10494 1871 if not smuggled_data.get('force_singlefeed', False):
5e1eddb9 1872 if not self._downloader.params.get('noplaylist'):
8fe10494
S
1873 multifeed_metadata_list = try_get(
1874 player_response,
1875 lambda x: x['multicamera']['playerLegacyMulticameraRenderer']['metadataList'],
1876 compat_str) or try_get(
1877 video_info, lambda x: x['multifeed_metadata_list'][0], compat_str)
1878 if multifeed_metadata_list:
1879 entries = []
1880 feed_ids = []
1881 for feed in multifeed_metadata_list.split(','):
1882 # Unquote should take place before split on comma (,) since textual
1883 # fields may contain comma as well (see
067aa17e 1884 # https://github.com/ytdl-org/youtube-dl/issues/8536)
8fe10494
S
1885 feed_data = compat_parse_qs(compat_urllib_parse_unquote_plus(feed))
1886 entries.append({
1887 '_type': 'url_transparent',
1888 'ie_key': 'Youtube',
1889 'url': smuggle_url(
1890 '%s://www.youtube.com/watch?v=%s' % (proto, feed_data['id'][0]),
1891 {'force_singlefeed': True}),
1892 'title': '%s (%s)' % (video_title, feed_data['title'][0]),
1893 })
1894 feed_ids.append(feed_data['id'][0])
1895 self.to_screen(
1896 'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
1897 % (', '.join(feed_ids), video_id))
1898 return self.playlist_result(entries, video_id, video_title, video_description)
1899 else:
1900 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
cf7e015f 1901
c7121fa7 1902 if view_count is None:
1c9c8de2 1903 view_count = extract_view_count(video_info)
dbdaaa23
S
1904 if view_count is None and video_details:
1905 view_count = int_or_none(video_details.get('viewCount'))
1d699755 1906
27019dbb 1907 if is_live is None:
898238e9 1908 is_live = bool_or_none(video_details.get('isLive'))
27019dbb 1909
c5e8d7af
PH
1910 # Check for "rental" videos
1911 if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info:
067aa17e 1912 raise ExtractorError('"rental" videos not supported. See https://github.com/ytdl-org/youtube-dl/issues/359 for more information.', expected=True)
c5e8d7af 1913
c63ca0ee
S
1914 def _extract_filesize(media_url):
1915 return int_or_none(self._search_regex(
1916 r'\bclen[=/](\d+)', media_url, 'filesize', default=None))
1917
c5e8d7af
PH
1918 if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
1919 self.report_rtmp_download()
dd27fd17
PH
1920 formats = [{
1921 'format_id': '_rtmp',
1922 'protocol': 'rtmp',
1923 'url': video_info['conn'][0],
1924 'player_url': player_url,
1925 }]
391dd6f0 1926 elif not is_live and (len(video_info.get('url_encoded_fmt_stream_map', [''])[0]) >= 1 or len(video_info.get('adaptive_fmts', [''])[0]) >= 1):
5f6a1245 1927 encoded_url_map = video_info.get('url_encoded_fmt_stream_map', [''])[0] + ',' + video_info.get('adaptive_fmts', [''])[0]
00fe14fc 1928 if 'rtmpe%3Dyes' in encoded_url_map:
067aa17e 1929 raise ExtractorError('rtmpe downloads are not supported, see https://github.com/ytdl-org/youtube-dl/issues/343 for more information.', expected=True)
3318832e 1930 formats_spec = {}
82156fdb 1931 fmt_list = video_info.get('fmt_list', [''])[0]
1932 if fmt_list:
1933 for fmt in fmt_list.split(','):
1934 spec = fmt.split('/')
3318832e 1935 if len(spec) > 1:
1936 width_height = spec[1].split('x')
1937 if len(width_height) == 2:
1938 formats_spec[spec[0]] = {
1939 'resolution': spec[1],
1940 'width': int_or_none(width_height[0]),
1941 'height': int_or_none(width_height[1]),
1942 }
54fc90aa 1943 q = qualities(['small', 'medium', 'hd720'])
140a13f5
RA
1944 streaming_formats = try_get(player_response, lambda x: x['streamingData']['formats'], list)
1945 if streaming_formats:
1946 for fmt in streaming_formats:
1947 itag = str_or_none(fmt.get('itag'))
1948 if not itag:
1949 continue
1950 quality = fmt.get('quality')
1951 quality_label = fmt.get('qualityLabel') or quality
1952 formats_spec[itag] = {
1953 'asr': int_or_none(fmt.get('audioSampleRate')),
1954 'filesize': int_or_none(fmt.get('contentLength')),
1955 'format_note': quality_label,
1956 'fps': int_or_none(fmt.get('fps')),
1957 'height': int_or_none(fmt.get('height')),
1958 'quality': q(quality),
1959 # bitrate for itag 43 is always 2147483647
1960 'tbr': float_or_none(fmt.get('averageBitrate') or fmt.get('bitrate'), 1000) if itag != '43' else None,
1961 'width': int_or_none(fmt.get('width')),
1962 }
c9afb51c 1963 formats = []
00fe14fc 1964 for url_data_str in encoded_url_map.split(','):
c5e8d7af 1965 url_data = compat_parse_qs(url_data_str)
0d297518 1966 if 'itag' not in url_data or 'url' not in url_data or url_data.get('drm_families'):
201e9eaa 1967 continue
2f483bc1
S
1968 stream_type = int_or_none(try_get(url_data, lambda x: x['stream_type'][0]))
1969 # Unsupported FORMAT_STREAM_TYPE_OTF
1970 if stream_type == 3:
1971 continue
201e9eaa
PH
1972 format_id = url_data['itag'][0]
1973 url = url_data['url'][0]
1974
a49eccdf 1975 if 's' in url_data or self._downloader.params.get('youtube_include_dash_manifest', True):
6449cd80 1976 ASSETS_RE = r'"assets":.+?"js":\s*("[^"]+")'
beb95e77 1977 jsplayer_url_json = self._search_regex(
6449cd80
PH
1978 ASSETS_RE,
1979 embed_webpage if age_gate else video_webpage,
1980 'JS player URL (1)', default=None)
1981 if not jsplayer_url_json and not age_gate:
1982 # We need the embed website after all
1983 if embed_webpage is None:
1984 embed_url = proto + '://www.youtube.com/embed/%s' % video_id
1985 embed_webpage = self._download_webpage(
1986 embed_url, video_id, 'Downloading embed webpage')
1987 jsplayer_url_json = self._search_regex(
1988 ASSETS_RE, embed_webpage, 'JS player URL')
1989
beb95e77 1990 player_url = json.loads(jsplayer_url_json)
201e9eaa
PH
1991 if player_url is None:
1992 player_url_json = self._search_regex(
1993 r'ytplayer\.config.*?"url"\s*:\s*("[^"]+")',
78caa52a 1994 video_webpage, 'age gate player URL')
201e9eaa
PH
1995 player_url = json.loads(player_url_json)
1996
a49eccdf
YCH
1997 if 'sig' in url_data:
1998 url += '&signature=' + url_data['sig'][0]
1999 elif 's' in url_data:
2000 encrypted_sig = url_data['s'][0]
2001
201e9eaa 2002 if self._downloader.params.get('verbose'):
cf010131 2003 if player_url is None:
201e9eaa
PH
2004 player_version = 'unknown'
2005 player_desc = 'unknown'
2006 else:
2007 if player_url.endswith('swf'):
2008 player_version = self._search_regex(
2009 r'-(.+?)(?:/watch_as3)?\.swf$', player_url,
78caa52a 2010 'flash player', fatal=False)
201e9eaa 2011 player_desc = 'flash player %s' % player_version
cf010131 2012 else:
201e9eaa 2013 player_version = self._search_regex(
b62985a9 2014 [r'html5player-([^/]+?)(?:/html5player(?:-new)?)?\.js',
63529e93 2015 r'(?:www|player(?:_ias)?)-([^/]+)(?:/[a-z]{2,3}_[A-Z]{2})?/base\.js'],
201e9eaa
PH
2016 player_url,
2017 'html5 player', fatal=False)
78caa52a 2018 player_desc = 'html5 player %s' % player_version
201e9eaa 2019
60064c53 2020 parts_sizes = self._signature_cache_id(encrypted_sig)
69ea8ca4 2021 self.to_screen('{%s} signature length %s, %s' %
9e1a5b84 2022 (format_id, parts_sizes, player_desc))
201e9eaa
PH
2023
2024 signature = self._decrypt_signature(
2025 encrypted_sig, video_id, player_url, age_gate)
027ffdca
S
2026 sp = try_get(url_data, lambda x: x['sp'][0], compat_str) or 'signature'
2027 url += '&%s=%s' % (sp, signature)
201e9eaa
PH
2028 if 'ratebypass' not in url:
2029 url += '&ratebypass=yes'
c9afb51c 2030
94278f72
YCH
2031 dct = {
2032 'format_id': format_id,
2033 'url': url,
2034 'player_url': player_url,
2035 }
2036 if format_id in self._formats:
2037 dct.update(self._formats[format_id])
3318832e 2038 if format_id in formats_spec:
2039 dct.update(formats_spec[format_id])
94278f72 2040
aabc2be6 2041 # Some itags are not included in DASH manifest thus corresponding formats will
067aa17e 2042 # lack metadata (see https://github.com/ytdl-org/youtube-dl/pull/5993).
aabc2be6
S
2043 # Trying to extract metadata from url_encoded_fmt_stream_map entry.
2044 mobj = re.search(r'^(?P<width>\d+)[xX](?P<height>\d+)$', url_data.get('size', [''])[0])
2045 width, height = (int(mobj.group('width')), int(mobj.group('height'))) if mobj else (None, None)
94278f72 2046
c63ca0ee
S
2047 filesize = int_or_none(url_data.get(
2048 'clen', [None])[0]) or _extract_filesize(url)
2049
140a13f5 2050 quality = url_data.get('quality', [None])[0]
54fc90aa 2051
94278f72 2052 more_fields = {
c63ca0ee 2053 'filesize': filesize,
aabc2be6 2054 'tbr': float_or_none(url_data.get('bitrate', [None])[0], 1000),
c9afb51c
AH
2055 'width': width,
2056 'height': height,
2057 'fps': int_or_none(url_data.get('fps', [None])[0]),
140a13f5 2058 'format_note': url_data.get('quality_label', [None])[0] or quality,
54fc90aa 2059 'quality': q(quality),
c9afb51c 2060 }
94278f72
YCH
2061 for key, value in more_fields.items():
2062 if value:
2063 dct[key] = value
aabc2be6
S
2064 type_ = url_data.get('type', [None])[0]
2065 if type_:
2066 type_split = type_.split(';')
2067 kind_ext = type_split[0].split('/')
2068 if len(kind_ext) == 2:
94278f72
YCH
2069 kind, _ = kind_ext
2070 dct['ext'] = mimetype2ext(type_split[0])
aabc2be6
S
2071 if kind in ('audio', 'video'):
2072 codecs = None
2073 for mobj in re.finditer(
2074 r'(?P<key>[a-zA-Z_-]+)=(?P<quote>["\']?)(?P<val>.+?)(?P=quote)(?:;|$)', type_):
2075 if mobj.group('key') == 'codecs':
2076 codecs = mobj.group('val')
2077 break
2078 if codecs:
6310acf5 2079 dct.update(parse_codecs(codecs))
e4a60912
S
2080 if dct.get('acodec') == 'none' or dct.get('vcodec') == 'none':
2081 dct['downloader_options'] = {
2082 # Youtube throttles chunks >~10M
2083 'http_chunk_size': 10485760,
2084 }
aabc2be6 2085 formats.append(dct)
c5e8d7af 2086 else:
c3e54389
S
2087 manifest_url = (
2088 url_or_none(try_get(
2089 player_response,
2090 lambda x: x['streamingData']['hlsManifestUrl'],
3089bc74
S
2091 compat_str))
2092 or url_or_none(try_get(
c3e54389
S
2093 video_info, lambda x: x['hlsvp'][0], compat_str)))
2094 if manifest_url:
2095 formats = []
2096 m3u8_formats = self._extract_m3u8_formats(
2097 manifest_url, video_id, 'mp4', fatal=False)
2098 for a_format in m3u8_formats:
2099 itag = self._search_regex(
2100 r'/itag/(\d+)/', a_format['url'], 'itag', default=None)
2101 if itag:
2102 a_format['format_id'] = itag
2103 if itag in self._formats:
2104 dct = self._formats[itag].copy()
2105 dct.update(a_format)
2106 a_format = dct
2107 a_format['player_url'] = player_url
2108 # Accept-Encoding header causes failures in live streams on Youtube and Youtube Gaming
2109 a_format.setdefault('http_headers', {})['Youtubedl-no-compression'] = 'True'
2110 formats.append(a_format)
2111 else:
13577349 2112 error_message = extract_unavailable_message()
c3e54389 2113 if not error_message:
13577349
S
2114 error_message = clean_html(try_get(
2115 player_response, lambda x: x['playabilityStatus']['reason'],
2116 compat_str))
2117 if not error_message:
2118 error_message = clean_html(
2119 try_get(video_info, lambda x: x['reason'][0], compat_str))
c3e54389
S
2120 if error_message:
2121 raise ExtractorError(error_message, expected=True)
2122 raise ExtractorError('no conn, hlsvp, hlsManifestUrl or url_encoded_fmt_stream_map information found in video info')
c5e8d7af 2123
7e72694b 2124 # uploader
dbdaaa23
S
2125 video_uploader = try_get(
2126 video_info, lambda x: x['author'][0],
2127 compat_str) or str_or_none(video_details.get('author'))
7e72694b
S
2128 if video_uploader:
2129 video_uploader = compat_urllib_parse_unquote_plus(video_uploader)
2130 else:
2131 self._downloader.report_warning('unable to extract uploader name')
2132
2133 # uploader_id
2134 video_uploader_id = None
2135 video_uploader_url = None
2136 mobj = re.search(
2137 r'<link itemprop="url" href="(?P<uploader_url>https?://www\.youtube\.com/(?:user|channel)/(?P<uploader_id>[^"]+))">',
2138 video_webpage)
2139 if mobj is not None:
2140 video_uploader_id = mobj.group('uploader_id')
2141 video_uploader_url = mobj.group('uploader_url')
2142 else:
2143 self._downloader.report_warning('unable to extract uploader nickname')
2144
b45a9e69 2145 channel_id = (
3089bc74
S
2146 str_or_none(video_details.get('channelId'))
2147 or self._html_search_meta(
2148 'channelId', video_webpage, 'channel id', default=None)
2149 or self._search_regex(
b45a9e69 2150 r'data-channel-external-id=(["\'])(?P<id>(?:(?!\1).)+)\1',
2151 video_webpage, 'channel id', default=None, group='id'))
dd4c4492
S
2152 channel_url = 'http://www.youtube.com/channel/%s' % channel_id if channel_id else None
2153
7e72694b
S
2154 # thumbnail image
2155 # We try first to get a high quality image:
2156 m_thumb = re.search(r'<span itemprop="thumbnail".*?href="(.*?)">',
2157 video_webpage, re.DOTALL)
2158 if m_thumb is not None:
2159 video_thumbnail = m_thumb.group(1)
2160 elif 'thumbnail_url' not in video_info:
2161 self._downloader.report_warning('unable to extract video thumbnail')
2162 video_thumbnail = None
2163 else: # don't panic if we can't find it
2164 video_thumbnail = compat_urllib_parse_unquote_plus(video_info['thumbnail_url'][0])
2165
2166 # upload date
2167 upload_date = self._html_search_meta(
2168 'datePublished', video_webpage, 'upload date', default=None)
2169 if not upload_date:
2170 upload_date = self._search_regex(
2171 [r'(?s)id="eow-date.*?>(.*?)</span>',
2172 r'(?:id="watch-uploader-info".*?>.*?|["\']simpleText["\']\s*:\s*["\'])(?:Published|Uploaded|Streamed live|Started) on (.+?)[<"\']'],
2173 video_webpage, 'upload date', default=None)
2174 upload_date = unified_strdate(upload_date)
2175
2176 video_license = self._html_search_regex(
2177 r'<h4[^>]+class="title"[^>]*>\s*License\s*</h4>\s*<ul[^>]*>\s*<li>(.+?)</li',
2178 video_webpage, 'license', default=None)
2179
2180 m_music = re.search(
2181 r'''(?x)
2182 <h4[^>]+class="title"[^>]*>\s*Music\s*</h4>\s*
2183 <ul[^>]*>\s*
2184 <li>(?P<title>.+?)
2185 by (?P<creator>.+?)
2186 (?:
2187 \(.+?\)|
2188 <a[^>]*
2189 (?:
2190 \bhref=["\']/red[^>]*>| # drop possible
2191 >\s*Listen ad-free with YouTube Red # YouTube Red ad
2192 )
2193 .*?
2194 )?</li
2195 ''',
2196 video_webpage)
2197 if m_music:
2198 video_alt_title = remove_quotes(unescapeHTML(m_music.group('title')))
2199 video_creator = clean_html(m_music.group('creator'))
2200 else:
2201 video_alt_title = video_creator = None
2202
2203 def extract_meta(field):
2204 return self._html_search_regex(
2205 r'<h4[^>]+class="title"[^>]*>\s*%s\s*</h4>\s*<ul[^>]*>\s*<li>(.+?)</li>\s*' % field,
2206 video_webpage, field, default=None)
2207
2208 track = extract_meta('Song')
2209 artist = extract_meta('Artist')
92bc97d3 2210 album = extract_meta('Album')
822b9d9c
RA
2211
2212 # Youtube Music Auto-generated description
92bc97d3 2213 release_date = release_year = None
822b9d9c
RA
2214 if video_description:
2215 mobj = re.search(r'(?s)Provided to YouTube by [^\n]+\n+(?P<track>[^·]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?', video_description)
2216 if mobj:
2217 if not track:
2218 track = mobj.group('track').strip()
2219 if not artist:
2220 artist = mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·'))
92bc97d3
RA
2221 if not album:
2222 album = mobj.group('album'.strip())
822b9d9c
RA
2223 release_year = mobj.group('release_year')
2224 release_date = mobj.group('release_date')
2225 if release_date:
2226 release_date = release_date.replace('-', '')
2227 if not release_year:
2228 release_year = int(release_date[:4])
2229 if release_year:
2230 release_year = int(release_year)
7e72694b
S
2231
2232 m_episode = re.search(
2233 r'<div[^>]+id="watch7-headline"[^>]*>\s*<span[^>]*>.*?>(?P<series>[^<]+)</a></b>\s*S(?P<season>\d+)\s*•\s*E(?P<episode>\d+)</span>',
2234 video_webpage)
2235 if m_episode:
c2dd2dc0 2236 series = unescapeHTML(m_episode.group('series'))
7e72694b
S
2237 season_number = int(m_episode.group('season'))
2238 episode_number = int(m_episode.group('episode'))
2239 else:
2240 series = season_number = episode_number = None
2241
2242 m_cat_container = self._search_regex(
2243 r'(?s)<h4[^>]*>\s*Category\s*</h4>\s*<ul[^>]*>(.*?)</ul>',
2244 video_webpage, 'categories', default=None)
2245 if m_cat_container:
2246 category = self._html_search_regex(
2247 r'(?s)<a[^<]+>(.*?)</a>', m_cat_container, 'category',
2248 default=None)
2249 video_categories = None if category is None else [category]
2250 else:
2251 video_categories = None
2252
2253 video_tags = [
2254 unescapeHTML(m.group('content'))
2255 for m in re.finditer(self._meta_regex('og:video:tag'), video_webpage)]
2256
2257 def _extract_count(count_name):
2258 return str_to_int(self._search_regex(
2259 r'-%s-button[^>]+><span[^>]+class="yt-uix-button-content"[^>]*>([\d,]+)</span>'
2260 % re.escape(count_name),
2261 video_webpage, count_name, default=None))
2262
2263 like_count = _extract_count('like')
2264 dislike_count = _extract_count('dislike')
2265
dbdaaa23
S
2266 if view_count is None:
2267 view_count = str_to_int(self._search_regex(
2268 r'<[^>]+class=["\']watch-view-count[^>]+>\s*([\d,\s]+)', video_webpage,
2269 'view count', default=None))
2270
bf3c9326
S
2271 average_rating = (
2272 float_or_none(video_details.get('averageRating'))
2273 or try_get(video_info, lambda x: float_or_none(x['avg_rating'][0])))
2274
7e72694b
S
2275 # subtitles
2276 video_subtitles = self.extract_subtitles(video_id, video_webpage)
2277 automatic_captions = self.extract_automatic_captions(video_id, video_webpage)
2278
2279 video_duration = try_get(
2280 video_info, lambda x: int_or_none(x['length_seconds'][0]))
dbdaaa23
S
2281 if not video_duration:
2282 video_duration = int_or_none(video_details.get('lengthSeconds'))
7e72694b
S
2283 if not video_duration:
2284 video_duration = parse_duration(self._html_search_meta(
2285 'duration', video_webpage, 'video duration'))
2286
2287 # annotations
2288 video_annotations = None
2289 if self._downloader.params.get('writeannotations', False):
64b6a4e9
RA
2290 xsrf_token = self._search_regex(
2291 r'([\'"])XSRF_TOKEN\1\s*:\s*([\'"])(?P<xsrf_token>[A-Za-z0-9+/=]+)\2',
2292 video_webpage, 'xsrf token', group='xsrf_token', fatal=False)
2293 invideo_url = try_get(
2294 player_response, lambda x: x['annotations'][0]['playerAnnotationsUrlsRenderer']['invideoUrl'], compat_str)
2295 if xsrf_token and invideo_url:
2296 xsrf_field_name = self._search_regex(
2297 r'([\'"])XSRF_FIELD_NAME\1\s*:\s*([\'"])(?P<xsrf_field_name>\w+)\2',
2298 video_webpage, 'xsrf field name',
2299 group='xsrf_field_name', default='session_token')
2300 video_annotations = self._download_webpage(
2301 self._proto_relative_url(invideo_url),
2302 video_id, note='Downloading annotations',
2303 errnote='Unable to download video annotations', fatal=False,
2304 data=urlencode_postdata({xsrf_field_name: xsrf_token}))
7e72694b
S
2305
2306 chapters = self._extract_chapters(description_original, video_duration)
2307
dd27fd17 2308 # Look for the DASH manifest
203fb43f 2309 if self._downloader.params.get('youtube_include_dash_manifest', True):
77c6fb5b 2310 dash_mpd_fatal = True
8ff648e4 2311 for mpd_url in dash_mpds:
d8d24a92 2312 dash_formats = {}
774e208f 2313 try:
05d0d131
YCH
2314 def decrypt_sig(mobj):
2315 s = mobj.group(1)
2316 dec_s = self._decrypt_signature(s, video_id, player_url, age_gate)
2317 return '/signature/%s' % dec_s
2318
8ff648e4 2319 mpd_url = re.sub(r'/s/([a-fA-F0-9\.]+)', decrypt_sig, mpd_url)
2d2fa82d 2320
8ff648e4 2321 for df in self._extract_mpd_formats(
2322 mpd_url, video_id, fatal=dash_mpd_fatal,
2323 formats_dict=self._formats):
c63ca0ee
S
2324 if not df.get('filesize'):
2325 df['filesize'] = _extract_filesize(df['url'])
d8d24a92
S
2326 # Do not overwrite DASH format found in some previous DASH manifest
2327 if df['format_id'] not in dash_formats:
2328 dash_formats[df['format_id']] = df
77c6fb5b
S
2329 # Additional DASH manifests may end up in HTTP Error 403 therefore
2330 # allow them to fail without bug report message if we already have
2331 # some DASH manifest succeeded. This is temporary workaround to reduce
2332 # burst of bug reports until we figure out the reason and whether it
2333 # can be fixed at all.
2334 dash_mpd_fatal = False
774e208f
PH
2335 except (ExtractorError, KeyError) as e:
2336 self.report_warning(
2337 'Skipping DASH manifest: %r' % e, video_id)
d8d24a92 2338 if dash_formats:
04b3b3df
JMF
2339 # Remove the formats we found through non-DASH, they
2340 # contain less info and it can be wrong, because we use
2341 # fixed values (for example the resolution). See
067aa17e 2342 # https://github.com/ytdl-org/youtube-dl/issues/5774 for an
04b3b3df 2343 # example.
d80265cc 2344 formats = [f for f in formats if f['format_id'] not in dash_formats.keys()]
d8d24a92 2345 formats.extend(dash_formats.values())
d80044c2 2346
6271f1ca
PH
2347 # Check for malformed aspect ratio
2348 stretched_m = re.search(
2349 r'<meta\s+property="og:video:tag".*?content="yt:stretch=(?P<w>[0-9]+):(?P<h>[0-9]+)">',
2350 video_webpage)
2351 if stretched_m:
313dfc45
LL
2352 w = float(stretched_m.group('w'))
2353 h = float(stretched_m.group('h'))
5faf9fed
S
2354 # yt:stretch may hold invalid ratio data (e.g. for Q39EVAstoRM ratio is 17:0).
2355 # We will only process correct ratios.
313dfc45 2356 if w > 0 and h > 0:
41f24c32 2357 ratio = w / h
313dfc45
LL
2358 for f in formats:
2359 if f.get('vcodec') != 'none':
2360 f['stretched_ratio'] = ratio
6271f1ca 2361
026fbedc
S
2362 if not formats:
2363 token = extract_token(video_info)
2364 if not token:
2365 if 'reason' in video_info:
2366 if 'The uploader has not made this video available in your country.' in video_info['reason']:
2367 regions_allowed = self._html_search_meta(
2368 'regionsAllowed', video_webpage, default=None)
2369 countries = regions_allowed.split(',') if regions_allowed else None
2370 self.raise_geo_restricted(
2371 msg=video_info['reason'][0], countries=countries)
2372 reason = video_info['reason'][0]
2373 if 'Invalid parameters' in reason:
2374 unavailable_message = extract_unavailable_message()
2375 if unavailable_message:
2376 reason = unavailable_message
2377 raise ExtractorError(
2378 'YouTube said: %s' % reason,
2379 expected=True, video_id=video_id)
2380 else:
2381 raise ExtractorError(
2382 '"token" parameter not in video info for unknown reason',
2383 video_id=video_id)
2384
0d297518
RA
2385 if not formats and (video_info.get('license_info') or try_get(player_response, lambda x: x['streamingData']['licenseInfos'])):
2386 raise ExtractorError('This video is DRM protected.', expected=True)
2387
4bcc7bd1 2388 self._sort_formats(formats)
4ea3be0a 2389
21c340b8 2390 self.mark_watched(video_id, video_info, player_response)
d77ab8e2 2391
4ea3be0a 2392 return {
8bcc8756
JW
2393 'id': video_id,
2394 'uploader': video_uploader,
2395 'uploader_id': video_uploader_id,
fd050249 2396 'uploader_url': video_uploader_url,
dd4c4492
S
2397 'channel_id': channel_id,
2398 'channel_url': channel_url,
8bcc8756 2399 'upload_date': upload_date,
7caf9830 2400 'license': video_license,
936784b2 2401 'creator': video_creator or artist,
8bcc8756 2402 'title': video_title,
936784b2 2403 'alt_title': video_alt_title or track,
8bcc8756
JW
2404 'thumbnail': video_thumbnail,
2405 'description': video_description,
2406 'categories': video_categories,
000b6b5a 2407 'tags': video_tags,
8bcc8756 2408 'subtitles': video_subtitles,
360e1ca5 2409 'automatic_captions': automatic_captions,
8bcc8756
JW
2410 'duration': video_duration,
2411 'age_limit': 18 if age_gate else 0,
2412 'annotations': video_annotations,
9cafc3fd 2413 'chapters': chapters,
7e8c0af0 2414 'webpage_url': proto + '://www.youtube.com/watch?v=%s' % video_id,
8bcc8756 2415 'view_count': view_count,
4ea3be0a 2416 'like_count': like_count,
2417 'dislike_count': dislike_count,
bf3c9326 2418 'average_rating': average_rating,
8bcc8756 2419 'formats': formats,
2fe1ff85 2420 'is_live': is_live,
7c80519c 2421 'start_time': start_time,
297a564b 2422 'end_time': end_time,
12afdc2a
S
2423 'series': series,
2424 'season_number': season_number,
2425 'episode_number': episode_number,
936784b2
S
2426 'track': track,
2427 'artist': artist,
5caabd3c 2428 'album': album,
2429 'release_date': release_date,
2430 'release_year': release_year,
4ea3be0a 2431 }
c5e8d7af 2432
5f6a1245 2433
class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
    """Extractor for YouTube playlists.

    Matches playlist style URLs on youtube.com / invidio.us / youtu.be as
    well as bare playlist ids (see _VALID_URL).  Mixes are handled by
    _extract_mix, everything else by _extract_playlist.
    """

    IE_NAME = 'youtube:playlist'
    IE_DESC = 'YouTube.com playlists'
    # Group 1 captures the playlist id found in a URL, group 2 a bare id.
    _VALID_URL = r"""(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        (?:
                            (?:
                                youtube\.com|
                                invidio\.us
                            )
                            /
                            (?:
                               (?:course|view_play_list|my_playlists|artist|playlist|watch|embed/(?:videoseries|[0-9A-Za-z_-]{11}))
                               \? (?:.*?[&;])*? (?:p|a|list)=
                            |  p/
                            )|
                            youtu\.be/[0-9A-Za-z_-]{11}\?.*?\blist=
                        )
                        (
                            (?:PL|LL|EC|UU|FL|RD|UL|TL|OLAK5uy_)?[0-9A-Za-z-_]{10,}
                            # Top tracks, they can also include dots
                            |(?:MC)[\w\.]*
                        )
                        .*
                     |
                        (%(playlist_id)s)
                     )""" % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s'
    _VIDEO_RE_TPL = r'href="\s*/watch\?v=%s(?:&amp;(?:[^"]*?index=(?P<index>\d+))?(?:[^>]+>(?P<title>[^<]+))?)?'
    _VIDEO_RE = _VIDEO_RE_TPL % r'(?P<id>[0-9A-Za-z_-]{11})'
    _TESTS = [{
        'url': 'https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re',
        'info_dict': {
            'title': 'ytdl test PL',
            'id': 'PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re',
        },
        'playlist_count': 3,
    }, {
        'url': 'https://www.youtube.com/playlist?list=PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx',
        'info_dict': {
            'id': 'PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx',
            'title': 'YDL_Empty_List',
        },
        'playlist_count': 0,
        'skip': 'This playlist is private',
    }, {
        'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
        'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
        'info_dict': {
            'title': '29C3: Not my department',
            'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
            'uploader': 'Christiaan008',
            'uploader_id': 'ChRiStIaAn008',
        },
        'playlist_count': 95,
    }, {
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
            'uploader': 'Wickydoo',
            'uploader_id': 'Wickydoo',
        },
        'playlist_mincount': 26,
    }, {
        'note': 'Large playlist',
        'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
        'info_dict': {
            'title': 'Uploads from Cauchemar',
            'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
            'uploader': 'Cauchemar',
            'uploader_id': 'Cauchemar89',
        },
        'playlist_mincount': 799,
    }, {
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        },
        'playlist_count': 2,
        'skip': 'This playlist is private',
    }, {
        'note': 'embedded',
        'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA15',
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
            'uploader': 'milan',
            'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
        }
    }, {
        'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
        'playlist_mincount': 485,
        'info_dict': {
            'title': '2018 Chinese New Singles (11/6 updated)',
            'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
            'uploader': 'LBK',
            'uploader_id': 'sdragonfang',
        }
    }, {
        'note': 'Embedded SWF player',
        'url': 'https://www.youtube.com/p/YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ?hl=en_US&fs=1&rel=0',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA7',
            'id': 'YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ',
        },
        'skip': 'This playlist does not exist',
    }, {
        'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
        'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
        'info_dict': {
            'title': 'Uploads from Interstellar Movie',
            'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
            'uploader': 'Interstellar Movie',
            'uploader_id': 'InterstellarMovie1',
        },
        'playlist_mincount': 21,
    }, {
        # Playlist URL that does not actually serve a playlist
        'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
        'info_dict': {
            'id': 'FqZTN594JQw',
            'ext': 'webm',
            'title': "Smiley's People 01 detective, Adventure Series, Action",
            'uploader': 'STREEM',
            'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
            'upload_date': '20150526',
            'license': 'Standard YouTube License',
            'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
            'categories': ['People & Blogs'],
            'tags': list,
            'view_count': int,
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'skip_download': True,
        },
        'skip': 'This video is not available.',
        'add_ie': [YoutubeIE.ie_key()],
    }, {
        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
        'info_dict': {
            'id': 'yeWKywCrFtk',
            'ext': 'mp4',
            'title': 'Small Scale Baler and Braiding Rugs',
            'uploader': 'Backus-Page House Museum',
            'uploader_id': 'backuspagemuseum',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
            'upload_date': '20161008',
            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
            'categories': ['Nonprofits & Activism'],
            'tags': list,
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
    }, {
        # https://github.com/ytdl-org/youtube-dl/issues/21844
        'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
        'info_dict': {
            'title': 'Data Analysis with Dr Mike Pound',
            'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
            'uploader_id': 'Computerphile',
            'uploader': 'Computerphile',
        },
        'playlist_mincount': 11,
    }, {
        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
        'only_matching': True,
    }, {
        'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
        'only_matching': True,
    }, {
        # music album playlist
        'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
        'only_matching': True,
    }, {
        'url': 'https://invidio.us/playlist?list=PLDIoUOhQQPlXr63I_vwF9GD8sAKh77dWU',
        'only_matching': True,
    }]

    def _real_initialize(self):
        """Log in before any extraction takes place."""
        self._login()

    def extract_videos_from_page(self, page):
        """Collect (video id, title) pairs found in a playlist page.

        Elements carrying a data-video-id attribute are scanned first; the
        regex based fallbacks are then handed the same accumulator lists
        through extract_videos_from_page_impl.
        """
        found_ids = []
        found_titles = []

        tagged_elements = re.findall(
            r'(<[^>]*\bdata-video-id\s*=\s*["\'][0-9A-Za-z_-]{11}[^>]+>)', page)
        for element in tagged_elements:
            attributes = extract_attributes(element)
            title = unescapeHTML(attributes.get('data-title'))
            if title:
                title = title.strip()
            found_ids.append(attributes['data-video-id'])
            found_titles.append(title)

        fallback_patterns = (
            # Fallback with old _VIDEO_RE
            self._VIDEO_RE,
            # Relaxed fallbacks
            r'href="\s*/watch\?v\s*=\s*(?P<id>[0-9A-Za-z_-]{11})',
            r'data-video-ids\s*=\s*["\'](?P<id>[0-9A-Za-z_-]{11})',
        )
        for pattern in fallback_patterns:
            self.extract_videos_from_page_impl(
                pattern, page, found_ids, found_titles)

        return zip(found_ids, found_titles)

    def _extract_mix(self, playlist_id):
        """Extract a mix by walking watch pages until no new video shows up.

        The seed video id is taken from the last 11 characters of the
        playlist id; every following request uses the last video collected
        so far.
        """
        # The mixes are generated from a single video
        # the id of the playlist is just 'RD' + video_id
        collected_ids = []
        seed_id = playlist_id[-11:]
        for page_num in itertools.count(1):
            watch_url = 'https://youtube.com/watch?v=%s&list=%s' % (seed_id, playlist_id)
            webpage = self._download_webpage(
                watch_url, playlist_id,
                'Downloading page {0} of Youtube mix'.format(page_num))
            page_ids = orderedSet(re.findall(
                r'''(?xs)data-video-username=".*?".*?
                           href="/watch\?v=([0-9A-Za-z_-]{11})&amp;[^"]*?list=%s''' % re.escape(playlist_id),
                webpage))
            # Fetch new pages until all the videos are repeated, it seems that
            # there are always 51 unique videos.
            unseen_ids = [vid for vid in page_ids if vid not in collected_ids]
            if not unseen_ids:
                break
            collected_ids.extend(unseen_ids)
            seed_id = collected_ids[-1]

        url_results = self._ids_to_results(collected_ids)

        # The title is looked up in the last downloaded page; the first class
        # name yielding a truthy element wins.
        title_span = None
        for class_name in ('playlist-title', 'title long-title', 'title'):
            title_span = get_element_by_attribute('class', class_name, webpage)
            if title_span:
                break
        title = clean_html(title_span)

        return self.playlist_result(url_results, playlist_id, title)

    def _extract_playlist(self, playlist_id):
        """Download the playlist page and build the playlist result.

        Returns a (has_videos, playlist) tuple.  has_videos is False only when
        the page has no title and yields no entries at all.  Raises
        ExtractorError for alerts reporting a missing/private playlist or
        invalid parameters.
        """
        url = self._TEMPLATE_URL % playlist_id
        page = self._download_webpage(url, playlist_id)

        # the yt-alert-message now has tabindex attribute (see https://github.com/ytdl-org/youtube-dl/issues/11604)
        for alert in re.findall(r'<div class="yt-alert-message"[^>]*>([^<]+)</div>', page):
            alert = alert.strip()
            # Check if the playlist exists or is private
            unavailable = re.match(
                r'[^<]*(?:The|This) playlist (?P<reason>does not exist|is private)[^<]*', alert)
            if unavailable:
                reason = unavailable.group('reason')
                message_parts = ['This playlist %s' % reason]
                if 'private' in reason:
                    message_parts.append(', use --username or --netrc to access it')
                message_parts.append('.')
                raise ExtractorError(''.join(message_parts), expected=True)
            if re.match(r'[^<]*Invalid parameters[^<]*', alert):
                raise ExtractorError(
                    'Invalid parameters. Maybe URL is incorrect.',
                    expected=True)
            if re.match(r'[^<]*Choose your language[^<]*', alert):
                continue
            self.report_warning('Youtube gives an alert message: ' + alert)

        playlist_title = self._html_search_regex(
            r'(?s)<h1 class="pl-header-title[^"]*"[^>]*>\s*(.*?)\s*</h1>',
            page, 'title', default=None)

        _UPLOADER_BASE = r'class=["\']pl-header-details[^>]+>\s*<li>\s*<a[^>]+\bhref='
        uploader = self._search_regex(
            r'%s["\']/(?:user|channel)/[^>]+>([^<]+)' % _UPLOADER_BASE,
            page, 'uploader', default=None)
        uploader_id = uploader_url = None
        uploader_link = re.search(
            r'%s(["\'])(?P<path>/(?:user|channel)/(?P<uploader_id>.+?))\1' % _UPLOADER_BASE,
            page)
        if uploader_link:
            uploader_id = uploader_link.group('uploader_id')
            uploader_url = compat_urlparse.urljoin(url, uploader_link.group('path'))

        has_videos = True
        if not playlist_title:
            # Some playlist URLs don't actually serve a playlist (e.g.
            # https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4)
            try:
                next(self._entries(page, playlist_id))
            except StopIteration:
                has_videos = False

        playlist = self.playlist_result(
            self._entries(page, playlist_id), playlist_id, playlist_title)
        playlist.update({
            'uploader': uploader,
            'uploader_id': uploader_id,
            'uploader_url': uploader_url,
        })

        return has_videos, playlist

    def _check_download_just_video(self, url, playlist_id):
        """Decide whether only the video referenced by the URL is wanted.

        Returns (video_id, result): result is a url_result for the single
        video when 'noplaylist' is set, otherwise None.  (None, None) is
        returned when the URL carries no video id.
        """
        # Check if it's a video-specific URL
        query = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
        video_id = query.get('v', [None])[0] or self._search_regex(
            r'(?:(?:^|//)youtu\.be/|youtube\.com/embed/(?!videoseries))([0-9A-Za-z_-]{11})', url,
            'video id', default=None)
        if not video_id:
            return None, None

        if self._downloader.params.get('noplaylist'):
            self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
            return video_id, self.url_result(video_id, 'Youtube', video_id=video_id)

        self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))
        return video_id, None

    def _real_extract(self, url):
        """Entry point: resolve the URL to a single video, a mix or a playlist."""
        # Extract playlist id
        url_match = re.match(self._VALID_URL, url)
        if url_match is None:
            raise ExtractorError('Invalid URL: %s' % url)
        playlist_id = url_match.group(1) or url_match.group(2)

        video_id, single_video = self._check_download_just_video(url, playlist_id)
        if single_video:
            return single_video

        if playlist_id.startswith(('RD', 'UL', 'PU')):
            # Mixes require a custom extraction process
            return self._extract_mix(playlist_id)

        has_videos, playlist = self._extract_playlist(playlist_id)
        if video_id and not has_videos:
            # Some playlist URLs don't actually serve a playlist (see
            # https://github.com/ytdl-org/youtube-dl/issues/10537).
            # Fallback to plain video extraction if there is a video id
            # along with playlist id.
            return self.url_result(video_id, 'Youtube', video_id=video_id)

        return playlist
448830ce 2789
c5e8d7af 2790
class YoutubeChannelIE(YoutubePlaylistBaseInfoExtractor):
    """Extractor for the videos of a YouTube channel (/channel/<id> URLs)."""

    IE_NAME = 'youtube:channel'
    IE_DESC = 'YouTube.com channels'
    _VALID_URL = r'https?://(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie)?\.com|(?:www\.)?invidio\.us)/channel/(?P<id>[0-9A-Za-z_-]+)'
    _TEMPLATE_URL = 'https://www.youtube.com/channel/%s/videos'
    _VIDEO_RE = r'(?:title="(?P<title>[^"]+)"[^>]+)?href="/watch\?v=(?P<id>[0-9A-Za-z_-]+)&?'
    _TESTS = [{
        'note': 'paginated channel',
        'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
        'playlist_mincount': 91,
        'info_dict': {
            'id': 'UUKfVa3S1e4PHvxWcwyMMg8w',
            'title': 'Uploads from lex will',
            'uploader': 'lex will',
            'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
        }
    }, {
        'note': 'Age restricted channel',
        # from https://www.youtube.com/user/DeusExOfficial
        'url': 'https://www.youtube.com/channel/UCs0ifCMCm1icqRbqhUINa0w',
        'playlist_mincount': 64,
        'info_dict': {
            'id': 'UUs0ifCMCm1icqRbqhUINa0w',
            'title': 'Uploads from Deus Ex',
            'uploader': 'Deus Ex',
            'uploader_id': 'DeusExOfficial',
        },
    }, {
        'url': 'https://invidio.us/channel/UC23qupoDRn9YOAVzeoxjOQA',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Refuse URLs claimed by the playlists or live extractors."""
        if YoutubePlaylistsIE.suitable(url) or YoutubeLiveIE.suitable(url):
            return False
        return super(YoutubeChannelIE, cls).suitable(url)

    def _build_template_url(self, url, channel_id):
        """Return the channel videos page URL for channel_id."""
        return self._TEMPLATE_URL % channel_id

    def _real_extract(self, url):
        """Extract a channel, preferably by delegating to its uploads playlist."""
        channel_id = self._match_id(url)

        url = self._build_template_url(url, channel_id)

        # Channel by page listing is restricted to 35 pages of 30 items, i.e. 1050 videos total (see #5778)
        # Workaround by extracting as a playlist if managed to obtain channel playlist URL
        # otherwise fallback on channel by page extraction
        channel_page = self._download_webpage(
            url + '?view=57', channel_id,
            'Downloading channel page', fatal=False)
        channel_playlist_id = False
        if channel_page is not False:
            channel_playlist_id = self._html_search_meta(
                'channelId', channel_page, 'channel id', default=None)
            if not channel_playlist_id:
                # Fall back to the app deep-link meta tags.
                app_url = self._html_search_meta(
                    ('al:ios:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad'),
                    channel_page, 'channel url', default=None)
                if app_url:
                    channel_playlist_id = self._search_regex(
                        r'vnd\.youtube://user/([0-9A-Za-z_-]+)',
                        app_url, 'channel id', default=None)
        if channel_playlist_id and channel_playlist_id.startswith('UC'):
            # Swap the 'UC' prefix for 'UU' to obtain the playlist id to delegate to.
            uploads_id = 'UU' + channel_playlist_id[2:]
            return self.url_result(
                compat_urlparse.urljoin(url, '/playlist?list=%s' % uploads_id), 'YoutubePlaylist')

        channel_page = self._download_webpage(url, channel_id, 'Downloading page #1')
        autogenerated_label = re.search(r'''(?x)
                class="[^"]*?(?:
                    channel-header-autogenerated-label|
                    yt-channel-title-autogenerated
                )[^"]*"''', channel_page)

        if autogenerated_label is not None:
            # The videos are contained in a single page
            # the ajax pages can't be used, they are empty
            entries = []
            for video_id, video_title in self.extract_videos_from_page(channel_page):
                entries.append(self.url_result(
                    video_id, 'Youtube', video_id=video_id,
                    video_title=video_title))
            return self.playlist_result(entries, channel_id)

        try:
            next(self._entries(channel_page, channel_id))
        except StopIteration:
            # No entries at all: surface an alert shown on the page, if any.
            alert_message = self._html_search_regex(
                r'(?s)<div[^>]+class=(["\']).*?\byt-alert-message\b.*?\1[^>]*>(?P<alert>[^<]+)</div>',
                channel_page, 'alert', default=None, group='alert')
            if alert_message:
                raise ExtractorError('Youtube said: %s' % alert_message, expected=True)

        return self.playlist_result(self._entries(channel_page, channel_id), channel_id)
c5e8d7af
PH
2887
2888
class YoutubeUserIE(YoutubeChannelIE):
    """Extractor for the videos of a YouTube user (/user/, /c/ or "ytuser:")."""

    IE_NAME = 'youtube:user'
    IE_DESC = 'YouTube.com user videos (URL or "ytuser" keyword)'
    _VALID_URL = r'(?:(?:https?://(?:\w+\.)?youtube\.com/(?:(?P<user>user|c)/)?(?!(?:attribution_link|watch|results|shared)(?:$|[^a-z_A-Z0-9-])))|ytuser:)(?!feed/)(?P<id>[A-Za-z0-9_-]+)'
    _TEMPLATE_URL = 'https://www.youtube.com/%s/%s/videos'

    _TESTS = [{
        'url': 'https://www.youtube.com/user/TheLinuxFoundation',
        'playlist_mincount': 320,
        'info_dict': {
            'id': 'UUfX55Sx5hEFjoC3cNs6mCUQ',
            'title': 'Uploads from The Linux Foundation',
            'uploader': 'The Linux Foundation',
            'uploader_id': 'TheLinuxFoundation',
        }
    }, {
        # Only available via https://www.youtube.com/c/12minuteathlete/videos
        # but not https://www.youtube.com/user/12minuteathlete/videos
        'url': 'https://www.youtube.com/c/12minuteathlete/videos',
        'playlist_mincount': 249,
        'info_dict': {
            'id': 'UUVjM-zV6_opMDx7WYxnjZiQ',
            'title': 'Uploads from 12 Minute Athlete',
            'uploader': '12 Minute Athlete',
            'uploader_id': 'the12minuteathlete',
        }
    }, {
        'url': 'ytuser:phihag',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/c/gametrailers',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/gametrailers',
        'only_matching': True,
    }, {
        # This channel is not available, geo restricted to JP
        'url': 'https://www.youtube.com/user/kananishinoSMEJ/videos',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Accept the URL only if no other Youtube*IE in this module does."""
        # The regex of this extractor is too permissive and would also match
        # URLs that another youtube extractor can handle, so give every other
        # module-level Youtube*IE class the first chance.
        for name, klass in globals().items():
            if not (name.startswith('Youtube') and name.endswith('IE')):
                continue
            if klass is cls:
                continue
            if klass.suitable(url):
                return False
        return super(YoutubeUserIE, cls).suitable(url)

    def _build_template_url(self, url, channel_id):
        """Return the videos page URL, keeping the 'user' or 'c' path kind.

        URLs without an explicit kind (including "ytuser:") use 'user'.
        """
        url_match = re.match(self._VALID_URL, url)
        path_kind = url_match.group('user') or 'user'
        return self._TEMPLATE_URL % (path_kind, url_match.group('id'))
2943
b05654f0 2944
f07e276a
S
class YoutubeLiveIE(YoutubeBaseInfoExtractor):
    """Extractor for ".../live" URLs of a user or channel."""

    IE_NAME = 'youtube:live'
    IE_DESC = 'YouTube.com live streams'
    _VALID_URL = r'(?P<base_url>https?://(?:\w+\.)?youtube\.com/(?:(?:user|channel|c)/)?(?P<id>[^/]+))/live'

    _TESTS = [{
        'url': 'https://www.youtube.com/user/TheYoungTurks/live',
        'info_dict': {
            'id': 'a48o2S1cPoo',
            'ext': 'mp4',
            'title': 'The Young Turks - Live Main Show',
            'uploader': 'The Young Turks',
            'uploader_id': 'TheYoungTurks',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
            'upload_date': '20150715',
            'license': 'Standard YouTube License',
            'description': 'md5:438179573adcdff3c97ebb1ee632b891',
            'categories': ['News & Politics'],
            'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/TheYoungTurks/live',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Resolve a live URL to its video, else to the URL without '/live'."""
        url_match = re.match(self._VALID_URL, url)
        channel_id = url_match.group('id')
        base_url = url_match.group('base_url')

        # The download is non-fatal: on failure the base URL is returned.
        webpage = self._download_webpage(url, channel_id, fatal=False)
        if not webpage:
            return self.url_result(base_url)

        page_type = self._og_search_property(
            'type', webpage, 'page type', default='')
        video_id = self._html_search_meta(
            'videoId', webpage, 'video id', default=None)
        is_video_page = (
            page_type.startswith('video')
            and video_id
            and re.match(r'^[0-9A-Za-z_-]{11}$', video_id))
        if is_video_page:
            return self.url_result(video_id, YoutubeIE.ie_key())
        return self.url_result(base_url)
2995
2996
e462474e
S
class YoutubePlaylistsIE(YoutubePlaylistsBaseInfoExtractor):
    """Extractor for the "/playlists" page of a user or channel.

    Only declares the URL pattern and tests; no methods are defined here.
    """

    IE_NAME = 'youtube:playlists'
    IE_DESC = 'YouTube.com user/channel playlists'
    _VALID_URL = r'https?://(?:\w+\.)?youtube\.com/(?:user|channel)/(?P<id>[^/]+)/playlists'

    _TESTS = [{
        'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
        'playlist_mincount': 4,
        'info_dict': {
            'id': 'ThirstForScience',
            'title': 'ThirstForScience',
        },
    }, {
        # with "Load more" button
        'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
        'playlist_mincount': 70,
        'info_dict': {
            'id': 'igorkle1',
            'title': 'Игорь Клейнер',
        },
    }, {
        'url': 'https://www.youtube.com/channel/UCiU1dHvZObB2iP6xkJ__Icw/playlists',
        'playlist_mincount': 17,
        'info_dict': {
            'id': 'UCiU1dHvZObB2iP6xkJ__Icw',
            'title': 'Chem Player',
        },
        'skip': 'Blocked',
    }]
0c148415
S
3026
3027
870f3bfc
S
class YoutubeSearchBaseInfoExtractor(YoutubePlaylistBaseInfoExtractor):
    """Base class for search extractors; only overrides the video link regex."""

    # Captures the 11 character video id and, when present, the title attribute.
    _VIDEO_RE = (
        r'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})'
        r'(?:[^"]*"[^>]+\btitle="(?P<title>[^"]+))?')
3030
3031
class YoutubeSearchIE(SearchInfoExtractor, YoutubeSearchBaseInfoExtractor):
    """Extractor for keyword searches using the 'ytsearch' search key."""

    IE_DESC = 'YouTube.com searches'
    # there doesn't appear to be a real limit, for example if you search for
    # 'python' you get more than 8.000.000 results
    _MAX_RESULTS = float('inf')
    IE_NAME = 'youtube:search'
    _SEARCH_KEY = 'ytsearch'
    # Extra URL query parameters; subclasses override this (e.g. sorting).
    _EXTRA_QUERY_ARGS = {}
    _TESTS = []

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query.

        Result pages are downloaded one after another, following the
        "Next" link of each page, until at least ``n`` entries have been
        collected, a page yields no entries, or no "Next" link is found.

        Returns a playlist result titled with ``query`` holding at most
        ``n`` entries.  Raises ExtractorError (expected) when a page
        contains the "search-message" marker.
        """
        videos = []

        url_query = {
            'search_query': query.encode('utf-8'),
        }
        url_query.update(self._EXTRA_QUERY_ARGS)
        result_url = 'https://www.youtube.com/results?' + compat_urllib_parse_urlencode(url_query)

        for pagenum in itertools.count(1):
            data = self._download_json(
                result_url, video_id='query "%s"' % query,
                note='Downloading page %s' % pagenum,
                errnote='Unable to download API page',
                query={'spf': 'navigate'})
            html_content = data[1]['body']['content']

            if 'class="search-message' in html_content:
                raise ExtractorError(
                    '[youtube] No video results', expected=True)

            new_videos = list(self._process_page(html_content))
            videos += new_videos
            # Stop as soon as enough entries are available.  Using ">="
            # rather than ">" avoids downloading one more page whose
            # results would be discarded by the truncation below.
            if not new_videos or len(videos) >= n:
                break
            next_link = self._html_search_regex(
                r'href="(/results\?[^"]*\bsp=[^"]+)"[^>]*>\s*<span[^>]+class="[^"]*\byt-uix-button-content\b[^"]*"[^>]*>Next',
                html_content, 'next link', default=None)
            if next_link is None:
                break
            result_url = compat_urlparse.urljoin('https://www.youtube.com/', next_link)

        # The last page may have pushed the total past the requested count.
        if len(videos) > n:
            videos = videos[:n]
        return self.playlist_result(videos, query)
75dff0ee 3080
c9ae7b95 3081
class YoutubeSearchDateIE(YoutubeSearchIE):
    """Variant of YoutubeSearchIE that adds a sort-by-upload-date query arg."""

    IE_NAME = '%s:date' % YoutubeSearchIE.IE_NAME
    IE_DESC = 'YouTube.com searches, newest videos first'
    _SEARCH_KEY = 'ytsearchdate'
    _EXTRA_QUERY_ARGS = {'search_sort': 'video_date_uploaded'}
75dff0ee 3087
c9ae7b95 3088
class YoutubeSearchURLIE(YoutubeSearchBaseInfoExtractor):
    """Extractor for pasted youtube.com/results?... search URLs."""

    IE_NAME = 'youtube:search_url'
    IE_DESC = 'YouTube.com search URLs'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?P<query>[^&]+)(?:[&]|$)'
    _TESTS = [
        {
            'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
            'playlist_mincount': 5,
            'info_dict': {
                'title': 'youtube-dl test video',
            }
        },
        {
            'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
            'only_matching': True,
        },
    ]

    def _real_extract(self, url):
        """Download the search page at ``url`` and return its entries.

        The decoded search query serves both as the download's display ID
        and as the title of the resulting playlist.
        """
        encoded_query = re.match(self._VALID_URL, url).group('query')
        query = compat_urllib_parse_unquote_plus(encoded_query)
        results_page = self._download_webpage(url, query)
        entries = self._process_page(results_page)
        return self.playlist_result(entries, playlist_title=query)
c9ae7b95
PH
3109
3110
class YoutubeShowIE(YoutubePlaylistsBaseInfoExtractor):
    """Extractor for youtube.com/show/<id> pages."""

    IE_NAME = 'youtube:show'
    IE_DESC = 'YouTube.com (multi-season) shows'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/show/(?P<id>[^?#]*)'
    _TESTS = [
        {
            'url': 'https://www.youtube.com/show/airdisasters',
            'playlist_mincount': 5,
            'info_dict': {
                'id': 'airdisasters',
                'title': 'Air Disasters',
            }
        },
    ]

    def _real_extract(self, url):
        """Delegate to the base class using the show's /playlists URL."""
        show_id = self._match_id(url)
        playlists_url = 'https://www.youtube.com/show/%s/playlists' % show_id
        return super(YoutubeShowIE, self)._real_extract(playlists_url)
04cc9617
JMF
3128
3129
class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
    """
    Base class for feed extractors
    Subclasses must define the _FEED_NAME and _PLAYLIST_TITLE properties.

    _FEED_NAME is used for both the extractor name ('youtube:<name>') and
    the feed URL ('https://www.youtube.com/feed/<name>'); _PLAYLIST_TITLE
    is used as the download display ID and as the resulting playlist title.
    """
    _LOGIN_REQUIRED = True

    @property
    def IE_NAME(self):
        return 'youtube:%s' % self._FEED_NAME

    def _real_initialize(self):
        self._login()

    def _entries(self, page):
        """Yield a result for every distinct video linked from the feed.

        ``page`` is the HTML of the first feed page.  Further portions are
        fetched through the "load more" link for as long as one is present
        and the latest portion still contributed unseen video IDs.
        """
        # The extraction process is the same as for playlists, but the regex
        # for the video ids doesn't contain an index
        # A set keeps the "already seen" test constant-time however long
        # the feed grows; the yield order still follows orderedSet(matches).
        seen_ids = set()
        more_widget_html = content_html = page
        for page_num in itertools.count(1):
            matches = re.findall(r'href="\s*/watch\?v=([0-9A-Za-z_-]{11})', content_html)

            # 'recommended' feed has infinite 'load more' and each new portion spins
            # the same videos in (sometimes) slightly different order, so we'll check
            # for unicity and break when portion has no new videos
            new_ids = [
                video_id for video_id in orderedSet(matches)
                if video_id not in seen_ids]
            if not new_ids:
                break

            seen_ids.update(new_ids)

            for entry in self._ids_to_results(new_ids):
                yield entry

            mobj = re.search(r'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html)
            if not mobj:
                break

            more = self._download_json(
                'https://youtube.com/%s' % mobj.group('more'), self._PLAYLIST_TITLE,
                'Downloading page #%s' % page_num,
                transform_source=uppercase_escape)
            content_html = more['content_html']
            more_widget_html = more['load_more_widget_html']

    def _real_extract(self, url):
        """Download the feed page and return its videos as a playlist."""
        page = self._download_webpage(
            'https://www.youtube.com/feed/%s' % self._FEED_NAME,
            self._PLAYLIST_TITLE)
        return self.playlist_result(
            self._entries(page), playlist_title=self._PLAYLIST_TITLE)
25f14e9f
S
3181
3182
class YoutubeWatchLaterIE(YoutubePlaylistIE):
    """Extractor for the "watch later" list (playlist ID 'WL')."""

    IE_NAME = 'youtube:watchlater'
    IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/(?:feed/watch_later|(?:playlist|watch)\?(?:.+&)?list=WL)|:ytwatchlater'

    _TESTS = [
        {
            'url': 'https://www.youtube.com/playlist?list=WL',
            'only_matching': True,
        },
        {
            'url': 'https://www.youtube.com/watch?v=bCNU9TrbiRk&index=1&list=WL',
            'only_matching': True,
        },
    ]

    def _real_extract(self, url):
        """Return the single-video result if there is one, else the 'WL' playlist."""
        _, result = self._check_download_just_video(url, 'WL')
        if not result:
            _, result = self._extract_playlist('WL')
        return result
f459d170 3202
5f6a1245 3203
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    """Extractor that maps the favourites page to its backing playlist."""

    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube.com favourite videos, ":ytfav" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/my_favorites|:ytfav(?:ou?rites)?'
    _LOGIN_REQUIRED = True

    def _real_extract(self, url):
        """Find the playlist ID on the favourites page and hand it to YoutubePlaylist."""
        favourites_page = self._download_webpage(
            'https://www.youtube.com/my_favorites', 'Youtube Favourites videos')
        list_id = self._search_regex(
            r'list=(.+?)["&]', favourites_page, 'favourites playlist id')
        return self.url_result(list_id, 'YoutubePlaylist')
15870e90
PH
3214
3215
25f14e9f
S
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the 'recommended' feed."""

    _FEED_NAME = 'recommended'
    _PLAYLIST_TITLE = 'Youtube Recommended videos'
    IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/recommended|:ytrec(?:ommended)?'
1ed5b5c9 3221
1ed5b5c9 3222
25f14e9f
S
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the 'subscriptions' feed."""

    _FEED_NAME = 'subscriptions'
    _PLAYLIST_TITLE = 'Youtube Subscriptions'
    IE_DESC = 'YouTube.com subscriptions feed, "ytsubs" keyword (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/subscriptions|:ytsubs(?:criptions)?'
1ed5b5c9 3228
1ed5b5c9 3229
25f14e9f
S
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the 'history' feed."""

    _FEED_NAME = 'history'
    _PLAYLIST_TITLE = 'Youtube History'
    IE_DESC = 'Youtube watch history, ":ythistory" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/history|:ythistory'
1ed5b5c9
JMF
3235
3236
15870e90
PH
class YoutubeTruncatedURLIE(InfoExtractor):
    """Catch watch/attribution_link URLs that carry no video ID.

    The pattern only matches URLs that end right after at most one of the
    listed non-video query parameters; extraction always fails with a hint
    about quoting the URL.
    """

    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [
        {
            'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
            'only_matching': True,
        },
        {
            'url': 'https://www.youtube.com/watch?',
            'only_matching': True,
        },
        {
            'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
            'only_matching': True,
        },
        {
            'url': 'https://www.youtube.com/watch?feature=foo',
            'only_matching': True,
        },
        {
            'url': 'https://www.youtube.com/watch?hl=en-GB',
            'only_matching': True,
        },
        {
            'url': 'https://www.youtube.com/watch?t=2372',
            'only_matching': True,
        },
    ]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError explaining the likely cause."""
        hint = (
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like youtube-dl '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply youtube-dl BaW_jenozKc .')
        raise ExtractorError(hint, expected=True)
772fd5cc
PH
3284
3285
class YoutubeTruncatedIDIE(InfoExtractor):
    """Catch watch URLs whose video ID is only 1 to 10 characters long."""

    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [
        {
            'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
            'only_matching': True,
        },
    ]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError naming the incomplete ID."""
        partial_id = self._match_id(url)
        message = 'Incomplete YouTube ID %s. URL %s looks truncated.' % (partial_id, url)
        raise ExtractorError(message, expected=True)