]> jfr.im git - yt-dlp.git/blame - youtube_dl/extractor/youtube.py
[youtube] Improve unavailable message extraction (refs #22117)
[yt-dlp.git] / youtube_dl / extractor / youtube.py
CommitLineData
c5e8d7af 1# coding: utf-8
c5e8d7af 2
78caa52a
PH
3from __future__ import unicode_literals
4
5
0ca96d48 6import itertools
c5e8d7af 7import json
c4417ddb 8import os.path
d77ab8e2 9import random
c5e8d7af 10import re
42939b61 11import time
e0df6211 12import traceback
c5e8d7af 13
b05654f0 14from .common import InfoExtractor, SearchInfoExtractor
2b25cb5d 15from ..jsinterp import JSInterpreter
54256267 16from ..swfinterp import SWFInterpreter
4bb4a188 17from ..compat import (
edf3e38e 18 compat_chr,
f8c55c66 19 compat_HTTPError,
8d81f3e3 20 compat_kwargs,
c5e8d7af 21 compat_parse_qs,
7fd002c0
S
22 compat_urllib_parse_unquote,
23 compat_urllib_parse_unquote_plus,
15707c7e 24 compat_urllib_parse_urlencode,
7c80519c 25 compat_urllib_parse_urlparse,
7c61bd36 26 compat_urlparse,
c5e8d7af 27 compat_str,
4bb4a188
PH
28)
29from ..utils import (
27019dbb 30 bool_or_none,
c5e8d7af 31 clean_html,
026fbedc 32 dict_get,
9b9c5355 33 error_to_compat_str,
351f37c0 34 extract_attributes,
c5e8d7af 35 ExtractorError,
2d30521a 36 float_or_none,
4bb4a188
PH
37 get_element_by_attribute,
38 get_element_by_id,
dd27fd17 39 int_or_none,
94278f72 40 mimetype2ext,
4bb4a188 41 orderedSet,
6310acf5 42 parse_codecs,
7c80519c 43 parse_duration,
54fc90aa 44 qualities,
0cb58b02 45 remove_quotes,
3995d37d 46 remove_start,
cf7e015f 47 smuggle_url,
dbdaaa23 48 str_or_none,
c93d53f5 49 str_to_int,
556dbe7f 50 try_get,
c5e8d7af
PH
51 unescapeHTML,
52 unified_strdate,
cf7e015f 53 unsmuggle_url,
81c2f20b 54 uppercase_escape,
21c340b8 55 url_or_none,
6e6bc8da 56 urlencode_postdata,
c5e8d7af
PH
57)
58
5f6a1245 59
class YoutubeBaseInfoExtractor(InfoExtractor):
    """Provide base functions for Youtube extractors"""
    _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
    _TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge'

    _LOOKUP_URL = 'https://accounts.google.com/_/signin/sl/lookup'
    _CHALLENGE_URL = 'https://accounts.google.com/_/signin/sl/challenge'
    # {0} is filled with the "TL" token taken from the challenge response
    _TFA_URL = 'https://accounts.google.com/_/signin/challenge?hl=en&TL={0}'

    _NETRC_MACHINE = 'youtube'
    # If True it will raise an error if no login info is provided
    _LOGIN_REQUIRED = False

    _PLAYLIST_ID_RE = r'(?:PL|LL|EC|UU|FL|RD|UL|TL|OLAK5uy_)[0-9A-Za-z-_]{10,}'

    def _set_language(self):
        """Set the YouTube PREF cookie so that pages are requested with hl=en."""
        self._set_cookie(
            '.youtube.com', 'PREF', 'f1=50000000&hl=en',
            # YouTube sets the expire time to about two months
            expire_time=time.time() + 2 * 30 * 24 * 3600)

    def _ids_to_results(self, ids):
        """Return a list with one 'Youtube' url_result per video id in *ids*."""
        return [
            self.url_result(vid_id, 'Youtube', video_id=vid_id)
            for vid_id in ids]

    def _login(self):
        """
        Attempt to log in to YouTube.
        True is returned if successful or skipped.
        False is returned if login failed.

        If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.
        """
        username, password = self._get_login_info()
        # No authentication to be performed
        if username is None:
            if self._LOGIN_REQUIRED and self._downloader.params.get('cookiefile') is None:
                raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
            return True

        login_page = self._download_webpage(
            self._LOGIN_URL, None,
            note='Downloading login page',
            errnote='unable to fetch login page', fatal=False)
        if login_page is False:
            # Explicit False (not a bare return) to match the documented contract
            return False

        login_form = self._hidden_inputs(login_page)

        def req(url, f_req, note, errnote):
            """POST the hidden login form fields plus *f_req* (JSON-encoded) to *url*.

            The request is non-fatal; callers below detect failure by
            comparing the result with False.
            """
            data = login_form.copy()
            data.update({
                'pstMsg': 1,
                'checkConnection': 'youtube',
                'checkedDomains': 'youtube',
                'hl': 'en',
                'deviceinfo': '[null,null,null,[],null,"US",null,null,[],"GlifWebSignIn",null,[null,null,[]]]',
                'f.req': json.dumps(f_req),
                'flowName': 'GlifWebSignIn',
                'flowEntry': 'ServiceLogin',
                # TODO: reverse actual botguard identifier generation algo
                'bgRequest': '["identifier",""]',
            })
            return self._download_json(
                url, None, note=note, errnote=errnote,
                # Drop everything preceding the first '[' before JSON parsing
                transform_source=lambda s: re.sub(r'^[^[]*', '', s),
                fatal=False,
                data=urlencode_postdata(data), headers={
                    'Content-Type': 'application/x-www-form-urlencoded;charset=utf-8',
                    'Google-Accounts-XSRF': 1,
                })

        def warn(message):
            self._downloader.report_warning(message)

        lookup_req = [
            username,
            None, [], None, 'US', None, None, 2, False, True,
            [
                None, None,
                [2, 1, None, 1,
                 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn',
                 None, [], 4],
                1, [None, None, []], None, None, None, True
            ],
            username,
        ]

        lookup_results = req(
            self._LOOKUP_URL, lookup_req,
            'Looking up account info', 'Unable to look up account info')

        if lookup_results is False:
            return False

        user_hash = try_get(lookup_results, lambda x: x[0][2], compat_str)
        if not user_hash:
            warn('Unable to extract user hash')
            return False

        challenge_req = [
            user_hash,
            None, 1, None, [1, None, None, None, [password, None, True]],
            [
                None, None, [2, 1, None, 1, 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn', None, [], 4],
                1, [None, None, []], None, None, None, True
            ]]

        challenge_results = req(
            self._CHALLENGE_URL, challenge_req,
            'Logging in', 'Unable to log in')

        if challenge_results is False:
            # Explicit False (not a bare return) to match the documented contract
            return False

        login_res = try_get(challenge_results, lambda x: x[0][5], list)
        if login_res:
            login_msg = try_get(login_res, lambda x: x[5], compat_str)
            # Choose the reason first, then format it.  Without the
            # parentheses "a % b if c else d" parses as "(a % b) if c else d",
            # which dropped the 'Unable to login' prefix for any other message.
            warn('Unable to login: %s' % (
                'Invalid password'
                if login_msg == 'INCORRECT_ANSWER_ENTERED' else login_msg))
            return False

        res = try_get(challenge_results, lambda x: x[0][-1], list)
        if not res:
            warn('Unable to extract result entry')
            return False

        login_challenge = try_get(res, lambda x: x[0][0], list)
        if login_challenge:
            challenge_str = try_get(login_challenge, lambda x: x[2], compat_str)
            if challenge_str == 'TWO_STEP_VERIFICATION':
                # SEND_SUCCESS - TFA code has been successfully sent to phone
                # QUOTA_EXCEEDED - reached the limit of TFA codes
                status = try_get(login_challenge, lambda x: x[5], compat_str)
                if status == 'QUOTA_EXCEEDED':
                    warn('Exceeded the limit of TFA codes, try later')
                    return False

                tl = try_get(challenge_results, lambda x: x[1][2], compat_str)
                if not tl:
                    warn('Unable to extract TL')
                    return False

                tfa_code = self._get_tfa_info('2-step verification code')

                if not tfa_code:
                    # Trailing space keeps the two adjacent literals from
                    # running together as "<code>(Note ..."
                    warn(
                        'Two-factor authentication required. Provide it either interactively or with --twofactor <code> '
                        '(Note that only TOTP (Google Authenticator App) codes work at this time.)')
                    return False

                tfa_code = remove_start(tfa_code, 'G-')

                tfa_req = [
                    user_hash, None, 2, None,
                    [
                        9, None, None, None, None, None, None, None,
                        [None, tfa_code, True, 2]
                    ]]

                tfa_results = req(
                    self._TFA_URL.format(tl), tfa_req,
                    'Submitting TFA code', 'Unable to submit TFA code')

                if tfa_results is False:
                    return False

                tfa_res = try_get(tfa_results, lambda x: x[0][5], list)
                if tfa_res:
                    tfa_msg = try_get(tfa_res, lambda x: x[5], compat_str)
                    # Same precedence fix as for the password step above
                    warn('Unable to finish TFA: %s' % (
                        'Invalid TFA code'
                        if tfa_msg == 'INCORRECT_ANSWER_ENTERED' else tfa_msg))
                    return False

                check_cookie_url = try_get(
                    tfa_results, lambda x: x[0][-1][2], compat_str)
            else:
                CHALLENGES = {
                    'LOGIN_CHALLENGE': "This device isn't recognized. For your security, Google wants to make sure it's really you.",
                    'USERNAME_RECOVERY': 'Please provide additional information to aid in the recovery process.',
                    'REAUTH': "There is something unusual about your activity. For your security, Google wants to make sure it's really you.",
                }
                challenge = CHALLENGES.get(
                    challenge_str,
                    '%s returned error %s.' % (self.IE_NAME, challenge_str))
                warn('%s\nGo to https://accounts.google.com/, login and solve a challenge.' % challenge)
                return False
        else:
            check_cookie_url = try_get(res, lambda x: x[2], compat_str)

        if not check_cookie_url:
            warn('Unable to extract CheckCookie URL')
            return False

        check_cookie_results = self._download_webpage(
            check_cookie_url, None, 'Checking cookie', fatal=False)

        if check_cookie_results is False:
            return False

        if 'https://myaccount.google.com/' not in check_cookie_results:
            warn('Unable to log in')
            return False

        return True

    def _download_webpage_handle(self, *args, **kwargs):
        """Delegate to the parent implementation with disable_polymer=true added to the query.

        The caller's query dict is copied, never mutated.
        """
        # "or {}" also covers an explicit query=None, which has no .copy()
        query = (kwargs.get('query') or {}).copy()
        query['disable_polymer'] = 'true'
        kwargs['query'] = query
        return super(YoutubeBaseInfoExtractor, self)._download_webpage_handle(
            *args, **compat_kwargs(kwargs))

    def _real_initialize(self):
        """Set the language cookie and attempt to log in, unless no downloader is attached."""
        if self._downloader is None:
            return
        self._set_language()
        if not self._login():
            return
c5e8d7af 282
8377574c 283
class YoutubeEntryListBaseInfoExtractor(YoutubeBaseInfoExtractor):
    # Extract entries from page with "Load more" button
    def _entries(self, page, playlist_id):
        """Yield the entries of *page* and of every continuation page behind it.

        Entries come from self._process_page(); continuation pages are located
        through the data-uix-load-more-href attribute and fetched as JSON.
        """
        max_retries = 3
        current_html = widget_html = page
        for page_num in itertools.count(1):
            for entry in self._process_page(current_html):
                yield entry

            load_more = re.search(
                r'data-uix-load-more-href="/?(?P<more>[^"]+)"', widget_html)
            if load_more is None:
                return

            more_url = 'https://youtube.com/%s' % load_more.group('more')
            for attempt in range(max_retries + 1):
                retry_note = ' (retry #%d)' % attempt if attempt else ''
                try:
                    # Downloading page may result in intermittent 5xx HTTP error
                    # that is usually worked around with a retry
                    more = self._download_json(
                        more_url, playlist_id,
                        'Downloading page #%s%s' % (page_num, retry_note),
                        transform_source=uppercase_escape)
                except ExtractorError as err:
                    transient = (
                        isinstance(err.cause, compat_HTTPError)
                        and err.cause.code in (500, 503))
                    if transient and attempt < max_retries:
                        continue
                    # Either a different failure or the retry budget is spent
                    raise
                break

            current_html = more['content_html']
            if not current_html.strip():
                # Some webpages show a "Load more" button but they don't
                # have more videos
                return
            widget_html = more['load_more_widget_html']
321
061a75ed
S
322
class YoutubePlaylistBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
    """Entry-list extractor whose pages are scanned for videos with self._VIDEO_RE."""

    def _process_page(self, content):
        """Yield a 'Youtube' url_result for each (id, title) pair found in *content*."""
        for found_id, found_title in self.extract_videos_from_page(content):
            yield self.url_result(found_id, 'Youtube', found_id, found_title)

    def extract_videos_from_page_impl(self, video_re, page, ids_in_page, titles_in_page):
        """Collect video ids and titles matched by *video_re* in *page*.

        Results are accumulated in place: *ids_in_page* and *titles_in_page*
        are parallel lists.  An id that is already present is not appended
        again; its title is only filled in if it was missing so far.
        """
        for match in re.finditer(video_re, page):
            named = match.groupdict()
            video_id = match.group('id')
            # The link with index 0 is not the first video of the playlist (not sure if still actual)
            # NOTE(review): the test compares the 'id' group, not 'index', with '0' - confirm intent
            if 'index' in named and video_id == '0':
                continue

            title = None
            if 'title' in named:
                title = unescapeHTML(match.group('title'))
            if title:
                title = title.strip()
            if title == '► Play all':
                title = None

            if video_id not in ids_in_page:
                ids_in_page.append(video_id)
                titles_in_page.append(title)
                continue

            # Already known id: only fill in a title that was missing so far
            position = ids_in_page.index(video_id)
            if title and not titles_in_page[position]:
                titles_in_page[position] = title

    def extract_videos_from_page(self, page):
        """Return the zipped (video_id, title) pairs matched by self._VIDEO_RE in *page*."""
        found_ids, found_titles = [], []
        self.extract_videos_from_page_impl(
            self._VIDEO_RE, page, found_ids, found_titles)
        return zip(found_ids, found_titles)
354
355
061a75ed
S
class YoutubePlaylistsBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
    """Entry-list extractor for pages that list playlists rather than videos."""

    def _process_page(self, content):
        """Yield a 'YoutubePlaylist' url_result for each distinct playlist id in *content*."""
        playlist_ids = re.findall(
            r'<h3[^>]+class="[^"]*yt-lockup-title[^"]*"[^>]*><a[^>]+href="/?playlist\?list=([0-9A-Za-z-_]{10,})"',
            content)
        # orderedSet removes repeated ids before results are produced
        for playlist_id in orderedSet(playlist_ids):
            playlist_url = 'https://www.youtube.com/playlist?list=%s' % playlist_id
            yield self.url_result(playlist_url, 'YoutubePlaylist')

    def _real_extract(self, url):
        """Download *url* and return a playlist result built from its paged entries."""
        playlist_id = self._match_id(url)
        webpage = self._download_webpage(url, playlist_id)
        # A missing og:title is tolerated (fatal=False)
        title = self._og_search_title(webpage, fatal=False)
        entries = self._entries(webpage, playlist_id)
        return self.playlist_result(entries, playlist_id, title)
0c148415
S
369
370
360e1ca5 371class YoutubeIE(YoutubeBaseInfoExtractor):
78caa52a 372 IE_DESC = 'YouTube.com'
cb7dfeea 373 _VALID_URL = r"""(?x)^
c5e8d7af 374 (
edb53e2d 375 (?:https?://|//) # http(s):// or protocol-independent URL
cb7dfeea 376 (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/|
484aaeb2 377 (?:www\.)?deturl\.com/www\.youtube\.com/|
e70dc1d1 378 (?:www\.)?pwnyoutube\.com/|
8b561bfc 379 (?:www\.)?hooktube\.com/|
f7000f3a 380 (?:www\.)?yourepeat\.com/|
e69ae5b9 381 tube\.majestyc\.net/|
ba036333 382 # Invidious instances taken from https://github.com/omarroth/invidious/wiki/Invidious-Instances
77d95677 383 (?:(?:www|dev)\.)?invidio\.us/|
ba036333 384 (?:(?:www|no)\.)?invidiou\.sh/|
385 (?:(?:www|fi|de)\.)?invidious\.snopyta\.org/|
8ae113ca 386 (?:www\.)?invidious\.kabi\.tk/|
ba036333 387 (?:www\.)?invidious\.enkirton\.net/|
388 (?:www\.)?invidious\.13ad\.de/|
791d2e81 389 (?:www\.)?invidious\.mastodon\.host/|
ba036333 390 (?:www\.)?tube\.poal\.co/|
8ae113ca 391 (?:www\.)?vid\.wxzm\.sx/|
e69ae5b9 392 youtube\.googleapis\.com/) # the various hostnames, with wildcard subdomains
c5e8d7af
PH
393 (?:.*?\#/)? # handle anchor (#/) redirect urls
394 (?: # the various things that can precede the ID:
ac7553d0 395 (?:(?:v|embed|e)/(?!videoseries)) # v/ or embed/ or e/
c5e8d7af 396 |(?: # or the v= param in all its forms
f7000f3a 397 (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
c5e8d7af 398 (?:\?|\#!?) # the params delimiter ? or # or #!
040ac686 399 (?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
c5e8d7af
PH
400 v=
401 )
f4b05232 402 ))
cbaed4bb
S
403 |(?:
404 youtu\.be| # just youtu.be/xxxx
6d4fc66b
S
405 vid\.plus| # or vid.plus/xxxx
406 zwearz\.com/watch| # or zwearz.com/watch/xxxx
cbaed4bb 407 )/
edb53e2d 408 |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
f4b05232 409 )
c5e8d7af 410 )? # all until now is optional -> you can pass the naked ID
8963d9c2 411 ([0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
d0ba5587
S
412 (?!.*?\blist=
413 (?:
414 %(playlist_id)s| # combined list/video URLs are handled by the playlist IE
415 WL # WL are handled by the watch later IE
416 )
417 )
c5e8d7af 418 (?(1).+)? # if we found the ID, everything can follow
d0ba5587 419 $""" % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
c5e8d7af 420 _NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
2c62dc26 421 _formats = {
c2d3cb4c 422 '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
423 '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
424 '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
425 '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
426 '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
427 '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
428 '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
429 '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
3834d3e3 430 # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
c2d3cb4c 431 '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
432 '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
433 '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
434 '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
435 '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
436 '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
e1a0bfdf 437 '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
c2d3cb4c 438 '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
439 '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
e1a0bfdf 440
441
442 # 3D videos
c2d3cb4c 443 '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
444 '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
445 '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
446 '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
e1a0bfdf 447 '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
448 '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
449 '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
836a086c 450
96fb5605 451 # Apple HTTP Live Streaming
11f12195 452 '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
c2d3cb4c 453 '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
454 '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
455 '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
456 '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
457 '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
e1a0bfdf 458 '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
459 '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
2c62dc26
PH
460
461 # DASH mp4 video
d23028a8
S
462 '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
463 '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
464 '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
465 '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
466 '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
067aa17e 467 '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
d23028a8
S
468 '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
469 '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
470 '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
471 '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
472 '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
473 '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},
836a086c 474
f6f1fc92 475 # Dash mp4 audio
d23028a8
S
476 '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
477 '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
478 '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
479 '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
480 '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
481 '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
482 '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},
836a086c
AZ
483
484 # Dash webm
d23028a8
S
485 '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
486 '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
487 '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
488 '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
489 '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
490 '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
491 '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
492 '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
493 '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
494 '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
495 '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
496 '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
497 '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
498 '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
499 '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
4c6b4764 500 # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
d23028a8
S
501 '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
502 '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
503 '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
504 '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
505 '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
506 '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
2c62dc26
PH
507
508 # Dash webm audio
d23028a8
S
509 '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
510 '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},
ce6b9a2d 511
0857baad 512 # Dash webm audio with opus inside
d23028a8
S
513 '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
514 '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
515 '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},
0857baad 516
ce6b9a2d
PH
517 # RTMP (unnamed)
518 '_rtmp': {'protocol': 'rtmp'},
b85eae0f
S
519
520 # av01 video only formats sometimes served with "unknown" codecs
521 '394': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
522 '395': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
523 '396': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
524 '397': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
c5e8d7af 525 }
19041a38 526 _SUBTITLE_FORMATS = ('srv1', 'srv2', 'srv3', 'ttml', 'vtt')
836a086c 527
fd5c4aab
S
528 _GEO_BYPASS = False
529
78caa52a 530 IE_NAME = 'youtube'
2eb88d95
PH
531 _TESTS = [
532 {
2d3d2997 533 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
4bc3a23e
PH
534 'info_dict': {
535 'id': 'BaW_jenozKc',
536 'ext': 'mp4',
537 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
538 'uploader': 'Philipp Hagemeister',
539 'uploader_id': 'phihag',
ec85ded8 540 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
dd4c4492
S
541 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
542 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
4bc3a23e
PH
543 'upload_date': '20121002',
544 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
545 'categories': ['Science & Technology'],
000b6b5a 546 'tags': ['youtube-dl'],
556dbe7f 547 'duration': 10,
dbdaaa23 548 'view_count': int,
3e7c1224
PH
549 'like_count': int,
550 'dislike_count': int,
7c80519c 551 'start_time': 1,
297a564b 552 'end_time': 9,
2eb88d95 553 }
0e853ca4 554 },
0e853ca4 555 {
2d3d2997 556 'url': 'https://www.youtube.com/watch?v=UxxajLWwzqY',
4bc3a23e
PH
557 'note': 'Test generic use_cipher_signature video (#897)',
558 'info_dict': {
559 'id': 'UxxajLWwzqY',
560 'ext': 'mp4',
561 'upload_date': '20120506',
562 'title': 'Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]',
0cb58b02 563 'alt_title': 'I Love It (feat. Charli XCX)',
7caf9830 564 'description': 'md5:f3ceb5ef83a08d95b9d146f973157cc8',
000b6b5a
S
565 'tags': ['Icona Pop i love it', 'sweden', 'pop music', 'big beat records', 'big beat', 'charli',
566 'xcx', 'charli xcx', 'girls', 'hbo', 'i love it', "i don't care", 'icona', 'pop',
567 'iconic ep', 'iconic', 'love', 'it'],
556dbe7f 568 'duration': 180,
4bc3a23e
PH
569 'uploader': 'Icona Pop',
570 'uploader_id': 'IconaPop',
ec85ded8 571 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IconaPop',
0cb58b02 572 'creator': 'Icona Pop',
936784b2
S
573 'track': 'I Love It (feat. Charli XCX)',
574 'artist': 'Icona Pop',
2eb88d95 575 }
c108eb73
JMF
576 },
577 {
4bc3a23e
PH
578 'url': 'https://www.youtube.com/watch?v=07FYdnEawAQ',
579 'note': 'Test VEVO video with age protection (#956)',
580 'info_dict': {
581 'id': '07FYdnEawAQ',
582 'ext': 'mp4',
583 'upload_date': '20130703',
4fe54c12 584 'title': 'Justin Timberlake - Tunnel Vision (Official Music Video) (Explicit)',
0cb58b02 585 'alt_title': 'Tunnel Vision',
4fe54c12 586 'description': 'md5:07dab3356cde4199048e4c7cd93471e1',
556dbe7f 587 'duration': 419,
4bc3a23e
PH
588 'uploader': 'justintimberlakeVEVO',
589 'uploader_id': 'justintimberlakeVEVO',
ec85ded8 590 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/justintimberlakeVEVO',
0cb58b02 591 'creator': 'Justin Timberlake',
7e72694b 592 'track': 'Tunnel Vision',
936784b2 593 'artist': 'Justin Timberlake',
34952f09 594 'age_limit': 18,
c108eb73
JMF
595 }
596 },
fccd3771 597 {
4bc3a23e
PH
598 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
599 'note': 'Embed-only video (#1746)',
600 'info_dict': {
601 'id': 'yZIXLfi8CZQ',
602 'ext': 'mp4',
603 'upload_date': '20120608',
604 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
605 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
606 'uploader': 'SET India',
94bfcd23 607 'uploader_id': 'setindia',
ec85ded8 608 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
94bfcd23 609 'age_limit': 18,
fccd3771
PH
610 }
611 },
11b56058 612 {
2d3d2997 613 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=UxxajLWwzqY',
11b56058
PM
614 'note': 'Use the first video ID in the URL',
615 'info_dict': {
616 'id': 'BaW_jenozKc',
617 'ext': 'mp4',
618 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
619 'uploader': 'Philipp Hagemeister',
620 'uploader_id': 'phihag',
ec85ded8 621 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
11b56058
PM
622 'upload_date': '20121002',
623 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
624 'categories': ['Science & Technology'],
625 'tags': ['youtube-dl'],
556dbe7f 626 'duration': 10,
dbdaaa23 627 'view_count': int,
11b56058
PM
628 'like_count': int,
629 'dislike_count': int,
34a7de29
S
630 },
631 'params': {
632 'skip_download': True,
633 },
11b56058 634 },
dd27fd17 635 {
2d3d2997 636 'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
4bc3a23e
PH
637 'note': '256k DASH audio (format 141) via DASH manifest',
638 'info_dict': {
639 'id': 'a9LDPn-MO4I',
640 'ext': 'm4a',
641 'upload_date': '20121002',
642 'uploader_id': '8KVIDEO',
ec85ded8 643 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
4bc3a23e
PH
644 'description': '',
645 'uploader': '8KVIDEO',
646 'title': 'UHDTV TEST 8K VIDEO.mp4'
4919603f 647 },
4bc3a23e
PH
648 'params': {
649 'youtube_include_dash_manifest': True,
650 'format': '141',
4919603f 651 },
de3c7fe0 652 'skip': 'format 141 not served anymore',
dd27fd17 653 },
3489b7d2
JMF
654 # DASH manifest with encrypted signature
655 {
78caa52a
PH
656 'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
657 'info_dict': {
658 'id': 'IB3lcPjvWLA',
659 'ext': 'm4a',
4fe54c12
S
660 'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
661 'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
556dbe7f 662 'duration': 244,
78caa52a
PH
663 'uploader': 'AfrojackVEVO',
664 'uploader_id': 'AfrojackVEVO',
665 'upload_date': '20131011',
3489b7d2 666 },
4bc3a23e 667 'params': {
78caa52a 668 'youtube_include_dash_manifest': True,
de3c7fe0 669 'format': '141/bestaudio[ext=m4a]',
3489b7d2
JMF
670 },
671 },
aaeb86f6
S
672 # JS player signature function name containing $
673 {
674 'url': 'https://www.youtube.com/watch?v=nfWlot6h_JM',
675 'info_dict': {
676 'id': 'nfWlot6h_JM',
677 'ext': 'm4a',
678 'title': 'Taylor Swift - Shake It Off',
4fe54c12 679 'description': 'md5:bec2185232c05479482cb5a9b82719bf',
556dbe7f 680 'duration': 242,
aaeb86f6
S
681 'uploader': 'TaylorSwiftVEVO',
682 'uploader_id': 'TaylorSwiftVEVO',
683 'upload_date': '20140818',
0cb58b02 684 'creator': 'Taylor Swift',
aaeb86f6
S
685 },
686 'params': {
687 'youtube_include_dash_manifest': True,
de3c7fe0 688 'format': '141/bestaudio[ext=m4a]',
aaeb86f6
S
689 },
690 },
aa79ac0c
PH
691 # Controversy video
692 {
693 'url': 'https://www.youtube.com/watch?v=T4XJQO3qol8',
694 'info_dict': {
695 'id': 'T4XJQO3qol8',
696 'ext': 'mp4',
556dbe7f 697 'duration': 219,
aa79ac0c 698 'upload_date': '20100909',
4fe54c12 699 'uploader': 'Amazing Atheist',
aa79ac0c 700 'uploader_id': 'TheAmazingAtheist',
ec85ded8 701 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheAmazingAtheist',
aa79ac0c
PH
702 'title': 'Burning Everyone\'s Koran',
703 'description': 'SUBSCRIBE: http://www.youtube.com/saturninefilms\n\nEven Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html',
704 }
c522adb1
JMF
705 },
706 # Normal age-gate video (No vevo, embed allowed)
707 {
2d3d2997 708 'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
c522adb1
JMF
709 'info_dict': {
710 'id': 'HtVdAasjOgU',
711 'ext': 'mp4',
712 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
ec85ded8 713 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
556dbe7f 714 'duration': 142,
c522adb1
JMF
715 'uploader': 'The Witcher',
716 'uploader_id': 'WitcherGame',
ec85ded8 717 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
c522adb1 718 'upload_date': '20140605',
34952f09 719 'age_limit': 18,
c522adb1
JMF
720 },
721 },
fccae2b9
S
722 # Age-gate video with encrypted signature
723 {
2d3d2997 724 'url': 'https://www.youtube.com/watch?v=6kLq3WMV1nU',
fccae2b9
S
725 'info_dict': {
726 'id': '6kLq3WMV1nU',
4fe54c12 727 'ext': 'mp4',
fccae2b9
S
728 'title': 'Dedication To My Ex (Miss That) (Lyric Video)',
729 'description': 'md5:33765bb339e1b47e7e72b5490139bb41',
eb6793ba 730 'duration': 246,
fccae2b9
S
731 'uploader': 'LloydVEVO',
732 'uploader_id': 'LloydVEVO',
ec85ded8 733 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/LloydVEVO',
fccae2b9 734 'upload_date': '20110629',
34952f09 735 'age_limit': 18,
fccae2b9
S
736 },
737 },
067aa17e 738 # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
7d02dcfa 739 # YouTube Red ad is not captured for creator
774e208f
PH
740 {
741 'url': '__2ABJjxzNo',
742 'info_dict': {
743 'id': '__2ABJjxzNo',
744 'ext': 'mp4',
556dbe7f 745 'duration': 266,
774e208f
PH
746 'upload_date': '20100430',
747 'uploader_id': 'deadmau5',
ec85ded8 748 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
0cb58b02 749 'creator': 'deadmau5',
774e208f
PH
750 'description': 'md5:12c56784b8032162bb936a5f76d55360',
751 'uploader': 'deadmau5',
752 'title': 'Deadmau5 - Some Chords (HD)',
0cb58b02 753 'alt_title': 'Some Chords',
774e208f
PH
754 },
755 'expected_warnings': [
756 'DASH manifest missing',
757 ]
e52a40ab 758 },
067aa17e 759 # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
e52a40ab
PH
760 {
761 'url': 'lqQg6PlCWgI',
762 'info_dict': {
763 'id': 'lqQg6PlCWgI',
764 'ext': 'mp4',
556dbe7f 765 'duration': 6085,
90227264 766 'upload_date': '20150827',
cbe2bd91 767 'uploader_id': 'olympic',
ec85ded8 768 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
cbe2bd91 769 'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
be49068d 770 'uploader': 'Olympic',
cbe2bd91
PH
771 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
772 },
773 'params': {
774 'skip_download': 'requires avconv',
e52a40ab 775 }
cbe2bd91 776 },
6271f1ca
PH
777 # Non-square pixels
778 {
779 'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
780 'info_dict': {
781 'id': '_b-2C3KPAM0',
782 'ext': 'mp4',
783 'stretched_ratio': 16 / 9.,
556dbe7f 784 'duration': 85,
6271f1ca
PH
785 'upload_date': '20110310',
786 'uploader_id': 'AllenMeow',
ec85ded8 787 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
6271f1ca 788 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
eb6793ba 789 'uploader': '孫ᄋᄅ',
6271f1ca
PH
790 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
791 },
06b491eb
S
792 },
793 # url_encoded_fmt_stream_map is empty string
794 {
795 'url': 'qEJwOuvDf7I',
796 'info_dict': {
797 'id': 'qEJwOuvDf7I',
f57b7835 798 'ext': 'webm',
06b491eb
S
799 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
800 'description': '',
801 'upload_date': '20150404',
802 'uploader_id': 'spbelect',
803 'uploader': 'Наблюдатели Петербурга',
804 },
805 'params': {
806 'skip_download': 'requires avconv',
e323cf3f
S
807 },
808 'skip': 'This live event has ended.',
06b491eb 809 },
067aa17e 810 # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
da77d856
S
811 {
812 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
813 'info_dict': {
814 'id': 'FIl7x6_3R5Y',
eb6793ba 815 'ext': 'webm',
da77d856
S
816 'title': 'md5:7b81415841e02ecd4313668cde88737a',
817 'description': 'md5:116377fd2963b81ec4ce64b542173306',
556dbe7f 818 'duration': 220,
da77d856
S
819 'upload_date': '20150625',
820 'uploader_id': 'dorappi2000',
ec85ded8 821 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
da77d856 822 'uploader': 'dorappi2000',
eb6793ba 823 'formats': 'mincount:31',
da77d856 824 },
eb6793ba 825 'skip': 'not actual anymore',
2ee8f5d8 826 },
8a1a26ce
YCH
827 # DASH manifest with segment_list
828 {
829 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
830 'md5': '8ce563a1d667b599d21064e982ab9e31',
831 'info_dict': {
832 'id': 'CsmdDsKjzN8',
833 'ext': 'mp4',
17ee98e1 834 'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510
8a1a26ce
YCH
835 'uploader': 'Airtek',
836 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
837 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
838 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
839 },
840 'params': {
841 'youtube_include_dash_manifest': True,
842 'format': '135', # bestvideo
be49068d
S
843 },
844 'skip': 'This live event has ended.',
2ee8f5d8 845 },
cf7e015f
S
846 {
847 # Multifeed videos (multiple cameras), URL is for Main Camera
848 'url': 'https://www.youtube.com/watch?v=jqWvoWXjCVs',
849 'info_dict': {
850 'id': 'jqWvoWXjCVs',
851 'title': 'teamPGP: Rocket League Noob Stream',
852 'description': 'md5:dc7872fb300e143831327f1bae3af010',
853 },
854 'playlist': [{
855 'info_dict': {
856 'id': 'jqWvoWXjCVs',
857 'ext': 'mp4',
858 'title': 'teamPGP: Rocket League Noob Stream (Main Camera)',
859 'description': 'md5:dc7872fb300e143831327f1bae3af010',
556dbe7f 860 'duration': 7335,
cf7e015f
S
861 'upload_date': '20150721',
862 'uploader': 'Beer Games Beer',
863 'uploader_id': 'beergamesbeer',
ec85ded8 864 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
7caf9830 865 'license': 'Standard YouTube License',
cf7e015f
S
866 },
867 }, {
868 'info_dict': {
869 'id': '6h8e8xoXJzg',
870 'ext': 'mp4',
871 'title': 'teamPGP: Rocket League Noob Stream (kreestuh)',
872 'description': 'md5:dc7872fb300e143831327f1bae3af010',
556dbe7f 873 'duration': 7337,
cf7e015f
S
874 'upload_date': '20150721',
875 'uploader': 'Beer Games Beer',
876 'uploader_id': 'beergamesbeer',
ec85ded8 877 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
7caf9830 878 'license': 'Standard YouTube License',
cf7e015f
S
879 },
880 }, {
881 'info_dict': {
882 'id': 'PUOgX5z9xZw',
883 'ext': 'mp4',
884 'title': 'teamPGP: Rocket League Noob Stream (grizzle)',
885 'description': 'md5:dc7872fb300e143831327f1bae3af010',
556dbe7f 886 'duration': 7337,
cf7e015f
S
887 'upload_date': '20150721',
888 'uploader': 'Beer Games Beer',
889 'uploader_id': 'beergamesbeer',
ec85ded8 890 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
7caf9830 891 'license': 'Standard YouTube License',
cf7e015f
S
892 },
893 }, {
894 'info_dict': {
895 'id': 'teuwxikvS5k',
896 'ext': 'mp4',
897 'title': 'teamPGP: Rocket League Noob Stream (zim)',
898 'description': 'md5:dc7872fb300e143831327f1bae3af010',
556dbe7f 899 'duration': 7334,
cf7e015f
S
900 'upload_date': '20150721',
901 'uploader': 'Beer Games Beer',
902 'uploader_id': 'beergamesbeer',
ec85ded8 903 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
7caf9830 904 'license': 'Standard YouTube License',
cf7e015f
S
905 },
906 }],
907 'params': {
908 'skip_download': True,
909 },
4fe54c12 910 'skip': 'This video is not available.',
cbaed4bb 911 },
f9f49d87 912 {
067aa17e 913 # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
f9f49d87
S
914 'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
915 'info_dict': {
916 'id': 'gVfLd0zydlo',
917 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
918 },
919 'playlist_count': 2,
be49068d 920 'skip': 'Not multifeed anymore',
f9f49d87 921 },
cbaed4bb 922 {
2d3d2997 923 'url': 'https://vid.plus/FlRa-iH7PGw',
cbaed4bb 924 'only_matching': True,
0e49d9a6 925 },
6d4fc66b 926 {
2d3d2997 927 'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
6d4fc66b
S
928 'only_matching': True,
929 },
0e49d9a6 930 {
067aa17e 931 # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
a8776b10 932 # Also tests cut-off URL expansion in video description (see
067aa17e
S
933 # https://github.com/ytdl-org/youtube-dl/issues/1892,
934 # https://github.com/ytdl-org/youtube-dl/issues/8164)
0e49d9a6
LL
935 'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
936 'info_dict': {
937 'id': 'lsguqyKfVQg',
938 'ext': 'mp4',
939 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
eb6793ba 940 'alt_title': 'Dark Walk - Position Music',
0e49d9a6 941 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
556dbe7f 942 'duration': 133,
0e49d9a6
LL
943 'upload_date': '20151119',
944 'uploader_id': 'IronSoulElf',
ec85ded8 945 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
0e49d9a6 946 'uploader': 'IronSoulElf',
eb6793ba
S
947 'creator': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
948 'track': 'Dark Walk - Position Music',
949 'artist': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
92bc97d3 950 'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
0e49d9a6
LL
951 },
952 'params': {
953 'skip_download': True,
954 },
955 },
61f92af1 956 {
067aa17e 957 # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
61f92af1
S
958 'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
959 'only_matching': True,
960 },
313dfc45
LL
961 {
962 # Video with yt:stretch=17:0
963 'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
964 'info_dict': {
965 'id': 'Q39EVAstoRM',
966 'ext': 'mp4',
967 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
968 'description': 'md5:ee18a25c350637c8faff806845bddee9',
969 'upload_date': '20151107',
970 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
971 'uploader': 'CH GAMER DROID',
972 },
973 'params': {
974 'skip_download': True,
975 },
be49068d 976 'skip': 'This video does not exist.',
313dfc45 977 },
7caf9830
S
978 {
979 # Video licensed under Creative Commons
980 'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
981 'info_dict': {
982 'id': 'M4gD1WSo5mA',
983 'ext': 'mp4',
984 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
985 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
556dbe7f 986 'duration': 721,
7caf9830
S
987 'upload_date': '20150127',
988 'uploader_id': 'BerkmanCenter',
ec85ded8 989 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
556dbe7f 990 'uploader': 'The Berkman Klein Center for Internet & Society',
7caf9830
S
991 'license': 'Creative Commons Attribution license (reuse allowed)',
992 },
993 'params': {
994 'skip_download': True,
995 },
996 },
fd050249
S
997 {
998 # Channel-like uploader_url
999 'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
1000 'info_dict': {
1001 'id': 'eQcmzGIKrzg',
1002 'ext': 'mp4',
1003 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
1004 'description': 'md5:dda0d780d5a6e120758d1711d062a867',
556dbe7f 1005 'duration': 4060,
fd050249 1006 'upload_date': '20151119',
eb6793ba 1007 'uploader': 'Bernie Sanders',
fd050249 1008 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
ec85ded8 1009 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
fd050249
S
1010 'license': 'Creative Commons Attribution license (reuse allowed)',
1011 },
1012 'params': {
1013 'skip_download': True,
1014 },
1015 },
040ac686
S
1016 {
1017 'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
1018 'only_matching': True,
7f29cf54
S
1019 },
1020 {
067aa17e 1021 # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
7f29cf54
S
1022 'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
1023 'only_matching': True,
6496ccb4
S
1024 },
1025 {
1026 # Rental video preview
1027 'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
1028 'info_dict': {
1029 'id': 'uGpuVWrhIzE',
1030 'ext': 'mp4',
1031 'title': 'Piku - Trailer',
1032 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
1033 'upload_date': '20150811',
1034 'uploader': 'FlixMatrix',
1035 'uploader_id': 'FlixMatrixKaravan',
ec85ded8 1036 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
6496ccb4
S
1037 'license': 'Standard YouTube License',
1038 },
1039 'params': {
1040 'skip_download': True,
1041 },
eb6793ba 1042 'skip': 'This video is not available.',
022a5d66 1043 },
12afdc2a
S
1044 {
1045 # YouTube Red video with episode data
1046 'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
1047 'info_dict': {
1048 'id': 'iqKdEhx-dD4',
1049 'ext': 'mp4',
1050 'title': 'Isolation - Mind Field (Ep 1)',
4fe54c12 1051 'description': 'md5:46a29be4ceffa65b92d277b93f463c0f',
556dbe7f 1052 'duration': 2085,
12afdc2a
S
1053 'upload_date': '20170118',
1054 'uploader': 'Vsauce',
1055 'uploader_id': 'Vsauce',
1056 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
12afdc2a
S
1057 'series': 'Mind Field',
1058 'season_number': 1,
1059 'episode_number': 1,
1060 },
1061 'params': {
1062 'skip_download': True,
1063 },
1064 'expected_warnings': [
1065 'Skipping DASH manifest',
1066 ],
1067 },
c7121fa7
S
1068 {
1069 # The following content has been identified by the YouTube community
1070 # as inappropriate or offensive to some audiences.
1071 'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
1072 'info_dict': {
1073 'id': '6SJNVb0GnPI',
1074 'ext': 'mp4',
1075 'title': 'Race Differences in Intelligence',
1076 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
1077 'duration': 965,
1078 'upload_date': '20140124',
1079 'uploader': 'New Century Foundation',
1080 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
1081 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
c7121fa7
S
1082 },
1083 'params': {
1084 'skip_download': True,
1085 },
1086 },
022a5d66
S
1087 {
1088 # itag 212
1089 'url': '1t24XAntNCY',
1090 'only_matching': True,
fd5c4aab
S
1091 },
1092 {
1093 # geo restricted to JP
1094 'url': 'sJL6WA-aGkQ',
1095 'only_matching': True,
1096 },
d0ba5587
S
1097 {
1098 'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
1099 'only_matching': True,
1100 },
cd5a74a2
S
1101 {
1102 'url': 'https://invidio.us/watch?v=BaW_jenozKc',
1103 'only_matching': True,
1104 },
825cd268
RA
1105 {
1106 # DRM protected
1107 'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
1108 'only_matching': True,
4fe54c12
S
1109 },
1110 {
1111 # Video with unsupported adaptive stream type formats
1112 'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
1113 'info_dict': {
1114 'id': 'Z4Vy8R84T1U',
1115 'ext': 'mp4',
1116 'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
1117 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
1118 'duration': 433,
1119 'upload_date': '20130923',
1120 'uploader': 'Amelia Putri Harwita',
1121 'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',
1122 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',
1123 'formats': 'maxcount:10',
1124 },
1125 'params': {
1126 'skip_download': True,
1127 'youtube_include_dash_manifest': False,
1128 },
5caabd3c 1129 },
1130 {
822b9d9c 1131 # Youtube Music Auto-generated description
5caabd3c 1132 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1133 'info_dict': {
1134 'id': 'MgNrAu2pzNs',
1135 'ext': 'mp4',
1136 'title': 'Voyeur Girl',
1137 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
1138 'upload_date': '20190312',
1139 'uploader': 'Various Artists - Topic',
1140 'uploader_id': 'UCVWKBi1ELZn0QX2CBLSkiyw',
1141 'artist': 'Stephen',
1142 'track': 'Voyeur Girl',
1143 'album': 'it\'s too much love to know my dear',
1144 'release_date': '20190313',
1145 'release_year': 2019,
1146 },
1147 'params': {
1148 'skip_download': True,
1149 },
1150 },
1151 {
822b9d9c 1152 # Youtube Music Auto-generated description
5caabd3c 1153 # Retrieve 'artist' field from 'Artist:' in video description
1154 # when it is present on youtube music video
5caabd3c 1155 'url': 'https://www.youtube.com/watch?v=k0jLE7tTwjY',
1156 'info_dict': {
1157 'id': 'k0jLE7tTwjY',
1158 'ext': 'mp4',
1159 'title': 'Latch Feat. Sam Smith',
1160 'description': 'md5:3cb1e8101a7c85fcba9b4fb41b951335',
1161 'upload_date': '20150110',
1162 'uploader': 'Various Artists - Topic',
1163 'uploader_id': 'UCNkEcmYdjrH4RqtNgh7BZ9w',
1164 'artist': 'Disclosure',
1165 'track': 'Latch Feat. Sam Smith',
1166 'album': 'Latch Featuring Sam Smith',
1167 'release_date': '20121008',
1168 'release_year': 2012,
1169 },
1170 'params': {
1171 'skip_download': True,
1172 },
1173 },
1174 {
822b9d9c 1175 # Youtube Music Auto-generated description
5caabd3c 1176 # handle multiple artists on youtube music video
1177 'url': 'https://www.youtube.com/watch?v=74qn0eJSjpA',
1178 'info_dict': {
1179 'id': '74qn0eJSjpA',
1180 'ext': 'mp4',
1181 'title': 'Eastside',
1182 'description': 'md5:290516bb73dcbfab0dcc4efe6c3de5f2',
1183 'upload_date': '20180710',
1184 'uploader': 'Benny Blanco - Topic',
1185 'uploader_id': 'UCzqz_ksRu_WkIzmivMdIS7A',
1186 'artist': 'benny blanco, Halsey, Khalid',
1187 'track': 'Eastside',
1188 'album': 'Eastside',
1189 'release_date': '20180713',
1190 'release_year': 2018,
1191 },
1192 'params': {
1193 'skip_download': True,
1194 },
1195 },
1196 {
822b9d9c 1197 # Youtube Music Auto-generated description
5caabd3c 1198 # handle youtube music video with release_year and no release_date
1199 'url': 'https://www.youtube.com/watch?v=-hcAI0g-f5M',
1200 'info_dict': {
1201 'id': '-hcAI0g-f5M',
1202 'ext': 'mp4',
1203 'title': 'Put It On Me',
1204 'description': 'md5:93c55acc682ae7b0c668f2e34e1c069e',
1205 'upload_date': '20180426',
1206 'uploader': 'Matt Maeson - Topic',
1207 'uploader_id': 'UCnEkIGqtGcQMLk73Kp-Q5LQ',
1208 'artist': 'Matt Maeson',
1209 'track': 'Put It On Me',
1210 'album': 'The Hearse',
1211 'release_date': None,
1212 'release_year': 2018,
1213 },
1214 'params': {
1215 'skip_download': True,
1216 },
1217 },
2eb88d95
PH
1218 ]
1219
e0df6211
PH
def __init__(self, *args, **kwargs):
    """Initialise the extractor with an empty signature-function cache.

    All positional and keyword arguments are passed through unchanged to
    the parent class initialiser.
    """
    super(YoutubeIE, self).__init__(*args, **kwargs)
    # Filled on demand by _decrypt_signature; keys are
    # (player_url, signature cache id) tuples.
    self._player_cache = {}
e0df6211 1223
c5e8d7af
PH
def report_video_info_webpage_download(self, video_id):
    """Print a status line saying the video info webpage is being fetched.

    video_id is used as the prefix of the printed message.
    """
    message = '%s: Downloading video info webpage' % video_id
    self.to_screen(message)
c5e8d7af 1227
c5e8d7af
PH
def report_information_extraction(self, video_id):
    """Print a status line saying video information is being extracted.

    video_id is used as the prefix of the printed message.
    """
    message = '%s: Extracting video information' % video_id
    self.to_screen(message)
c5e8d7af
PH
1231
def report_unavailable_format(self, video_id, format):
    """Print a status line saying the requested format is not available.

    Both video_id and format are interpolated into the printed message.
    """
    message = '%s: Format %s not available' % (video_id, format)
    self.to_screen(message)
c5e8d7af
PH
1235
def report_rtmp_download(self):
    """Print a status line saying the download will use the RTMP protocol."""
    message = 'RTMP download detected'
    self.to_screen(message)
c5e8d7af 1239
60064c53
PH
def _signature_cache_id(self, example_sig):
    """Return a string describing the shape of a signature.

    The signature is split on '.', and the lengths of the resulting parts
    are joined with '.' again (for example 'abc.de' gives '3.2').
    """
    part_lengths = [
        compat_str(len(piece)) for piece in example_sig.split('.')]
    return '.'.join(part_lengths)
60064c53
PH
1243
1244 def _extract_signature_function(self, video_id, player_url, example_sig):
cf010131 1245 id_m = re.match(
63529e93 1246 r'.*?-(?P<id>[a-zA-Z0-9_-]+)(?:/watch_as3|/html5player(?:-new)?|(?:/[a-z]{2,3}_[A-Z]{2})?/base)?\.(?P<ext>[a-z]+)$',
cf010131 1247 player_url)
c081b35c
PH
1248 if not id_m:
1249 raise ExtractorError('Cannot identify player %r' % player_url)
e0df6211
PH
1250 player_type = id_m.group('ext')
1251 player_id = id_m.group('id')
1252
c4417ddb 1253 # Read from filesystem cache
60064c53
PH
1254 func_id = '%s_%s_%s' % (
1255 player_type, player_id, self._signature_cache_id(example_sig))
c4417ddb 1256 assert os.path.basename(func_id) == func_id
a0e07d31 1257
69ea8ca4 1258 cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
a0e07d31 1259 if cache_spec is not None:
78caa52a 1260 return lambda s: ''.join(s[i] for i in cache_spec)
83799698 1261
6d1a55a5
PH
1262 download_note = (
1263 'Downloading player %s' % player_url
1264 if self._downloader.params.get('verbose') else
1265 'Downloading %s player %s' % (player_type, player_id)
1266 )
e0df6211
PH
1267 if player_type == 'js':
1268 code = self._download_webpage(
1269 player_url, video_id,
6d1a55a5 1270 note=download_note,
69ea8ca4 1271 errnote='Download of %s failed' % player_url)
83799698 1272 res = self._parse_sig_js(code)
c4417ddb 1273 elif player_type == 'swf':
e0df6211
PH
1274 urlh = self._request_webpage(
1275 player_url, video_id,
6d1a55a5 1276 note=download_note,
69ea8ca4 1277 errnote='Download of %s failed' % player_url)
e0df6211 1278 code = urlh.read()
83799698 1279 res = self._parse_sig_swf(code)
e0df6211
PH
1280 else:
1281 assert False, 'Invalid player type %r' % player_type
1282
785521bf
PH
1283 test_string = ''.join(map(compat_chr, range(len(example_sig))))
1284 cache_res = res(test_string)
1285 cache_spec = [ord(c) for c in cache_res]
83799698 1286
69ea8ca4 1287 self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec)
83799698
PH
1288 return res
1289
def _print_sig_code(self, func, example_sig):
    """Print Python source equivalent to the signature function func.

    func is applied to a probe string whose characters are their own
    positions, giving the index permutation it performs. That permutation
    is rendered as a sum of s[...] index and slice expressions, guarded by
    a check on the dot-separated part lengths of example_sig, and the
    resulting snippet is written with to_screen.
    """
    def _slice_expr(first, last, stride):
        # Render the run first..last (inclusive) as a Python slice of s.
        lower = '' if first == 0 else str(first)
        stop = last + stride
        upper = (':%d' % stop) if stop >= 0 else ':'
        stride_part = '' if stride == 1 else (':%d' % stride)
        return 's[%s%s%s]' % (lower, upper, stride_part)

    def gen_sig_code(idxs):
        # Walk consecutive index pairs, collapsing runs that move by a
        # constant +1/-1 into slices and emitting the rest one by one.
        stride = None
        # Placeholder only - first is assigned whenever stride is
        first = '(Never used)'
        for cur, before in zip(idxs[1:], idxs[:-1]):
            delta = cur - before
            if stride is not None:
                if delta != stride:
                    # The current run ended at the previous index.
                    yield _slice_expr(first, before, stride)
                    stride = None
                continue
            if delta in (-1, 1):
                stride, first = delta, before
            else:
                yield 's[%d]' % before
        if stride is None:
            yield 's[%d]' % cur
        else:
            yield _slice_expr(first, cur, stride)

    probe = ''.join(map(compat_chr, range(len(example_sig))))
    permutation = [ord(ch) for ch in func(probe)]
    expr_code = ' + '.join(gen_sig_code(permutation))
    part_lengths = ', '.join(
        compat_str(len(part)) for part in example_sig.split('.'))
    signature_id_tuple = '(%s)' % part_lengths
    code = (
        'if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
        ' return %s\n') % (signature_id_tuple, expr_code)
    self.to_screen('Extracted signature function:\n' + code)
edf3e38e 1328
e0df6211
PH
def _parse_sig_js(self, jscode):
    """Build a signature function from the JavaScript player source.

    The name of the player's signature function is located by trying the
    regular expressions below in order; the function is then extracted
    with JSInterpreter. The returned callable takes the signature string
    and passes it to the extracted function as its only argument.
    """
    funcname = self._search_regex(
        (r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
         # Obsolete patterns
         r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
         r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         # This pattern used to be listed twice in a row; the repeat could
         # never match when the first attempt had failed, so it was dropped.
         r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\('),
        jscode, 'Initial JS player signature function name', group='sig')

    jsi = JSInterpreter(jscode)
    initial_function = jsi.extract_function(funcname)
    return lambda s: initial_function([s])
1348
def _parse_sig_swf(self, file_contents):
    """Build a signature function from the contents of a SWF player.

    The 'decipher' function of the 'SignatureDecipher' class is extracted
    with SWFInterpreter. The returned callable takes the signature string
    and passes it to that function as its only argument.
    """
    interpreter = SWFInterpreter(file_contents)
    decipher_class = interpreter.extract_class('SignatureDecipher')
    decipher = interpreter.extract_function(decipher_class, 'decipher')

    def _apply(s):
        return decipher([s])

    return _apply
1355
83799698 1356 def _decrypt_signature(self, s, video_id, player_url, age_gate=False):
257a2501 1357 """Turn the encrypted s field into a working signature"""
6b37f0be 1358
c8bf86d5 1359 if player_url is None:
69ea8ca4 1360 raise ExtractorError('Cannot decrypt signature without player_url')
920de7a2 1361
69ea8ca4 1362 if player_url.startswith('//'):
78caa52a 1363 player_url = 'https:' + player_url
3c90cc8b
S
1364 elif not re.match(r'https?://', player_url):
1365 player_url = compat_urlparse.urljoin(
1366 'https://www.youtube.com', player_url)
c8bf86d5 1367 try:
62af3a0e 1368 player_id = (player_url, self._signature_cache_id(s))
c8bf86d5
PH
1369 if player_id not in self._player_cache:
1370 func = self._extract_signature_function(
60064c53 1371 video_id, player_url, s
c8bf86d5
PH
1372 )
1373 self._player_cache[player_id] = func
1374 func = self._player_cache[player_id]
1375 if self._downloader.params.get('youtube_print_sig_code'):
60064c53 1376 self._print_sig_code(func, s)
c8bf86d5
PH
1377 return func(s)
1378 except Exception as e:
1379 tb = traceback.format_exc()
1380 raise ExtractorError(
78caa52a 1381 'Signature extraction failed: ' + tb, cause=e)
e0df6211 1382
360e1ca5 1383 def _get_subtitles(self, video_id, webpage):
de7f3446 1384 try:
60e47a26 1385 subs_doc = self._download_xml(
38c2e5b8 1386 'https://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id,
7fad1c63
JMF
1387 video_id, note=False)
1388 except ExtractorError as err:
9b9c5355 1389 self._downloader.report_warning('unable to download video subtitles: %s' % error_to_compat_str(err))
de7f3446 1390 return {}
de7f3446
JMF
1391
1392 sub_lang_list = {}
60e47a26
JMF
1393 for track in subs_doc.findall('track'):
1394 lang = track.attrib['lang_code']
7e660ac1
LD
1395 if lang in sub_lang_list:
1396 continue
360e1ca5 1397 sub_formats = []
23d17e4b 1398 for ext in self._SUBTITLE_FORMATS:
15707c7e 1399 params = compat_urllib_parse_urlencode({
360e1ca5
JMF
1400 'lang': lang,
1401 'v': video_id,
1402 'fmt': ext,
1403 'name': track.attrib['name'].encode('utf-8'),
1404 })
1405 sub_formats.append({
1406 'url': 'https://www.youtube.com/api/timedtext?' + params,
1407 'ext': ext,
1408 })
1409 sub_lang_list[lang] = sub_formats
de7f3446 1410 if not sub_lang_list:
69ea8ca4 1411 self._downloader.report_warning('video doesn\'t have subtitles')
de7f3446
JMF
1412 return {}
1413 return sub_lang_list
1414
a72778d3
S
1415 def _get_ytplayer_config(self, video_id, webpage):
1416 patterns = (
526b3b07
S
1417 # User data may contain arbitrary character sequences that may affect
1418 # JSON extraction with regex, e.g. when '};' is contained the second
1419 # regex won't capture the whole JSON. Yet working around by trying more
1420 # concrete regex first keeping in mind proper quoted string handling
1421 # to be implemented in future that will replace this workaround (see
067aa17e
S
1422 # https://github.com/ytdl-org/youtube-dl/issues/7468,
1423 # https://github.com/ytdl-org/youtube-dl/pull/7599)
a72778d3
S
1424 r';ytplayer\.config\s*=\s*({.+?});ytplayer',
1425 r';ytplayer\.config\s*=\s*({.+?});',
1426 )
1427 config = self._search_regex(
1428 patterns, webpage, 'ytplayer.config', default=None)
1429 if config:
1430 return self._parse_json(
1431 uppercase_escape(config), video_id, fatal=False)
0e49d9a6 1432
360e1ca5 1433 def _get_automatic_captions(self, video_id, webpage):
de7f3446
JMF
1434 """We need the webpage for getting the captions url, pass it as an
1435 argument to speed up the process."""
69ea8ca4 1436 self.to_screen('%s: Looking for automatic captions' % video_id)
a72778d3 1437 player_config = self._get_ytplayer_config(video_id, webpage)
78caa52a 1438 err_msg = 'Couldn\'t find automatic captions for %s' % video_id
a72778d3 1439 if not player_config:
de7f3446
JMF
1440 self._downloader.report_warning(err_msg)
1441 return {}
de7f3446 1442 try:
0792d563 1443 args = player_config['args']
b78b292f
S
1444 caption_url = args.get('ttsurl')
1445 if caption_url:
1446 timestamp = args['timestamp']
1447 # We get the available subtitles
15707c7e 1448 list_params = compat_urllib_parse_urlencode({
b78b292f
S
1449 'type': 'list',
1450 'tlangs': 1,
1451 'asrs': 1,
1452 })
1453 list_url = caption_url + '&' + list_params
1454 caption_list = self._download_xml(list_url, video_id)
1455 original_lang_node = caption_list.find('track')
1456 if original_lang_node is None:
1457 self._downloader.report_warning('Video doesn\'t have automatic captions')
1458 return {}
1459 original_lang = original_lang_node.attrib['lang_code']
1460 caption_kind = original_lang_node.attrib.get('kind', '')
1461
1462 sub_lang_list = {}
1463 for lang_node in caption_list.findall('target'):
1464 sub_lang = lang_node.attrib['lang_code']
1465 sub_formats = []
1466 for ext in self._SUBTITLE_FORMATS:
15707c7e 1467 params = compat_urllib_parse_urlencode({
b78b292f
S
1468 'lang': original_lang,
1469 'tlang': sub_lang,
1470 'fmt': ext,
1471 'ts': timestamp,
1472 'kind': caption_kind,
1473 })
1474 sub_formats.append({
1475 'url': caption_url + '&' + params,
1476 'ext': ext,
1477 })
1478 sub_lang_list[sub_lang] = sub_formats
1479 return sub_lang_list
1480
ddbb4c5c
S
1481 def make_captions(sub_url, sub_langs):
1482 parsed_sub_url = compat_urllib_parse_urlparse(sub_url)
1483 caption_qs = compat_parse_qs(parsed_sub_url.query)
1484 captions = {}
1485 for sub_lang in sub_langs:
1486 sub_formats = []
1487 for ext in self._SUBTITLE_FORMATS:
1488 caption_qs.update({
1489 'tlang': [sub_lang],
1490 'fmt': [ext],
1491 })
1492 sub_url = compat_urlparse.urlunparse(parsed_sub_url._replace(
1493 query=compat_urllib_parse_urlencode(caption_qs, True)))
1494 sub_formats.append({
1495 'url': sub_url,
1496 'ext': ext,
1497 })
1498 captions[sub_lang] = sub_formats
1499 return captions
1500
1501 # New captions format as of 22.06.2017
1502 player_response = args.get('player_response')
1503 if player_response and isinstance(player_response, compat_str):
1504 player_response = self._parse_json(
1505 player_response, video_id, fatal=False)
1506 if player_response:
1507 renderer = player_response['captions']['playerCaptionsTracklistRenderer']
1508 base_url = renderer['captionTracks'][0]['baseUrl']
1509 sub_lang_list = []
1510 for lang in renderer['translationLanguages']:
1511 lang_code = lang.get('languageCode')
1512 if lang_code:
1513 sub_lang_list.append(lang_code)
1514 return make_captions(base_url, sub_lang_list)
1515
b78b292f
S
1516 # Some videos don't provide ttsurl but rather caption_tracks and
1517 # caption_translation_languages (e.g. 20LmZk1hakA)
ddbb4c5c 1518 # Not used anymore as of 22.06.2017
b78b292f
S
1519 caption_tracks = args['caption_tracks']
1520 caption_translation_languages = args['caption_translation_languages']
1521 caption_url = compat_parse_qs(caption_tracks.split(',')[0])['u'][0]
ddbb4c5c 1522 sub_lang_list = []
b78b292f
S
1523 for lang in caption_translation_languages.split(','):
1524 lang_qs = compat_parse_qs(compat_urllib_parse_unquote_plus(lang))
1525 sub_lang = lang_qs.get('lc', [None])[0]
ddbb4c5c
S
1526 if sub_lang:
1527 sub_lang_list.append(sub_lang)
1528 return make_captions(caption_url, sub_lang_list)
de7f3446
JMF
1529 # An extractor error can be raised by the download process if there are
1530 # no automatic captions but there are subtitles
ddbb4c5c 1531 except (KeyError, IndexError, ExtractorError):
de7f3446
JMF
1532 self._downloader.report_warning(err_msg)
1533 return {}
1534
21c340b8
S
def _mark_watched(self, video_id, video_info, player_response):
    """Request the playback statistics URL of a video ('Marking watched').

    The base URL is taken from player_response
    (playbackTracking.videostatsPlaybackUrl.baseUrl) and, failing that,
    from video_info ('videostats_playback_base_url'). Nothing is done
    when neither yields a valid URL. The request itself is non-fatal.
    """
    base_url = try_get(
        player_response,
        lambda x: x['playbackTracking']['videostatsPlaybackUrl']['baseUrl'])
    if not base_url:
        base_url = try_get(
            video_info, lambda x: x['videostats_playback_base_url'][0])
    playback_url = url_or_none(base_url)
    if not playback_url:
        return

    url_parts = compat_urlparse.urlparse(playback_url)
    query = compat_urlparse.parse_qs(url_parts.query)

    # cpn generation algorithm is reverse engineered from base.js.
    # In fact it works even with dummy cpn.
    CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
    cpn_chars = []
    for _ in range(0, 16):
        cpn_chars.append(CPN_ALPHABET[random.randint(0, 256) & 63])

    # Add (or override) the parameters sent along with the stats URL
    query['ver'] = ['2']
    query['cpn'] = [''.join(cpn_chars)]

    new_query = compat_urllib_parse_urlencode(query, True)
    playback_url = compat_urlparse.urlunparse(
        url_parts._replace(query=new_query))

    self._download_webpage(
        playback_url, video_id, 'Marking watched',
        'Unable to mark watched', fatal=False)
1560
66c9fa36
S
@staticmethod
def _extract_urls(webpage):
    """Find YouTube embeds in the HTML of an arbitrary webpage.

    Returns a list containing, in this order:
      * HTML-unescaped URLs of embedded YouTube players (iframe, embed,
        object, data-video-url, swfobject),
      * HTML-unescaped data-youtube-id attribute values of lazyYT embeds,
      * data-video_id attribute values of Wordpress "YouTube Video
        Importer" players.
    The list is empty when nothing is found.
    """
    # Embedded YouTube player
    # NB: the embedSWF alternative used to be spelled `embedSWF\(?:\s*`,
    # which requires a literal colon and therefore never matched the usual
    # `embedSWF("<url>", ...)` call. Both spellings are accepted now.
    entries = [
        unescapeHTML(mobj.group('url'))
        for mobj in re.finditer(r'''(?x)
        (?:
            <iframe[^>]+?src=|
            data-video-url=|
            <embed[^>]+?src=|
            embedSWF(?:\(|\(?:)\s*|
            <object[^>]+data=|
            new\s+SWFObject\(
        )
        (["\'])
        (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
            (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
        \1''', webpage)]

    # lazyYT YouTube embed
    entries.extend(
        unescapeHTML(youtube_id)
        for youtube_id in re.findall(
            r'class="lazyYT" data-youtube-id="([^"]+)"', webpage))

    # Wordpress "YouTube Video Importer" plugin
    matches = re.findall(r'''(?x)<div[^>]+
        class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
        data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
    # Each match is a (q1, q2, video id) tuple; only the last item matters
    entries.extend(m[-1] for m in matches)

    return entries
1592
@staticmethod
def _extract_url(webpage):
    """Return the first YouTube embed found in webpage, or None if there is none."""
    found = YoutubeIE._extract_urls(webpage)
    if not found:
        return None
    return found[0]
1597
97665381
PH
@classmethod
def extract_id(cls, url):
    """Return the video id, i.e. group 2 of cls._VALID_URL matched against url.

    Raises ExtractorError when url does not match cls._VALID_URL.
    """
    match = re.match(cls._VALID_URL, url, re.VERBOSE)
    if match is not None:
        return match.group(2)
    raise ExtractorError('Invalid URL: %s' % url)
1605
9cafc3fd
S
1606 @staticmethod
1607 def _extract_chapters(description, duration):
1608 if not description:
1609 return None
1610 chapter_lines = re.findall(
1611 r'(?:^|<br\s*/>)([^<]*<a[^>]+onclick=["\']yt\.www\.watch\.player\.seekTo[^>]+>(\d{1,2}:\d{1,2}(?::\d{1,2})?)</a>[^>]*)(?=$|<br\s*/>)',
1612 description)
1613 if not chapter_lines:
1614 return None
1615 chapters = []
1616 for next_num, (chapter_line, time_point) in enumerate(
1617 chapter_lines, start=1):
1618 start_time = parse_duration(time_point)
1619 if start_time is None:
1620 continue
39d4c1be
S
1621 if start_time > duration:
1622 break
9cafc3fd
S
1623 end_time = (duration if next_num == len(chapter_lines)
1624 else parse_duration(chapter_lines[next_num][1]))
1625 if end_time is None:
1626 continue
39d4c1be
S
1627 if end_time > duration:
1628 end_time = duration
1629 if start_time > end_time:
1630 break
9cafc3fd
S
1631 chapter_title = re.sub(
1632 r'<a[^>]+>[^<]+</a>', '', chapter_line).strip(' \t-')
1633 chapter_title = re.sub(r'\s+', ' ', chapter_title)
1634 chapters.append({
1635 'start_time': start_time,
1636 'end_time': end_time,
1637 'title': chapter_title,
1638 })
1639 return chapters
1640
c5e8d7af 1641 def _real_extract(self, url):
cf7e015f
S
1642 url, smuggled_data = unsmuggle_url(url, {})
1643
7e8c0af0 1644 proto = (
78caa52a
PH
1645 'http' if self._downloader.params.get('prefer_insecure', False)
1646 else 'https')
7e8c0af0 1647
7c80519c 1648 start_time = None
297a564b 1649 end_time = None
7c80519c
JMF
1650 parsed_url = compat_urllib_parse_urlparse(url)
1651 for component in [parsed_url.fragment, parsed_url.query]:
1652 query = compat_parse_qs(component)
297a564b 1653 if start_time is None and 't' in query:
7c80519c 1654 start_time = parse_duration(query['t'][0])
2929fa0e
JMF
1655 if start_time is None and 'start' in query:
1656 start_time = parse_duration(query['start'][0])
297a564b
JMF
1657 if end_time is None and 'end' in query:
1658 end_time = parse_duration(query['end'][0])
7c80519c 1659
c5e8d7af
PH
1660 # Extract original video URL from URL with redirection, like age verification, using next_url parameter
1661 mobj = re.search(self._NEXT_URL_RE, url)
1662 if mobj:
7fd002c0 1663 url = proto + '://www.youtube.com/' + compat_urllib_parse_unquote(mobj.group(1)).lstrip('/')
97665381 1664 video_id = self.extract_id(url)
c5e8d7af
PH
1665
1666 # Get video webpage
aa79ac0c 1667 url = proto + '://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1&bpctr=9999999999' % video_id
a1f934b1 1668 video_webpage = self._download_webpage(url, video_id)
c5e8d7af
PH
1669
1670 # Attempt to extract SWF player URL
e0df6211 1671 mobj = re.search(r'swfConfig.*?"(https?:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage)
c5e8d7af
PH
1672 if mobj is not None:
1673 player_url = re.sub(r'\\(.)', r'\1', mobj.group(1))
1674 else:
1675 player_url = None
1676
d8d24a92
S
1677 dash_mpds = []
1678
1679 def add_dash_mpd(video_info):
1680 dash_mpd = video_info.get('dashmpd')
1681 if dash_mpd and dash_mpd[0] not in dash_mpds:
1682 dash_mpds.append(dash_mpd[0])
1683
561b456e
S
1684 def add_dash_mpd_pr(pl_response):
1685 dash_mpd = url_or_none(try_get(
1686 pl_response, lambda x: x['streamingData']['dashManifestUrl'],
1687 compat_str))
1688 if dash_mpd and dash_mpd not in dash_mpds:
1689 dash_mpds.append(dash_mpd)
1690
c7121fa7
S
1691 is_live = None
1692 view_count = None
1693
1694 def extract_view_count(v_info):
1695 return int_or_none(try_get(v_info, lambda x: x['view_count'][0]))
1696
026fbedc
S
1697 def extract_token(v_info):
1698 return dict_get(v_info, ('account_playback_token', 'accountPlaybackToken', 'token'))
1699
c2d125d9
S
1700 def extract_player_response(player_response, video_id):
1701 pl_response = str_or_none(player_response)
1702 if not pl_response:
1703 return
1704 pl_response = self._parse_json(pl_response, video_id, fatal=False)
1705 if isinstance(pl_response, dict):
1706 add_dash_mpd_pr(pl_response)
1707 return pl_response
1708
dbdaaa23
S
1709 player_response = {}
1710
c5e8d7af 1711 # Get video info
6449cd80 1712 embed_webpage = None
c108eb73 1713 if re.search(r'player-age-gate-content">', video_webpage) is not None:
c108eb73
JMF
1714 age_gate = True
1715 # We simulate the access to the video from www.youtube.com/v/{video_id}
1716 # this can be viewed without login into Youtube
beb95e77
CL
1717 url = proto + '://www.youtube.com/embed/%s' % video_id
1718 embed_webpage = self._download_webpage(url, video_id, 'Downloading embed webpage')
15707c7e 1719 data = compat_urllib_parse_urlencode({
2c57c7fa
JMF
1720 'video_id': video_id,
1721 'eurl': 'https://youtube.googleapis.com/v/' + video_id,
c084c934 1722 'sts': self._search_regex(
beb95e77 1723 r'"sts"\s*:\s*(\d+)', embed_webpage, 'sts', default=''),
2c57c7fa 1724 })
7e8c0af0 1725 video_info_url = proto + '://www.youtube.com/get_video_info?' + data
94bd3613
PH
1726 video_info_webpage = self._download_webpage(
1727 video_info_url, video_id,
20436c30 1728 note='Refetching age-gated info webpage',
94bd3613 1729 errnote='unable to download video info webpage')
c5e8d7af 1730 video_info = compat_parse_qs(video_info_webpage)
c2d125d9
S
1731 pl_response = video_info.get('player_response', [None])[0]
1732 player_response = extract_player_response(pl_response, video_id)
d8d24a92 1733 add_dash_mpd(video_info)
c2d125d9 1734 view_count = extract_view_count(video_info)
c108eb73
JMF
1735 else:
1736 age_gate = False
bc93bdb5 1737 video_info = None
dc4e4f90 1738 sts = None
d8d24a92 1739 # Try looking directly into the video webpage
a72778d3
S
1740 ytplayer_config = self._get_ytplayer_config(video_id, video_webpage)
1741 if ytplayer_config:
4e62ebe2 1742 args = ytplayer_config['args']
4c76aa06 1743 if args.get('url_encoded_fmt_stream_map') or args.get('hlsvp'):
d8d24a92
S
1744 # Convert to the same format returned by compat_parse_qs
1745 video_info = dict((k, [v]) for k, v in args.items())
1746 add_dash_mpd(video_info)
6496ccb4
S
1747 # Rental video is not rented but preview is available (e.g.
1748 # https://www.youtube.com/watch?v=yYr8q0y5Jfg,
067aa17e 1749 # https://github.com/ytdl-org/youtube-dl/issues/10532)
6496ccb4
S
1750 if not video_info and args.get('ypc_vid'):
1751 return self.url_result(
1752 args['ypc_vid'], YoutubeIE.ie_key(), video_id=args['ypc_vid'])
2fe1ff85
JMF
1753 if args.get('livestream') == '1' or args.get('live_playback') == 1:
1754 is_live = True
dc4e4f90 1755 sts = ytplayer_config.get('sts')
dbdaaa23 1756 if not player_response:
c2d125d9 1757 player_response = extract_player_response(args.get('player_response'), video_id)
0a3cf9ad 1758 if not video_info or self._downloader.params.get('youtube_include_dash_manifest', True):
561b456e 1759 add_dash_mpd_pr(player_response)
0a3cf9ad
S
1760 # We also try looking in get_video_info since it may contain different dashmpd
1761 # URL that points to a DASH manifest with possibly different itag set (some itags
1762 # are missing from DASH manifest pointed by webpage's dashmpd, some - from DASH
1763 # manifest pointed by get_video_info's dashmpd).
1764 # The general idea is to take a union of itags of both DASH manifests (for example
067aa17e 1765 # video with such 'manifest behavior' see https://github.com/ytdl-org/youtube-dl/issues/6093)
4e62ebe2 1766 self.report_video_info_webpage_download(video_id)
a61ce714 1767 for el in ('embedded', 'detailpage', 'vevo', ''):
dc4e4f90
S
1768 query = {
1769 'video_id': video_id,
1770 'ps': 'default',
1771 'eurl': '',
1772 'gl': 'US',
1773 'hl': 'en',
1774 }
1775 if el:
1776 query['el'] = el
1777 if sts:
1778 query['sts'] = sts
810fb84d 1779 video_info_webpage = self._download_webpage(
dc4e4f90 1780 '%s://www.youtube.com/get_video_info' % proto,
4e62ebe2 1781 video_id, note=False,
dc4e4f90
S
1782 errnote='unable to download video info webpage',
1783 fatal=False, query=query)
1784 if not video_info_webpage:
1785 continue
0a3cf9ad 1786 get_video_info = compat_parse_qs(video_info_webpage)
dbdaaa23
S
1787 if not player_response:
1788 pl_response = get_video_info.get('player_response', [None])[0]
c2d125d9 1789 player_response = extract_player_response(pl_response, video_id)
fd545fc6 1790 add_dash_mpd(get_video_info)
c7121fa7
S
1791 if view_count is None:
1792 view_count = extract_view_count(get_video_info)
0a3cf9ad
S
1793 if not video_info:
1794 video_info = get_video_info
026fbedc 1795 get_token = extract_token(get_video_info)
56667d62 1796 if get_token:
89ea063e
S
1797 # Different get_video_info requests may report different results, e.g.
1798 # some may report video unavailability, but some may serve it without
067aa17e 1799 # any complaint (see https://github.com/ytdl-org/youtube-dl/issues/7362,
89ea063e
S
1800 # the original webpage as well as el=info and el=embedded get_video_info
1801 # requests report video unavailability due to geo restriction while
1802 # el=detailpage succeeds and returns valid data). This is probably
1803 # due to YouTube measures against IP ranges of hosting providers.
1804 # Working around by preferring the first succeeded video_info containing
1805 # the token if no such video_info yet was found.
026fbedc 1806 token = extract_token(video_info)
56667d62 1807 if not token:
44b2264f 1808 video_info = get_video_info
4e62ebe2 1809 break
bbb7c3f7
YCH
1810
1811 def extract_unavailable_message():
0add33ab
S
1812 messages = []
1813 for tag, kind in (('h1', 'message'), ('div', 'submessage')):
1814 msg = self._html_search_regex(
1815 r'(?s)<{tag}[^>]+id=["\']unavailable-{kind}["\'][^>]*>(.+?)</{tag}>'.format(tag=tag, kind=kind),
1816 video_webpage, 'unavailable %s' % kind, default=None)
1817 if msg:
1818 messages.append(msg)
1819 if messages:
1820 return '\n'.join(messages)
bbb7c3f7 1821
15be3eb5
RA
1822 if not video_info:
1823 unavailable_message = extract_unavailable_message()
1824 if not unavailable_message:
1825 unavailable_message = 'Unable to extract video data'
1826 raise ExtractorError(
1827 'YouTube said: %s' % unavailable_message, expected=True, video_id=video_id)
1828
dbdaaa23
S
1829 video_details = try_get(
1830 player_response, lambda x: x['videoDetails'], dict) or {}
1831
8dbf751a
RA
1832 video_title = video_info.get('title', [None])[0] or video_details.get('title')
1833 if not video_title:
cf7e015f
S
1834 self._downloader.report_warning('Unable to extract video title')
1835 video_title = '_'
1836
9cafc3fd 1837 description_original = video_description = get_element_by_id("eow-description", video_webpage)
cf7e015f 1838 if video_description:
fa4bc6e7
RA
1839
1840 def replace_url(m):
1841 redir_url = compat_urlparse.urljoin(url, m.group(1))
1842 parsed_redir_url = compat_urllib_parse_urlparse(redir_url)
1843 if re.search(r'^(?:www\.)?(?:youtube(?:-nocookie)?\.com|youtu\.be)$', parsed_redir_url.netloc) and parsed_redir_url.path == '/redirect':
1844 qs = compat_parse_qs(parsed_redir_url.query)
1845 q = qs.get('q')
1846 if q and q[0]:
1847 return q[0]
1848 return redir_url
1849
9cafc3fd 1850 description_original = video_description = re.sub(r'''(?x)
cf7e015f 1851 <a\s+
25cb7a0e 1852 (?:[a-zA-Z-]+="[^"]*"\s+)*?
23f13e97 1853 (?:title|href)="([^"]+)"\s+
25cb7a0e 1854 (?:[a-zA-Z-]+="[^"]*"\s+)*?
525cedb9 1855 class="[^"]*"[^>]*>
23f13e97 1856 [^<]+\.{3}\s*
cf7e015f 1857 </a>
fa4bc6e7 1858 ''', replace_url, video_description)
cf7e015f
S
1859 video_description = clean_html(video_description)
1860 else:
8dbf751a 1861 video_description = self._html_search_meta('description', video_webpage) or video_details.get('shortDescription')
cf7e015f 1862
8fe10494 1863 if not smuggled_data.get('force_singlefeed', False):
5e1eddb9 1864 if not self._downloader.params.get('noplaylist'):
8fe10494
S
1865 multifeed_metadata_list = try_get(
1866 player_response,
1867 lambda x: x['multicamera']['playerLegacyMulticameraRenderer']['metadataList'],
1868 compat_str) or try_get(
1869 video_info, lambda x: x['multifeed_metadata_list'][0], compat_str)
1870 if multifeed_metadata_list:
1871 entries = []
1872 feed_ids = []
1873 for feed in multifeed_metadata_list.split(','):
1874 # Unquote should take place before split on comma (,) since textual
1875 # fields may contain comma as well (see
067aa17e 1876 # https://github.com/ytdl-org/youtube-dl/issues/8536)
8fe10494
S
1877 feed_data = compat_parse_qs(compat_urllib_parse_unquote_plus(feed))
1878 entries.append({
1879 '_type': 'url_transparent',
1880 'ie_key': 'Youtube',
1881 'url': smuggle_url(
1882 '%s://www.youtube.com/watch?v=%s' % (proto, feed_data['id'][0]),
1883 {'force_singlefeed': True}),
1884 'title': '%s (%s)' % (video_title, feed_data['title'][0]),
1885 })
1886 feed_ids.append(feed_data['id'][0])
1887 self.to_screen(
1888 'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
1889 % (', '.join(feed_ids), video_id))
1890 return self.playlist_result(entries, video_id, video_title, video_description)
1891 else:
1892 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
cf7e015f 1893
c7121fa7 1894 if view_count is None:
1c9c8de2 1895 view_count = extract_view_count(video_info)
dbdaaa23
S
1896 if view_count is None and video_details:
1897 view_count = int_or_none(video_details.get('viewCount'))
1d699755 1898
27019dbb 1899 if is_live is None:
898238e9 1900 is_live = bool_or_none(video_details.get('isLive'))
27019dbb 1901
c5e8d7af
PH
1902 # Check for "rental" videos
1903 if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info:
067aa17e 1904 raise ExtractorError('"rental" videos not supported. See https://github.com/ytdl-org/youtube-dl/issues/359 for more information.', expected=True)
c5e8d7af 1905
c63ca0ee
S
1906 def _extract_filesize(media_url):
1907 return int_or_none(self._search_regex(
1908 r'\bclen[=/](\d+)', media_url, 'filesize', default=None))
1909
c5e8d7af
PH
1910 if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
1911 self.report_rtmp_download()
dd27fd17
PH
1912 formats = [{
1913 'format_id': '_rtmp',
1914 'protocol': 'rtmp',
1915 'url': video_info['conn'][0],
1916 'player_url': player_url,
1917 }]
391dd6f0 1918 elif not is_live and (len(video_info.get('url_encoded_fmt_stream_map', [''])[0]) >= 1 or len(video_info.get('adaptive_fmts', [''])[0]) >= 1):
5f6a1245 1919 encoded_url_map = video_info.get('url_encoded_fmt_stream_map', [''])[0] + ',' + video_info.get('adaptive_fmts', [''])[0]
00fe14fc 1920 if 'rtmpe%3Dyes' in encoded_url_map:
067aa17e 1921 raise ExtractorError('rtmpe downloads are not supported, see https://github.com/ytdl-org/youtube-dl/issues/343 for more information.', expected=True)
3318832e 1922 formats_spec = {}
82156fdb 1923 fmt_list = video_info.get('fmt_list', [''])[0]
1924 if fmt_list:
1925 for fmt in fmt_list.split(','):
1926 spec = fmt.split('/')
3318832e 1927 if len(spec) > 1:
1928 width_height = spec[1].split('x')
1929 if len(width_height) == 2:
1930 formats_spec[spec[0]] = {
1931 'resolution': spec[1],
1932 'width': int_or_none(width_height[0]),
1933 'height': int_or_none(width_height[1]),
1934 }
54fc90aa 1935 q = qualities(['small', 'medium', 'hd720'])
140a13f5
RA
1936 streaming_formats = try_get(player_response, lambda x: x['streamingData']['formats'], list)
1937 if streaming_formats:
1938 for fmt in streaming_formats:
1939 itag = str_or_none(fmt.get('itag'))
1940 if not itag:
1941 continue
1942 quality = fmt.get('quality')
1943 quality_label = fmt.get('qualityLabel') or quality
1944 formats_spec[itag] = {
1945 'asr': int_or_none(fmt.get('audioSampleRate')),
1946 'filesize': int_or_none(fmt.get('contentLength')),
1947 'format_note': quality_label,
1948 'fps': int_or_none(fmt.get('fps')),
1949 'height': int_or_none(fmt.get('height')),
1950 'quality': q(quality),
1951 # bitrate for itag 43 is always 2147483647
1952 'tbr': float_or_none(fmt.get('averageBitrate') or fmt.get('bitrate'), 1000) if itag != '43' else None,
1953 'width': int_or_none(fmt.get('width')),
1954 }
c9afb51c 1955 formats = []
00fe14fc 1956 for url_data_str in encoded_url_map.split(','):
c5e8d7af 1957 url_data = compat_parse_qs(url_data_str)
0d297518 1958 if 'itag' not in url_data or 'url' not in url_data or url_data.get('drm_families'):
201e9eaa 1959 continue
2f483bc1
S
1960 stream_type = int_or_none(try_get(url_data, lambda x: x['stream_type'][0]))
1961 # Unsupported FORMAT_STREAM_TYPE_OTF
1962 if stream_type == 3:
1963 continue
201e9eaa
PH
1964 format_id = url_data['itag'][0]
1965 url = url_data['url'][0]
1966
a49eccdf 1967 if 's' in url_data or self._downloader.params.get('youtube_include_dash_manifest', True):
6449cd80 1968 ASSETS_RE = r'"assets":.+?"js":\s*("[^"]+")'
beb95e77 1969 jsplayer_url_json = self._search_regex(
6449cd80
PH
1970 ASSETS_RE,
1971 embed_webpage if age_gate else video_webpage,
1972 'JS player URL (1)', default=None)
1973 if not jsplayer_url_json and not age_gate:
1974 # We need the embed website after all
1975 if embed_webpage is None:
1976 embed_url = proto + '://www.youtube.com/embed/%s' % video_id
1977 embed_webpage = self._download_webpage(
1978 embed_url, video_id, 'Downloading embed webpage')
1979 jsplayer_url_json = self._search_regex(
1980 ASSETS_RE, embed_webpage, 'JS player URL')
1981
beb95e77 1982 player_url = json.loads(jsplayer_url_json)
201e9eaa
PH
1983 if player_url is None:
1984 player_url_json = self._search_regex(
1985 r'ytplayer\.config.*?"url"\s*:\s*("[^"]+")',
78caa52a 1986 video_webpage, 'age gate player URL')
201e9eaa
PH
1987 player_url = json.loads(player_url_json)
1988
a49eccdf
YCH
1989 if 'sig' in url_data:
1990 url += '&signature=' + url_data['sig'][0]
1991 elif 's' in url_data:
1992 encrypted_sig = url_data['s'][0]
1993
201e9eaa 1994 if self._downloader.params.get('verbose'):
cf010131 1995 if player_url is None:
201e9eaa
PH
1996 player_version = 'unknown'
1997 player_desc = 'unknown'
1998 else:
1999 if player_url.endswith('swf'):
2000 player_version = self._search_regex(
2001 r'-(.+?)(?:/watch_as3)?\.swf$', player_url,
78caa52a 2002 'flash player', fatal=False)
201e9eaa 2003 player_desc = 'flash player %s' % player_version
cf010131 2004 else:
201e9eaa 2005 player_version = self._search_regex(
b62985a9 2006 [r'html5player-([^/]+?)(?:/html5player(?:-new)?)?\.js',
63529e93 2007 r'(?:www|player(?:_ias)?)-([^/]+)(?:/[a-z]{2,3}_[A-Z]{2})?/base\.js'],
201e9eaa
PH
2008 player_url,
2009 'html5 player', fatal=False)
78caa52a 2010 player_desc = 'html5 player %s' % player_version
201e9eaa 2011
60064c53 2012 parts_sizes = self._signature_cache_id(encrypted_sig)
69ea8ca4 2013 self.to_screen('{%s} signature length %s, %s' %
9e1a5b84 2014 (format_id, parts_sizes, player_desc))
201e9eaa
PH
2015
2016 signature = self._decrypt_signature(
2017 encrypted_sig, video_id, player_url, age_gate)
027ffdca
S
2018 sp = try_get(url_data, lambda x: x['sp'][0], compat_str) or 'signature'
2019 url += '&%s=%s' % (sp, signature)
201e9eaa
PH
2020 if 'ratebypass' not in url:
2021 url += '&ratebypass=yes'
c9afb51c 2022
94278f72
YCH
2023 dct = {
2024 'format_id': format_id,
2025 'url': url,
2026 'player_url': player_url,
2027 }
2028 if format_id in self._formats:
2029 dct.update(self._formats[format_id])
3318832e 2030 if format_id in formats_spec:
2031 dct.update(formats_spec[format_id])
94278f72 2032
aabc2be6 2033 # Some itags are not included in DASH manifest thus corresponding formats will
067aa17e 2034 # lack metadata (see https://github.com/ytdl-org/youtube-dl/pull/5993).
aabc2be6
S
2035 # Trying to extract metadata from url_encoded_fmt_stream_map entry.
2036 mobj = re.search(r'^(?P<width>\d+)[xX](?P<height>\d+)$', url_data.get('size', [''])[0])
2037 width, height = (int(mobj.group('width')), int(mobj.group('height'))) if mobj else (None, None)
94278f72 2038
c63ca0ee
S
2039 filesize = int_or_none(url_data.get(
2040 'clen', [None])[0]) or _extract_filesize(url)
2041
140a13f5 2042 quality = url_data.get('quality', [None])[0]
54fc90aa 2043
94278f72 2044 more_fields = {
c63ca0ee 2045 'filesize': filesize,
aabc2be6 2046 'tbr': float_or_none(url_data.get('bitrate', [None])[0], 1000),
c9afb51c
AH
2047 'width': width,
2048 'height': height,
2049 'fps': int_or_none(url_data.get('fps', [None])[0]),
140a13f5 2050 'format_note': url_data.get('quality_label', [None])[0] or quality,
54fc90aa 2051 'quality': q(quality),
c9afb51c 2052 }
94278f72
YCH
2053 for key, value in more_fields.items():
2054 if value:
2055 dct[key] = value
aabc2be6
S
2056 type_ = url_data.get('type', [None])[0]
2057 if type_:
2058 type_split = type_.split(';')
2059 kind_ext = type_split[0].split('/')
2060 if len(kind_ext) == 2:
94278f72
YCH
2061 kind, _ = kind_ext
2062 dct['ext'] = mimetype2ext(type_split[0])
aabc2be6
S
2063 if kind in ('audio', 'video'):
2064 codecs = None
2065 for mobj in re.finditer(
2066 r'(?P<key>[a-zA-Z_-]+)=(?P<quote>["\']?)(?P<val>.+?)(?P=quote)(?:;|$)', type_):
2067 if mobj.group('key') == 'codecs':
2068 codecs = mobj.group('val')
2069 break
2070 if codecs:
6310acf5 2071 dct.update(parse_codecs(codecs))
e4a60912
S
2072 if dct.get('acodec') == 'none' or dct.get('vcodec') == 'none':
2073 dct['downloader_options'] = {
2074 # Youtube throttles chunks >~10M
2075 'http_chunk_size': 10485760,
2076 }
aabc2be6 2077 formats.append(dct)
c5e8d7af 2078 else:
c3e54389
S
2079 manifest_url = (
2080 url_or_none(try_get(
2081 player_response,
2082 lambda x: x['streamingData']['hlsManifestUrl'],
3089bc74
S
2083 compat_str))
2084 or url_or_none(try_get(
c3e54389
S
2085 video_info, lambda x: x['hlsvp'][0], compat_str)))
2086 if manifest_url:
2087 formats = []
2088 m3u8_formats = self._extract_m3u8_formats(
2089 manifest_url, video_id, 'mp4', fatal=False)
2090 for a_format in m3u8_formats:
2091 itag = self._search_regex(
2092 r'/itag/(\d+)/', a_format['url'], 'itag', default=None)
2093 if itag:
2094 a_format['format_id'] = itag
2095 if itag in self._formats:
2096 dct = self._formats[itag].copy()
2097 dct.update(a_format)
2098 a_format = dct
2099 a_format['player_url'] = player_url
2100 # Accept-Encoding header causes failures in live streams on Youtube and Youtube Gaming
2101 a_format.setdefault('http_headers', {})['Youtubedl-no-compression'] = 'True'
2102 formats.append(a_format)
2103 else:
13577349 2104 error_message = extract_unavailable_message()
c3e54389 2105 if not error_message:
13577349
S
2106 error_message = clean_html(try_get(
2107 player_response, lambda x: x['playabilityStatus']['reason'],
2108 compat_str))
2109 if not error_message:
2110 error_message = clean_html(
2111 try_get(video_info, lambda x: x['reason'][0], compat_str))
c3e54389
S
2112 if error_message:
2113 raise ExtractorError(error_message, expected=True)
2114 raise ExtractorError('no conn, hlsvp, hlsManifestUrl or url_encoded_fmt_stream_map information found in video info')
c5e8d7af 2115
7e72694b 2116 # uploader
dbdaaa23
S
2117 video_uploader = try_get(
2118 video_info, lambda x: x['author'][0],
2119 compat_str) or str_or_none(video_details.get('author'))
7e72694b
S
2120 if video_uploader:
2121 video_uploader = compat_urllib_parse_unquote_plus(video_uploader)
2122 else:
2123 self._downloader.report_warning('unable to extract uploader name')
2124
2125 # uploader_id
2126 video_uploader_id = None
2127 video_uploader_url = None
2128 mobj = re.search(
2129 r'<link itemprop="url" href="(?P<uploader_url>https?://www\.youtube\.com/(?:user|channel)/(?P<uploader_id>[^"]+))">',
2130 video_webpage)
2131 if mobj is not None:
2132 video_uploader_id = mobj.group('uploader_id')
2133 video_uploader_url = mobj.group('uploader_url')
2134 else:
2135 self._downloader.report_warning('unable to extract uploader nickname')
2136
b45a9e69 2137 channel_id = (
3089bc74
S
2138 str_or_none(video_details.get('channelId'))
2139 or self._html_search_meta(
2140 'channelId', video_webpage, 'channel id', default=None)
2141 or self._search_regex(
b45a9e69 2142 r'data-channel-external-id=(["\'])(?P<id>(?:(?!\1).)+)\1',
2143 video_webpage, 'channel id', default=None, group='id'))
dd4c4492
S
2144 channel_url = 'http://www.youtube.com/channel/%s' % channel_id if channel_id else None
2145
7e72694b
S
2146 # thumbnail image
2147 # We try first to get a high quality image:
2148 m_thumb = re.search(r'<span itemprop="thumbnail".*?href="(.*?)">',
2149 video_webpage, re.DOTALL)
2150 if m_thumb is not None:
2151 video_thumbnail = m_thumb.group(1)
2152 elif 'thumbnail_url' not in video_info:
2153 self._downloader.report_warning('unable to extract video thumbnail')
2154 video_thumbnail = None
2155 else: # don't panic if we can't find it
2156 video_thumbnail = compat_urllib_parse_unquote_plus(video_info['thumbnail_url'][0])
2157
2158 # upload date
2159 upload_date = self._html_search_meta(
2160 'datePublished', video_webpage, 'upload date', default=None)
2161 if not upload_date:
2162 upload_date = self._search_regex(
2163 [r'(?s)id="eow-date.*?>(.*?)</span>',
2164 r'(?:id="watch-uploader-info".*?>.*?|["\']simpleText["\']\s*:\s*["\'])(?:Published|Uploaded|Streamed live|Started) on (.+?)[<"\']'],
2165 video_webpage, 'upload date', default=None)
2166 upload_date = unified_strdate(upload_date)
2167
2168 video_license = self._html_search_regex(
2169 r'<h4[^>]+class="title"[^>]*>\s*License\s*</h4>\s*<ul[^>]*>\s*<li>(.+?)</li',
2170 video_webpage, 'license', default=None)
2171
2172 m_music = re.search(
2173 r'''(?x)
2174 <h4[^>]+class="title"[^>]*>\s*Music\s*</h4>\s*
2175 <ul[^>]*>\s*
2176 <li>(?P<title>.+?)
2177 by (?P<creator>.+?)
2178 (?:
2179 \(.+?\)|
2180 <a[^>]*
2181 (?:
2182 \bhref=["\']/red[^>]*>| # drop possible
2183 >\s*Listen ad-free with YouTube Red # YouTube Red ad
2184 )
2185 .*?
2186 )?</li
2187 ''',
2188 video_webpage)
2189 if m_music:
2190 video_alt_title = remove_quotes(unescapeHTML(m_music.group('title')))
2191 video_creator = clean_html(m_music.group('creator'))
2192 else:
2193 video_alt_title = video_creator = None
2194
2195 def extract_meta(field):
2196 return self._html_search_regex(
2197 r'<h4[^>]+class="title"[^>]*>\s*%s\s*</h4>\s*<ul[^>]*>\s*<li>(.+?)</li>\s*' % field,
2198 video_webpage, field, default=None)
2199
2200 track = extract_meta('Song')
2201 artist = extract_meta('Artist')
92bc97d3 2202 album = extract_meta('Album')
822b9d9c
RA
2203
2204 # Youtube Music Auto-generated description
92bc97d3 2205 release_date = release_year = None
822b9d9c
RA
2206 if video_description:
2207 mobj = re.search(r'(?s)Provided to YouTube by [^\n]+\n+(?P<track>[^·]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?', video_description)
2208 if mobj:
2209 if not track:
2210 track = mobj.group('track').strip()
2211 if not artist:
2212 artist = mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·'))
92bc97d3
RA
2213 if not album:
2214 album = mobj.group('album'.strip())
822b9d9c
RA
2215 release_year = mobj.group('release_year')
2216 release_date = mobj.group('release_date')
2217 if release_date:
2218 release_date = release_date.replace('-', '')
2219 if not release_year:
2220 release_year = int(release_date[:4])
2221 if release_year:
2222 release_year = int(release_year)
7e72694b
S
2223
2224 m_episode = re.search(
2225 r'<div[^>]+id="watch7-headline"[^>]*>\s*<span[^>]*>.*?>(?P<series>[^<]+)</a></b>\s*S(?P<season>\d+)\s*•\s*E(?P<episode>\d+)</span>',
2226 video_webpage)
2227 if m_episode:
c2dd2dc0 2228 series = unescapeHTML(m_episode.group('series'))
7e72694b
S
2229 season_number = int(m_episode.group('season'))
2230 episode_number = int(m_episode.group('episode'))
2231 else:
2232 series = season_number = episode_number = None
2233
2234 m_cat_container = self._search_regex(
2235 r'(?s)<h4[^>]*>\s*Category\s*</h4>\s*<ul[^>]*>(.*?)</ul>',
2236 video_webpage, 'categories', default=None)
2237 if m_cat_container:
2238 category = self._html_search_regex(
2239 r'(?s)<a[^<]+>(.*?)</a>', m_cat_container, 'category',
2240 default=None)
2241 video_categories = None if category is None else [category]
2242 else:
2243 video_categories = None
2244
2245 video_tags = [
2246 unescapeHTML(m.group('content'))
2247 for m in re.finditer(self._meta_regex('og:video:tag'), video_webpage)]
2248
2249 def _extract_count(count_name):
2250 return str_to_int(self._search_regex(
2251 r'-%s-button[^>]+><span[^>]+class="yt-uix-button-content"[^>]*>([\d,]+)</span>'
2252 % re.escape(count_name),
2253 video_webpage, count_name, default=None))
2254
2255 like_count = _extract_count('like')
2256 dislike_count = _extract_count('dislike')
2257
dbdaaa23
S
2258 if view_count is None:
2259 view_count = str_to_int(self._search_regex(
2260 r'<[^>]+class=["\']watch-view-count[^>]+>\s*([\d,\s]+)', video_webpage,
2261 'view count', default=None))
2262
bf3c9326
S
2263 average_rating = (
2264 float_or_none(video_details.get('averageRating'))
2265 or try_get(video_info, lambda x: float_or_none(x['avg_rating'][0])))
2266
7e72694b
S
2267 # subtitles
2268 video_subtitles = self.extract_subtitles(video_id, video_webpage)
2269 automatic_captions = self.extract_automatic_captions(video_id, video_webpage)
2270
2271 video_duration = try_get(
2272 video_info, lambda x: int_or_none(x['length_seconds'][0]))
dbdaaa23
S
2273 if not video_duration:
2274 video_duration = int_or_none(video_details.get('lengthSeconds'))
7e72694b
S
2275 if not video_duration:
2276 video_duration = parse_duration(self._html_search_meta(
2277 'duration', video_webpage, 'video duration'))
2278
2279 # annotations
2280 video_annotations = None
2281 if self._downloader.params.get('writeannotations', False):
64b6a4e9
RA
2282 xsrf_token = self._search_regex(
2283 r'([\'"])XSRF_TOKEN\1\s*:\s*([\'"])(?P<xsrf_token>[A-Za-z0-9+/=]+)\2',
2284 video_webpage, 'xsrf token', group='xsrf_token', fatal=False)
2285 invideo_url = try_get(
2286 player_response, lambda x: x['annotations'][0]['playerAnnotationsUrlsRenderer']['invideoUrl'], compat_str)
2287 if xsrf_token and invideo_url:
2288 xsrf_field_name = self._search_regex(
2289 r'([\'"])XSRF_FIELD_NAME\1\s*:\s*([\'"])(?P<xsrf_field_name>\w+)\2',
2290 video_webpage, 'xsrf field name',
2291 group='xsrf_field_name', default='session_token')
2292 video_annotations = self._download_webpage(
2293 self._proto_relative_url(invideo_url),
2294 video_id, note='Downloading annotations',
2295 errnote='Unable to download video annotations', fatal=False,
2296 data=urlencode_postdata({xsrf_field_name: xsrf_token}))
7e72694b
S
2297
2298 chapters = self._extract_chapters(description_original, video_duration)
2299
dd27fd17 2300 # Look for the DASH manifest
203fb43f 2301 if self._downloader.params.get('youtube_include_dash_manifest', True):
77c6fb5b 2302 dash_mpd_fatal = True
8ff648e4 2303 for mpd_url in dash_mpds:
d8d24a92 2304 dash_formats = {}
774e208f 2305 try:
05d0d131
YCH
2306 def decrypt_sig(mobj):
2307 s = mobj.group(1)
2308 dec_s = self._decrypt_signature(s, video_id, player_url, age_gate)
2309 return '/signature/%s' % dec_s
2310
8ff648e4 2311 mpd_url = re.sub(r'/s/([a-fA-F0-9\.]+)', decrypt_sig, mpd_url)
2d2fa82d 2312
8ff648e4 2313 for df in self._extract_mpd_formats(
2314 mpd_url, video_id, fatal=dash_mpd_fatal,
2315 formats_dict=self._formats):
c63ca0ee
S
2316 if not df.get('filesize'):
2317 df['filesize'] = _extract_filesize(df['url'])
d8d24a92
S
2318 # Do not overwrite DASH format found in some previous DASH manifest
2319 if df['format_id'] not in dash_formats:
2320 dash_formats[df['format_id']] = df
77c6fb5b
S
2321 # Additional DASH manifests may end up in HTTP Error 403 therefore
2322 # allow them to fail without bug report message if we already have
2323 # some DASH manifest succeeded. This is temporary workaround to reduce
2324 # burst of bug reports until we figure out the reason and whether it
2325 # can be fixed at all.
2326 dash_mpd_fatal = False
774e208f
PH
2327 except (ExtractorError, KeyError) as e:
2328 self.report_warning(
2329 'Skipping DASH manifest: %r' % e, video_id)
d8d24a92 2330 if dash_formats:
04b3b3df
JMF
2331 # Remove the formats we found through non-DASH, they
2332 # contain less info and it can be wrong, because we use
2333 # fixed values (for example the resolution). See
067aa17e 2334 # https://github.com/ytdl-org/youtube-dl/issues/5774 for an
04b3b3df 2335 # example.
d80265cc 2336 formats = [f for f in formats if f['format_id'] not in dash_formats.keys()]
d8d24a92 2337 formats.extend(dash_formats.values())
d80044c2 2338
6271f1ca
PH
2339 # Check for malformed aspect ratio
2340 stretched_m = re.search(
2341 r'<meta\s+property="og:video:tag".*?content="yt:stretch=(?P<w>[0-9]+):(?P<h>[0-9]+)">',
2342 video_webpage)
2343 if stretched_m:
313dfc45
LL
2344 w = float(stretched_m.group('w'))
2345 h = float(stretched_m.group('h'))
5faf9fed
S
2346 # yt:stretch may hold invalid ratio data (e.g. for Q39EVAstoRM ratio is 17:0).
2347 # We will only process correct ratios.
313dfc45 2348 if w > 0 and h > 0:
41f24c32 2349 ratio = w / h
313dfc45
LL
2350 for f in formats:
2351 if f.get('vcodec') != 'none':
2352 f['stretched_ratio'] = ratio
6271f1ca 2353
026fbedc
S
2354 if not formats:
2355 token = extract_token(video_info)
2356 if not token:
2357 if 'reason' in video_info:
2358 if 'The uploader has not made this video available in your country.' in video_info['reason']:
2359 regions_allowed = self._html_search_meta(
2360 'regionsAllowed', video_webpage, default=None)
2361 countries = regions_allowed.split(',') if regions_allowed else None
2362 self.raise_geo_restricted(
2363 msg=video_info['reason'][0], countries=countries)
2364 reason = video_info['reason'][0]
2365 if 'Invalid parameters' in reason:
2366 unavailable_message = extract_unavailable_message()
2367 if unavailable_message:
2368 reason = unavailable_message
2369 raise ExtractorError(
2370 'YouTube said: %s' % reason,
2371 expected=True, video_id=video_id)
2372 else:
2373 raise ExtractorError(
2374 '"token" parameter not in video info for unknown reason',
2375 video_id=video_id)
2376
0d297518
RA
2377 if not formats and (video_info.get('license_info') or try_get(player_response, lambda x: x['streamingData']['licenseInfos'])):
2378 raise ExtractorError('This video is DRM protected.', expected=True)
2379
4bcc7bd1 2380 self._sort_formats(formats)
4ea3be0a 2381
21c340b8 2382 self.mark_watched(video_id, video_info, player_response)
d77ab8e2 2383
4ea3be0a 2384 return {
8bcc8756
JW
2385 'id': video_id,
2386 'uploader': video_uploader,
2387 'uploader_id': video_uploader_id,
fd050249 2388 'uploader_url': video_uploader_url,
dd4c4492
S
2389 'channel_id': channel_id,
2390 'channel_url': channel_url,
8bcc8756 2391 'upload_date': upload_date,
7caf9830 2392 'license': video_license,
936784b2 2393 'creator': video_creator or artist,
8bcc8756 2394 'title': video_title,
936784b2 2395 'alt_title': video_alt_title or track,
8bcc8756
JW
2396 'thumbnail': video_thumbnail,
2397 'description': video_description,
2398 'categories': video_categories,
000b6b5a 2399 'tags': video_tags,
8bcc8756 2400 'subtitles': video_subtitles,
360e1ca5 2401 'automatic_captions': automatic_captions,
8bcc8756
JW
2402 'duration': video_duration,
2403 'age_limit': 18 if age_gate else 0,
2404 'annotations': video_annotations,
9cafc3fd 2405 'chapters': chapters,
7e8c0af0 2406 'webpage_url': proto + '://www.youtube.com/watch?v=%s' % video_id,
8bcc8756 2407 'view_count': view_count,
4ea3be0a 2408 'like_count': like_count,
2409 'dislike_count': dislike_count,
bf3c9326 2410 'average_rating': average_rating,
8bcc8756 2411 'formats': formats,
2fe1ff85 2412 'is_live': is_live,
7c80519c 2413 'start_time': start_time,
297a564b 2414 'end_time': end_time,
12afdc2a
S
2415 'series': series,
2416 'season_number': season_number,
2417 'episode_number': episode_number,
936784b2
S
2418 'track': track,
2419 'artist': artist,
5caabd3c 2420 'album': album,
2421 'release_date': release_date,
2422 'release_year': release_year,
4ea3be0a 2423 }
c5e8d7af 2424
5f6a1245 2425
8e7aad20 2426class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
78caa52a 2427 IE_DESC = 'YouTube.com playlists'
d67cc9fa 2428 _VALID_URL = r"""(?x)(?:
c5e8d7af
PH
2429 (?:https?://)?
2430 (?:\w+\.)?
c5e8d7af 2431 (?:
c0345b82
S
2432 (?:
2433 youtube\.com|
2434 invidio\.us
2435 )
2436 /
feaa5ad7 2437 (?:
87dadd45 2438 (?:course|view_play_list|my_playlists|artist|playlist|watch|embed/(?:videoseries|[0-9A-Za-z_-]{11}))
feaa5ad7
S
2439 \? (?:.*?[&;])*? (?:p|a|list)=
2440 | p/
2441 )|
2442 youtu\.be/[0-9A-Za-z_-]{11}\?.*?\blist=
c5e8d7af 2443 )
d67cc9fa 2444 (
409b9324 2445 (?:PL|LL|EC|UU|FL|RD|UL|TL|OLAK5uy_)?[0-9A-Za-z-_]{10,}
5f6a1245 2446 # Top tracks, they can also include dots
d67cc9fa
JMF
2447 |(?:MC)[\w\.]*
2448 )
c5e8d7af
PH
2449 .*
2450 |
d0ba5587
S
2451 (%(playlist_id)s)
2452 )""" % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
8d81f3e3 2453 _TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s'
351f37c0
S
2454 _VIDEO_RE_TPL = r'href="\s*/watch\?v=%s(?:&amp;(?:[^"]*?index=(?P<index>\d+))?(?:[^>]+>(?P<title>[^<]+))?)?'
2455 _VIDEO_RE = _VIDEO_RE_TPL % r'(?P<id>[0-9A-Za-z_-]{11})'
78caa52a 2456 IE_NAME = 'youtube:playlist'
81127aa5
PH
2457 _TESTS = [{
2458 'url': 'https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re',
2459 'info_dict': {
2460 'title': 'ytdl test PL',
a1cf99d0 2461 'id': 'PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re',
81127aa5
PH
2462 },
2463 'playlist_count': 3,
9291475f
PH
2464 }, {
2465 'url': 'https://www.youtube.com/playlist?list=PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx',
2466 'info_dict': {
acf757f4 2467 'id': 'PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx',
9291475f
PH
2468 'title': 'YDL_Empty_List',
2469 },
2470 'playlist_count': 0,
4201ba13 2471 'skip': 'This playlist is private',
9291475f
PH
2472 }, {
2473 'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
2474 'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
2475 'info_dict': {
2476 'title': '29C3: Not my department',
acf757f4 2477 'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
13a75688
S
2478 'uploader': 'Christiaan008',
2479 'uploader_id': 'ChRiStIaAn008',
9291475f
PH
2480 },
2481 'playlist_count': 95,
2482 }, {
2483 'note': 'issue #673',
2484 'url': 'PLBB231211A4F62143',
2485 'info_dict': {
f46a8702 2486 'title': '[OLD]Team Fortress 2 (Class-based LP)',
acf757f4 2487 'id': 'PLBB231211A4F62143',
13a75688
S
2488 'uploader': 'Wickydoo',
2489 'uploader_id': 'Wickydoo',
9291475f
PH
2490 },
2491 'playlist_mincount': 26,
2492 }, {
2493 'note': 'Large playlist',
2494 'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
2495 'info_dict': {
2496 'title': 'Uploads from Cauchemar',
acf757f4 2497 'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
13a75688
S
2498 'uploader': 'Cauchemar',
2499 'uploader_id': 'Cauchemar89',
9291475f
PH
2500 },
2501 'playlist_mincount': 799,
2502 }, {
2503 'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
2504 'info_dict': {
2505 'title': 'YDL_safe_search',
acf757f4 2506 'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
9291475f
PH
2507 },
2508 'playlist_count': 2,
4201ba13 2509 'skip': 'This playlist is private',
ac7553d0
PH
2510 }, {
2511 'note': 'embedded',
2d3d2997 2512 'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
ac7553d0
PH
2513 'playlist_count': 4,
2514 'info_dict': {
2515 'title': 'JODA15',
acf757f4 2516 'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
13a75688
S
2517 'uploader': 'milan',
2518 'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
ac7553d0 2519 }
87dadd45
S
2520 }, {
2521 'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
2522 'playlist_mincount': 485,
2523 'info_dict': {
13a75688 2524 'title': '2018 Chinese New Singles (11/6 updated)',
87dadd45 2525 'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
13a75688
S
2526 'uploader': 'LBK',
2527 'uploader_id': 'sdragonfang',
87dadd45 2528 }
6b08cdf6
PH
2529 }, {
2530 'note': 'Embedded SWF player',
2d3d2997 2531 'url': 'https://www.youtube.com/p/YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ?hl=en_US&fs=1&rel=0',
6b08cdf6
PH
2532 'playlist_count': 4,
2533 'info_dict': {
2534 'title': 'JODA7',
acf757f4 2535 'id': 'YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ',
13a75688
S
2536 },
2537 'skip': 'This playlist does not exist',
4b7df0d3
JMF
2538 }, {
2539 'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
2540 'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
2541 'info_dict': {
acf757f4
PH
2542 'title': 'Uploads from Interstellar Movie',
2543 'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
13a75688
S
2544 'uploader': 'Interstellar Movie',
2545 'uploader_id': 'InterstellarMovie1',
4b7df0d3 2546 },
481cc733 2547 'playlist_mincount': 21,
dacb3a86
S
2548 }, {
2549 # Playlist URL that does not actually serve a playlist
2550 'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
2551 'info_dict': {
2552 'id': 'FqZTN594JQw',
2553 'ext': 'webm',
2554 'title': "Smiley's People 01 detective, Adventure Series, Action",
2555 'uploader': 'STREEM',
2556 'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
ec85ded8 2557 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
dacb3a86
S
2558 'upload_date': '20150526',
2559 'license': 'Standard YouTube License',
2560 'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
2561 'categories': ['People & Blogs'],
2562 'tags': list,
dbdaaa23 2563 'view_count': int,
dacb3a86
S
2564 'like_count': int,
2565 'dislike_count': int,
2566 },
2567 'params': {
2568 'skip_download': True,
2569 },
13a75688 2570 'skip': 'This video is not available.',
dacb3a86 2571 'add_ie': [YoutubeIE.ie_key()],
481cc733
S
2572 }, {
2573 'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
2574 'info_dict': {
2575 'id': 'yeWKywCrFtk',
2576 'ext': 'mp4',
2577 'title': 'Small Scale Baler and Braiding Rugs',
2578 'uploader': 'Backus-Page House Museum',
2579 'uploader_id': 'backuspagemuseum',
ec85ded8 2580 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
481cc733 2581 'upload_date': '20161008',
481cc733
S
2582 'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
2583 'categories': ['Nonprofits & Activism'],
2584 'tags': list,
2585 'like_count': int,
2586 'dislike_count': int,
2587 },
2588 'params': {
2589 'noplaylist': True,
2590 'skip_download': True,
2591 },
2e18adec
S
2592 }, {
2593 # https://github.com/ytdl-org/youtube-dl/issues/21844
2594 'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
2595 'info_dict': {
2596 'title': 'Data Analysis with Dr Mike Pound',
2597 'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
2598 'uploader_id': 'Computerphile',
2599 'uploader': 'Computerphile',
2600 },
2601 'playlist_mincount': 11,
feaa5ad7
S
2602 }, {
2603 'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
2604 'only_matching': True,
a6857510
S
2605 }, {
2606 'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
2607 'only_matching': True,
409b9324
S
2608 }, {
2609 # music album playlist
2610 'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
2611 'only_matching': True,
c0345b82
S
2612 }, {
2613 'url': 'https://invidio.us/playlist?list=PLDIoUOhQQPlXr63I_vwF9GD8sAKh77dWU',
2614 'only_matching': True,
81127aa5 2615 }]
c5e8d7af 2616
880e1c52
JMF
def _real_initialize(self):
    """Run the inherited login routine as this extractor's initialization step."""
    self._login()
2619
351f37c0
S
def extract_videos_from_page(self, page):
    """Collect (video id, title) pairs from a playlist HTML page.

    Tags carrying a ``data-video-id`` attribute are scanned first; the
    accumulated lists are then handed to ``extract_videos_from_page_impl``
    with progressively more relaxed patterns (presumably that helper adds
    the matches not collected yet -- it is defined outside this block).

    Returns the result of zipping the id list with the title list; a title
    is None when the tag has no ``data-title`` attribute.
    """
    found_ids, found_titles = [], []

    tag_re = r'(<[^>]*\bdata-video-id\s*=\s*["\'][0-9A-Za-z_-]{11}[^>]+>)'
    for tag in re.findall(tag_re, page):
        tag_attrs = extract_attributes(tag)
        found_id = tag_attrs['data-video-id']
        found_title = unescapeHTML(tag_attrs.get('data-title'))
        found_ids.append(found_id)
        # Only non-empty titles are stripped; None/'' are stored unchanged
        found_titles.append(found_title.strip() if found_title else found_title)

    fallback_patterns = (
        # Fallback with old _VIDEO_RE
        self._VIDEO_RE,
        # Relaxed fallbacks
        r'href="\s*/watch\?v\s*=\s*(?P<id>[0-9A-Za-z_-]{11})',
        r'data-video-ids\s*=\s*["\'](?P<id>[0-9A-Za-z_-]{11})',
    )
    for pattern in fallback_patterns:
        self.extract_videos_from_page_impl(
            pattern, page, found_ids, found_titles)

    return zip(found_ids, found_titles)
2647
def _extract_mix(self, playlist_id):
    """Extract a YouTube mix as a playlist result.

    The mixes are generated from a single video; the id of the playlist is
    just 'RD' + video_id, so the last 11 characters of the playlist id are
    used as the first video to request.
    """
    mix_entry_re = r'''(?xs)data-video-username=".*?".*?
               href="/watch\?v=([0-9A-Za-z_-]{11})&amp;[^"]*?list=%s''' % re.escape(playlist_id)

    collected = []
    seed_id = playlist_id[-11:]
    page_no = 1
    while True:
        webpage = self._download_webpage(
            'https://youtube.com/watch?v=%s&list=%s' % (seed_id, playlist_id),
            playlist_id, 'Downloading page {0} of Youtube mix'.format(page_no))
        # Fetch new pages until all the videos are repeated, it seems that
        # there are always 51 unique videos.
        fresh = [
            candidate
            for candidate in orderedSet(re.findall(mix_entry_re, webpage))
            if candidate not in collected]
        if not fresh:
            break
        collected.extend(fresh)
        # Continue the walk from the most recently discovered video
        seed_id = collected[-1]
        page_no += 1

    results = self._ids_to_results(collected)

    def class_lookup(class_name):
        # The title is read from the last page that was downloaded
        return get_element_by_attribute('class', class_name, webpage)

    raw_title = (
        class_lookup('playlist-title')
        or class_lookup('title long-title')
        or class_lookup('title'))

    return self.playlist_result(results, playlist_id, clean_html(raw_title))
2679
def _extract_playlist(self, playlist_id):
    """Download a playlist page and build the playlist result from it.

    Returns a ``(has_videos, playlist)`` tuple. ``has_videos`` is False only
    when no playlist title was found and ``self._entries`` yields nothing
    for the page.

    Raises ExtractorError (expected) when the page carries an alert saying
    that the playlist does not exist, is private, or that the parameters
    are invalid; other alerts are reported as warnings.
    """
    url = self._TEMPLATE_URL % playlist_id
    page = self._download_webpage(url, playlist_id)

    # the yt-alert-message now has tabindex attribute (see https://github.com/ytdl-org/youtube-dl/issues/11604)
    for alert in re.findall(r'<div class="yt-alert-message"[^>]*>([^<]+)</div>', page):
        alert = alert.strip()
        # Check if the playlist exists or is private
        unavailable = re.match(
            r'[^<]*(?:The|This) playlist (?P<reason>does not exist|is private)[^<]*', alert)
        if unavailable:
            reason = unavailable.group('reason')
            pieces = ['This playlist %s' % reason]
            if 'private' in reason:
                pieces.append(', use --username or --netrc to access it')
            pieces.append('.')
            raise ExtractorError(''.join(pieces), expected=True)
        if re.match(r'[^<]*Invalid parameters[^<]*', alert):
            raise ExtractorError(
                'Invalid parameters. Maybe URL is incorrect.',
                expected=True)
        if re.match(r'[^<]*Choose your language[^<]*', alert):
            # Language chooser banner, not an actual problem
            continue
        self.report_warning('Youtube gives an alert message: ' + alert)

    playlist_title = self._html_search_regex(
        r'(?s)<h1 class="pl-header-title[^"]*"[^>]*>\s*(.*?)\s*</h1>',
        page, 'title', default=None)

    # Common prefix of both uploader patterns: the first link of the
    # pl-header-details list
    uploader_anchor = r'class=["\']pl-header-details[^>]+>\s*<li>\s*<a[^>]+\bhref='
    uploader = self._search_regex(
        r'%s["\']/(?:user|channel)/[^>]+>([^<]+)' % uploader_anchor,
        page, 'uploader', default=None)
    owner = re.search(
        r'%s(["\'])(?P<path>/(?:user|channel)/(?P<uploader_id>.+?))\1' % uploader_anchor,
        page)
    uploader_id = uploader_url = None
    if owner:
        uploader_id = owner.group('uploader_id')
        uploader_url = compat_urlparse.urljoin(url, owner.group('path'))

    has_videos = True
    if not playlist_title:
        # Some playlist URLs don't actually serve a playlist (e.g.
        # https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4)
        nothing = object()
        has_videos = next(self._entries(page, playlist_id), nothing) is not nothing

    playlist = self.playlist_result(
        self._entries(page, playlist_id), playlist_id, playlist_title)
    playlist.update({
        'uploader': uploader,
        'uploader_id': uploader_id,
        'uploader_url': uploader_url,
    })

    return has_videos, playlist
c5e8d7af 2741
def _check_download_just_video(self, url, playlist_id):
    """Detect a video id inside a playlist URL and honour --no-playlist.

    Returns ``(video_id, result)``:
      * ``(None, None)`` when the URL does not point at a specific video;
      * ``(video_id, url_result)`` when the 'noplaylist' param is set;
      * ``(video_id, None)`` when the playlist should still be downloaded.
    """
    # Check if it's a video-specific URL
    params = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
    video_id = params.get('v', [None])[0] or self._search_regex(
        r'(?:(?:^|//)youtu\.be/|youtube\.com/embed/(?!videoseries))([0-9A-Za-z_-]{11})', url,
        'video id', default=None)

    if not video_id:
        return None, None

    if self._downloader.params.get('noplaylist'):
        self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
        return video_id, self.url_result(video_id, 'Youtube', video_id=video_id)

    self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))
    return video_id, None
448830ce 2756
ebf1b291
S
def _real_extract(self, url):
    """Resolve a playlist URL (or bare playlist id) into an extraction result.

    Depending on the URL and options this returns a single-video result, a
    mix playlist, or a regular playlist. Raises ExtractorError when the URL
    does not match ``_VALID_URL``.
    """
    # Extract playlist id
    matched = re.match(self._VALID_URL, url)
    if matched is None:
        raise ExtractorError('Invalid URL: %s' % url)
    playlist_id = matched.group(1) or matched.group(2)

    video_id, single_video = self._check_download_just_video(url, playlist_id)
    if single_video:
        return single_video

    if playlist_id.startswith(('RD', 'UL', 'PU')):
        # Mixes require a custom extraction process
        return self._extract_mix(playlist_id)

    has_videos, playlist = self._extract_playlist(playlist_id)
    if video_id and not has_videos:
        # Some playlist URLs don't actually serve a playlist (see
        # https://github.com/ytdl-org/youtube-dl/issues/10537).
        # Fallback to plain video extraction if there is a video id
        # along with playlist id.
        return self.url_result(video_id, 'Youtube', video_id=video_id)

    return playlist
448830ce 2781
c5e8d7af 2782
class YoutubeChannelIE(YoutubePlaylistBaseInfoExtractor):
    """Extractor for the videos of a /channel/<id> URL."""

    IE_NAME = 'youtube:channel'
    IE_DESC = 'YouTube.com channels'
    _VALID_URL = r'https?://(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie)?\.com|(?:www\.)?invidio\.us)/channel/(?P<id>[0-9A-Za-z_-]+)'
    _TEMPLATE_URL = 'https://www.youtube.com/channel/%s/videos'
    _VIDEO_RE = r'(?:title="(?P<title>[^"]+)"[^>]+)?href="/watch\?v=(?P<id>[0-9A-Za-z_-]+)&?'
    _TESTS = [{
        'note': 'paginated channel',
        'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
        'playlist_mincount': 91,
        'info_dict': {
            'id': 'UUKfVa3S1e4PHvxWcwyMMg8w',
            'title': 'Uploads from lex will',
            'uploader': 'lex will',
            'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
        }
    }, {
        'note': 'Age restricted channel',
        # from https://www.youtube.com/user/DeusExOfficial
        'url': 'https://www.youtube.com/channel/UCs0ifCMCm1icqRbqhUINa0w',
        'playlist_mincount': 64,
        'info_dict': {
            'id': 'UUs0ifCMCm1icqRbqhUINa0w',
            'title': 'Uploads from Deus Ex',
            'uploader': 'Deus Ex',
            'uploader_id': 'DeusExOfficial',
        },
    }, {
        'url': 'https://invidio.us/channel/UC23qupoDRn9YOAVzeoxjOQA',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Reject URLs claimed by the playlists or live extractors, else defer to the base check."""
        if YoutubePlaylistsIE.suitable(url) or YoutubeLiveIE.suitable(url):
            return False
        return super(YoutubeChannelIE, cls).suitable(url)

    def _build_template_url(self, url, channel_id):
        """Return the channel's videos page URL built from ``_TEMPLATE_URL``."""
        return self._TEMPLATE_URL % channel_id

    def _real_extract(self, url):
        """Extract a channel, preferably by redirecting to its uploads playlist.

        Falls back to scraping the channel's videos page when no 'UC...'
        channel id can be obtained. Raises ExtractorError (expected) when
        the page has no entries but shows an alert message.
        """
        channel_id = self._match_id(url)
        url = self._build_template_url(url, channel_id)

        # Channel by page listing is restricted to 35 pages of 30 items, i.e. 1050 videos total (see #5778)
        # Workaround by extracting as a playlist if managed to obtain channel playlist URL
        # otherwise fallback on channel by page extraction
        channel_page = self._download_webpage(
            url + '?view=57', channel_id,
            'Downloading channel page', fatal=False)

        channel_playlist_id = None
        if channel_page is not False:
            channel_playlist_id = self._html_search_meta(
                'channelId', channel_page, 'channel id', default=None)
            if not channel_playlist_id:
                # No channelId meta tag: try the app deep-link meta tags
                channel_url = self._html_search_meta(
                    ('al:ios:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad'),
                    channel_page, 'channel url', default=None)
                if channel_url:
                    channel_playlist_id = self._search_regex(
                        r'vnd\.youtube://user/([0-9A-Za-z_-]+)',
                        channel_url, 'channel id', default=None)

        if channel_playlist_id and channel_playlist_id.startswith('UC'):
            # Swap the 'UC' prefix for 'UU' to form the playlist id
            uploads_id = 'UU' + channel_playlist_id[2:]
            uploads_url = compat_urlparse.urljoin(
                url, '/playlist?list=%s' % uploads_id)
            return self.url_result(uploads_url, 'YoutubePlaylist')

        channel_page = self._download_webpage(url, channel_id, 'Downloading page #1')
        autogenerated_mark = re.search(r'''(?x)
                class="[^"]*?(?:
                    channel-header-autogenerated-label|
                    yt-channel-title-autogenerated
                )[^"]*"''', channel_page)

        if autogenerated_mark is not None:
            # The videos are contained in a single page
            # the ajax pages can't be used, they are empty
            entries = []
            for video_id, video_title in self.extract_videos_from_page(channel_page):
                entries.append(self.url_result(
                    video_id, 'Youtube', video_id=video_id,
                    video_title=video_title))
            return self.playlist_result(entries, channel_id)

        try:
            # Probe for at least one entry before building the lazy result
            next(self._entries(channel_page, channel_id))
        except StopIteration:
            alert_message = self._html_search_regex(
                r'(?s)<div[^>]+class=(["\']).*?\byt-alert-message\b.*?\1[^>]*>(?P<alert>[^<]+)</div>',
                channel_page, 'alert', default=None, group='alert')
            if alert_message:
                raise ExtractorError('Youtube said: %s' % alert_message, expected=True)

        return self.playlist_result(self._entries(channel_page, channel_id), channel_id)
c5e8d7af
PH
2879
2880
class YoutubeUserIE(YoutubeChannelIE):
    """Extractor for /user/<name>, /c/<name>, bare /<name> URLs and the "ytuser:" keyword."""

    IE_NAME = 'youtube:user'
    IE_DESC = 'YouTube.com user videos (URL or "ytuser" keyword)'
    _VALID_URL = r'(?:(?:https?://(?:\w+\.)?youtube\.com/(?:(?P<user>user|c)/)?(?!(?:attribution_link|watch|results|shared)(?:$|[^a-z_A-Z0-9-])))|ytuser:)(?!feed/)(?P<id>[A-Za-z0-9_-]+)'
    _TEMPLATE_URL = 'https://www.youtube.com/%s/%s/videos'

    _TESTS = [{
        'url': 'https://www.youtube.com/user/TheLinuxFoundation',
        'playlist_mincount': 320,
        'info_dict': {
            'id': 'UUfX55Sx5hEFjoC3cNs6mCUQ',
            'title': 'Uploads from The Linux Foundation',
            'uploader': 'The Linux Foundation',
            'uploader_id': 'TheLinuxFoundation',
        }
    }, {
        # Only available via https://www.youtube.com/c/12minuteathlete/videos
        # but not https://www.youtube.com/user/12minuteathlete/videos
        'url': 'https://www.youtube.com/c/12minuteathlete/videos',
        'playlist_mincount': 249,
        'info_dict': {
            'id': 'UUVjM-zV6_opMDx7WYxnjZiQ',
            'title': 'Uploads from 12 Minute Athlete',
            'uploader': '12 Minute Athlete',
            'uploader_id': 'the12minuteathlete',
        }
    }, {
        'url': 'ytuser:phihag',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/c/gametrailers',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/gametrailers',
        'only_matching': True,
    }, {
        # This channel is not available, geo restricted to JP
        'url': 'https://www.youtube.com/user/kananishinoSMEJ/videos',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Accept ``url`` only if no other Youtube*IE class in this module does.

        The pattern of this extractor is very permissive and would match
        URLs that belong to the other YouTube extractors, so those are given
        priority.
        """
        for name, klass in globals().items():
            if klass is cls:
                continue
            if not (name.startswith('Youtube') and name.endswith('IE')):
                continue
            if klass.suitable(url):
                return False
        return super(YoutubeUserIE, cls).suitable(url)

    def _build_template_url(self, url, channel_id):
        """Build the videos page URL, keeping the 'user'/'c' path kind of the input URL.

        'user' is assumed when the URL carries neither prefix.
        """
        parts = re.match(self._VALID_URL, url)
        path_kind = parts.group('user') or 'user'
        return self._TEMPLATE_URL % (path_kind, parts.group('id'))
2935
b05654f0 2936
f07e276a
S
class YoutubeLiveIE(YoutubeBaseInfoExtractor):
    """Extractor for the .../live URL of a user or channel."""

    IE_NAME = 'youtube:live'
    IE_DESC = 'YouTube.com live streams'
    _VALID_URL = r'(?P<base_url>https?://(?:\w+\.)?youtube\.com/(?:(?:user|channel|c)/)?(?P<id>[^/]+))/live'

    _TESTS = [{
        'url': 'https://www.youtube.com/user/TheYoungTurks/live',
        'info_dict': {
            'id': 'a48o2S1cPoo',
            'ext': 'mp4',
            'title': 'The Young Turks - Live Main Show',
            'uploader': 'The Young Turks',
            'uploader_id': 'TheYoungTurks',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
            'upload_date': '20150715',
            'license': 'Standard YouTube License',
            'description': 'md5:438179573adcdff3c97ebb1ee632b891',
            'categories': ['News & Politics'],
            'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/TheYoungTurks/live',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Resolve a /live URL to the video it serves, else to the base channel URL.

        The page download is non-fatal: when it fails, or the page does not
        describe a video with a well-formed 11-character id, the URL without
        the trailing '/live' is returned as a url result instead.
        """
        parts = re.match(self._VALID_URL, url)
        channel_id = parts.group('id')
        base_url = parts.group('base_url')

        webpage = self._download_webpage(url, channel_id, fatal=False)
        if not webpage:
            return self.url_result(base_url)

        page_type = self._og_search_property(
            'type', webpage, 'page type', default='')
        video_id = self._html_search_meta(
            'videoId', webpage, 'video id', default=None)
        serves_video = (
            page_type.startswith('video')
            and video_id
            and re.match(r'^[0-9A-Za-z_-]{11}$', video_id))
        if serves_video:
            return self.url_result(video_id, YoutubeIE.ie_key())
        return self.url_result(base_url)
2987
2988
e462474e
S
class YoutubePlaylistsIE(YoutubePlaylistsBaseInfoExtractor):
    """Extractor for the /playlists page of a user or channel.

    Only declarative attributes live here; the extraction logic is
    inherited from the base class.
    """

    IE_NAME = 'youtube:playlists'
    IE_DESC = 'YouTube.com user/channel playlists'
    _VALID_URL = r'https?://(?:\w+\.)?youtube\.com/(?:user|channel)/(?P<id>[^/]+)/playlists'

    _TESTS = [{
        'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
        'playlist_mincount': 4,
        'info_dict': {
            'id': 'ThirstForScience',
            'title': 'ThirstForScience',
        },
    }, {
        # with "Load more" button
        'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
        'playlist_mincount': 70,
        'info_dict': {
            'id': 'igorkle1',
            'title': 'Игорь Клейнер',
        },
    }, {
        'url': 'https://www.youtube.com/channel/UCiU1dHvZObB2iP6xkJ__Icw/playlists',
        'playlist_mincount': 17,
        'info_dict': {
            'id': 'UCiU1dHvZObB2iP6xkJ__Icw',
            'title': 'Chem Player',
        },
        'skip': 'Blocked',
    }]
0c148415
S
3018
3019
870f3bfc
S
class YoutubeSearchBaseInfoExtractor(YoutubePlaylistBaseInfoExtractor):
    """Shared base of the search extractors; only overrides the video link pattern."""

    # Captures the 11-character video id and, when present, the title attribute
    _VIDEO_RE = r'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})(?:[^"]*"[^>]+\btitle="(?P<title>[^"]+))?'
3022
3023
class YoutubeSearchIE(SearchInfoExtractor, YoutubeSearchBaseInfoExtractor):
    """Extractor behind the "ytsearch" keyword: scrapes YouTube result pages."""

    IE_DESC = 'YouTube.com searches'
    # there doesn't appear to be a real limit, for example if you search for
    # 'python' you get more than 8.000.000 results
    _MAX_RESULTS = float('inf')
    IE_NAME = 'youtube:search'
    _SEARCH_KEY = 'ytsearch'
    # Extra query-string arguments merged into the search request
    _EXTRA_QUERY_ARGS = {}
    _TESTS = []

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query

        Result pages are downloaded one after another until ``n`` videos
        have been collected, a page yields no videos, or no "Next" link is
        found. At most ``n`` entries are returned, as a playlist result
        whose id is ``query``.

        Raises ExtractorError (expected) when a downloaded page contains
        the "search-message" marker.
        """

        videos = []

        url_query = {
            'search_query': query.encode('utf-8'),
        }
        url_query.update(self._EXTRA_QUERY_ARGS)
        result_url = 'https://www.youtube.com/results?' + compat_urllib_parse_urlencode(url_query)

        for pagenum in itertools.count(1):
            data = self._download_json(
                result_url, video_id='query "%s"' % query,
                note='Downloading page %s' % pagenum,
                errnote='Unable to download API page',
                query={'spf': 'navigate'})
            html_content = data[1]['body']['content']

            if 'class="search-message' in html_content:
                raise ExtractorError(
                    '[youtube] No video results', expected=True)

            new_videos = list(self._process_page(html_content))
            videos += new_videos
            # Stop as soon as enough videos are collected: requesting one
            # more page when exactly n are available is wasted work and may
            # fail even though the request is already satisfied.
            if not new_videos or len(videos) >= n:
                break
            next_link = self._html_search_regex(
                r'href="(/results\?[^"]*\bsp=[^"]+)"[^>]*>\s*<span[^>]+class="[^"]*\byt-uix-button-content\b[^"]*"[^>]*>Next',
                html_content, 'next link', default=None)
            if next_link is None:
                break
            result_url = compat_urlparse.urljoin('https://www.youtube.com/', next_link)

        # The last page may have pushed the total past n
        if len(videos) > n:
            videos = videos[:n]
        return self.playlist_result(videos, query)
75dff0ee 3072
c9ae7b95 3073
class YoutubeSearchDateIE(YoutubeSearchIE):
    """Variant of YoutubeSearchIE that adds a date-sort query argument."""
    IE_DESC = 'YouTube.com searches, newest videos first'
    IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
    _SEARCH_KEY = 'ytsearchdate'
    # Merged into the search request by the inherited _get_n_results.
    _EXTRA_QUERY_ARGS = {'search_sort': 'video_date_uploaded'}
75dff0ee 3079
c9ae7b95 3080
class YoutubeSearchURLIE(YoutubeSearchBaseInfoExtractor):
    """Turn a youtube.com/results?... URL into a playlist of its videos."""
    IE_NAME = 'youtube:search_url'
    IE_DESC = 'YouTube.com search URLs'
    # The value of the search_query= (or q=) parameter is captured as "query".
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?P<query>[^&]+)(?:[&]|$)'
    _TESTS = [
        {
            'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
            'info_dict': {
                'title': 'youtube-dl test video',
            },
            'playlist_mincount': 5,
        },
        {
            'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
            'only_matching': True,
        },
    ]

    def _real_extract(self, url):
        """Download the results page at ``url`` and return its videos.

        The URL-decoded query string doubles as the download id and as the
        title of the returned playlist.
        """
        encoded_query = re.match(self._VALID_URL, url).group('query')
        query = compat_urllib_parse_unquote_plus(encoded_query)
        results_page = self._download_webpage(url, query)
        entries = self._process_page(results_page)
        return self.playlist_result(entries, playlist_title=query)
c9ae7b95
PH
3101
3102
class YoutubeShowIE(YoutubePlaylistsBaseInfoExtractor):
    """Handle /show/<id> URLs by delegating to the show's playlists page."""
    IE_NAME = 'youtube:show'
    IE_DESC = 'YouTube.com (multi-season) shows'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/show/(?P<id>[^?#]*)'
    _TESTS = [
        {
            'url': 'https://www.youtube.com/show/airdisasters',
            'info_dict': {
                'id': 'airdisasters',
                'title': 'Air Disasters',
            },
            'playlist_mincount': 5,
        },
    ]

    def _real_extract(self, url):
        """Run the parent extraction on the show's "/playlists" URL."""
        show_id = self._match_id(url)
        playlists_url = 'https://www.youtube.com/show/%s/playlists' % show_id
        return super(YoutubeShowIE, self)._real_extract(playlists_url)
04cc9617
JMF
3120
3121
class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
    """
    Base class for feed extractors
    Subclasses must define the _FEED_NAME and _PLAYLIST_TITLE properties.
    """
    _LOGIN_REQUIRED = True

    @property
    def IE_NAME(self):
        """Extractor name derived from the subclass's _FEED_NAME."""
        return 'youtube:%s' % self._FEED_NAME

    def _real_initialize(self):
        """Log in before any extraction takes place."""
        self._login()

    def _entries(self, page):
        """Yield a result for every distinct video linked from the feed.

        ``page`` is the HTML of the first feed page.  Further portions are
        fetched through the "load more" link for as long as one is present
        and the current portion still contains video ids not seen before.
        """
        # The extraction process is the same as for playlists, but the regex
        # for the video ids doesn't contain an index
        seen_ids = set()
        more_widget_html = content_html = page
        for page_num in itertools.count(1):
            matches = re.findall(r'href="\s*/watch\?v=([0-9A-Za-z_-]{11})', content_html)

            # 'recommended' feed has infinite 'load more' and each new portion spins
            # the same videos in (sometimes) slightly different order, so we'll check
            # for unicity and break when portion has no new videos
            new_ids = [
                video_id for video_id in orderedSet(matches)
                if video_id not in seen_ids]
            if not new_ids:
                break

            # A set keeps the membership test above constant-time no matter
            # how many videos the feed has produced so far.
            seen_ids.update(new_ids)

            for entry in self._ids_to_results(new_ids):
                yield entry

            mobj = re.search(r'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html)
            if not mobj:
                break

            more = self._download_json(
                'https://youtube.com/%s' % mobj.group('more'), self._PLAYLIST_TITLE,
                'Downloading page #%s' % page_num,
                transform_source=uppercase_escape)
            content_html = more['content_html']
            more_widget_html = more['load_more_widget_html']

    def _real_extract(self, url):
        """Download the feed page and return its videos as a playlist.

        ``url`` itself is not used; the feed address is built from
        _FEED_NAME.
        """
        page = self._download_webpage(
            'https://www.youtube.com/feed/%s' % self._FEED_NAME,
            self._PLAYLIST_TITLE)
        return self.playlist_result(
            self._entries(page), playlist_title=self._PLAYLIST_TITLE)
25f14e9f
S
3173
3174
class YoutubeWatchLaterIE(YoutubePlaylistIE):
    """Extract the "Watch later" list, i.e. the playlist with id ``WL``."""
    IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
    IE_NAME = 'youtube:watchlater'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/(?:feed/watch_later|(?:playlist|watch)\?(?:.+&)?list=WL)|:ytwatchlater'

    _TESTS = [
        {
            'url': 'https://www.youtube.com/playlist?list=WL',
            'only_matching': True,
        },
        {
            'url': 'https://www.youtube.com/watch?v=bCNU9TrbiRk&index=1&list=WL',
            'only_matching': True,
        },
    ]

    def _real_extract(self, url):
        """Return the single-video result if one is given, else the WL playlist."""
        _, single_video = self._check_download_just_video(url, 'WL')
        if not single_video:
            _, watch_later = self._extract_playlist('WL')
            return watch_later
        return single_video
f459d170 3194
5f6a1245 3195
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    """Resolve the user's favourites to the playlist that backs them."""
    IE_DESC = 'YouTube.com favourite videos, ":ytfav" for short (requires authentication)'
    IE_NAME = 'youtube:favorites'
    _LOGIN_REQUIRED = True
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/my_favorites|:ytfav(?:ou?rites)?'

    def _real_extract(self, url):
        """Find the first ``list=`` id on the favourites page and delegate.

        ``url`` itself is not used; the fixed my_favorites page is fetched.
        """
        favourites_page = self._download_webpage(
            'https://www.youtube.com/my_favorites', 'Youtube Favourites videos')
        favourites_list_id = self._search_regex(
            r'list=(.+?)["&]', favourites_page, 'favourites playlist id')
        return self.url_result(favourites_list_id, 'YoutubePlaylist')
15870e90
PH
3206
3207
25f14e9f
S
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    """Feed extractor configured for the "recommended" feed."""
    _FEED_NAME = 'recommended'
    _PLAYLIST_TITLE = 'Youtube Recommended videos'
    IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/recommended|:ytrec(?:ommended)?'
1ed5b5c9 3213
1ed5b5c9 3214
25f14e9f
S
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    """Feed extractor configured for the "subscriptions" feed."""
    # The shortcut accepted by _VALID_URL is ":ytsubs" (with the leading
    # colon), so the description has to advertise it in that form.
    IE_DESC = 'YouTube.com subscriptions feed, ":ytsubs" keyword (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/subscriptions|:ytsubs(?:criptions)?'
    _FEED_NAME = 'subscriptions'
    _PLAYLIST_TITLE = 'Youtube Subscriptions'
1ed5b5c9 3220
1ed5b5c9 3221
25f14e9f
S
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    """Feed extractor configured for the "history" feed."""
    _FEED_NAME = 'history'
    _PLAYLIST_TITLE = 'Youtube History'
    IE_DESC = 'Youtube watch history, ":ythistory" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/history|:ythistory'
1ed5b5c9
JMF
3227
3228
15870e90
PH
class YoutubeTruncatedURLIE(InfoExtractor):
    """Catch watch URLs that carry no video id and explain the likely cause.

    Matching URLs end right after /watch?, after a single non-video
    parameter, or after an attribution_link parameter; extraction always
    fails with a hint about quoting the URL in the shell.
    """
    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    # Verbose-mode pattern: whitespace inside it is not part of the match.
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [
        {
            'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
            'only_matching': True,
        },
        {
            'url': 'https://www.youtube.com/watch?',
            'only_matching': True,
        },
        {
            'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
            'only_matching': True,
        },
        {
            'url': 'https://www.youtube.com/watch?feature=foo',
            'only_matching': True,
        },
        {
            'url': 'https://www.youtube.com/watch?hl=en-GB',
            'only_matching': True,
        },
        {
            'url': 'https://www.youtube.com/watch?t=2372',
            'only_matching': True,
        },
    ]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError with a quoting hint."""
        hint = (
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like youtube-dl '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply youtube-dl BaW_jenozKc .')
        raise ExtractorError(hint, expected=True)
772fd5cc
PH
3276
3277
class YoutubeTruncatedIDIE(InfoExtractor):
    """Catch watch URLs whose v= value is only 1 to 10 characters long."""
    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [
        {
            'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
            'only_matching': True,
        },
    ]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError naming the short id."""
        short_id = self._match_id(url)
        message = 'Incomplete YouTube ID %s. URL %s looks truncated.' % (short_id, url)
        raise ExtractorError(message, expected=True)