]> jfr.im git - yt-dlp.git/blame - youtube_dl/extractor/youtube.py
Revert "pull changes from remote master (#190)" (#193)
[yt-dlp.git] / youtube_dl / extractor / youtube.py
CommitLineData
c5e8d7af 1# coding: utf-8
c5e8d7af 2
78caa52a
PH
3from __future__ import unicode_literals
4
5
0ca96d48 6import itertools
c5e8d7af 7import json
c4417ddb 8import os.path
d77ab8e2 9import random
c5e8d7af 10import re
42939b61 11import time
e0df6211 12import traceback
c5e8d7af 13
b05654f0 14from .common import InfoExtractor, SearchInfoExtractor
2b25cb5d 15from ..jsinterp import JSInterpreter
54256267 16from ..swfinterp import SWFInterpreter
4bb4a188 17from ..compat import (
edf3e38e 18 compat_chr,
f8c55c66 19 compat_HTTPError,
8d81f3e3 20 compat_kwargs,
c5e8d7af 21 compat_parse_qs,
7fd002c0
S
22 compat_urllib_parse_unquote,
23 compat_urllib_parse_unquote_plus,
15707c7e 24 compat_urllib_parse_urlencode,
7c80519c 25 compat_urllib_parse_urlparse,
7c61bd36 26 compat_urlparse,
c5e8d7af 27 compat_str,
4bb4a188
PH
28)
29from ..utils import (
27019dbb 30 bool_or_none,
c5e8d7af 31 clean_html,
19a107f2 32 dict_get,
9b9c5355 33 error_to_compat_str,
351f37c0 34 extract_attributes,
c5e8d7af 35 ExtractorError,
2d30521a 36 float_or_none,
4bb4a188
PH
37 get_element_by_attribute,
38 get_element_by_id,
dd27fd17 39 int_or_none,
94278f72 40 mimetype2ext,
4bb4a188 41 orderedSet,
6310acf5 42 parse_codecs,
7c80519c 43 parse_duration,
0cb58b02 44 remove_quotes,
3995d37d 45 remove_start,
cf7e015f 46 smuggle_url,
dbdaaa23 47 str_or_none,
c93d53f5 48 str_to_int,
556dbe7f 49 try_get,
c5e8d7af
PH
50 unescapeHTML,
51 unified_strdate,
cf7e015f 52 unsmuggle_url,
81c2f20b 53 uppercase_escape,
21c340b8 54 url_or_none,
6e6bc8da 55 urlencode_postdata,
c5e8d7af
PH
56)
57
5f6a1245 58
class YoutubeBaseInfoExtractor(InfoExtractor):
    """Provide base functions for Youtube extractors"""
    _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
    _TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge'

    _LOOKUP_URL = 'https://accounts.google.com/_/signin/sl/lookup'
    _CHALLENGE_URL = 'https://accounts.google.com/_/signin/sl/challenge'
    # {0} is filled with the "TL" token taken from the challenge response
    _TFA_URL = 'https://accounts.google.com/_/signin/challenge?hl=en&TL={0}'

    _NETRC_MACHINE = 'youtube'
    # If True it will raise an error if no login info is provided
    _LOGIN_REQUIRED = False

    _PLAYLIST_ID_RE = r'(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}'

    def _set_language(self):
        """Set the YouTube PREF cookie requesting the English interface."""
        self._set_cookie(
            '.youtube.com', 'PREF', 'f1=50000000&hl=en',
            # YouTube sets the expire time to about two months
            expire_time=time.time() + 2 * 30 * 24 * 3600)

    def _ids_to_results(self, ids):
        """Return one 'Youtube' url_result entry per video id in *ids*."""
        return [
            self.url_result(vid_id, 'Youtube', video_id=vid_id)
            for vid_id in ids]

    def _login(self):
        """
        Attempt to log in to YouTube.
        True is returned if successful or skipped.
        False is returned if login failed.

        If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.
        """
        username, password = self._get_login_info()
        # No authentication to be performed
        if username is None:
            if self._LOGIN_REQUIRED and self._downloader.params.get('cookiefile') is None:
                raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
            return True

        login_page = self._download_webpage(
            self._LOGIN_URL, None,
            note='Downloading login page',
            errnote='unable to fetch login page', fatal=False)
        if login_page is False:
            # Explicit False (not a bare return) to honour the documented contract
            return False

        login_form = self._hidden_inputs(login_page)

        def req(url, f_req, note, errnote):
            """POST the login form plus the JSON-encoded *f_req* to *url*.

            Returns the parsed JSON response, or False on failure
            (the download is non-fatal).
            """
            data = login_form.copy()
            data.update({
                'pstMsg': 1,
                'checkConnection': 'youtube',
                'checkedDomains': 'youtube',
                'hl': 'en',
                'deviceinfo': '[null,null,null,[],null,"US",null,null,[],"GlifWebSignIn",null,[null,null,[]]]',
                'f.req': json.dumps(f_req),
                'flowName': 'GlifWebSignIn',
                'flowEntry': 'ServiceLogin',
                # TODO: reverse actual botguard identifier generation algo
                'bgRequest': '["identifier",""]',
            })
            return self._download_json(
                url, None, note=note, errnote=errnote,
                # Drop everything before the first '[' so the body parses as JSON
                transform_source=lambda s: re.sub(r'^[^[]*', '', s),
                fatal=False,
                data=urlencode_postdata(data), headers={
                    'Content-Type': 'application/x-www-form-urlencoded;charset=utf-8',
                    'Google-Accounts-XSRF': 1,
                })

        def warn(message):
            """Report *message* as a warning through the downloader."""
            self._downloader.report_warning(message)

        lookup_req = [
            username,
            None, [], None, 'US', None, None, 2, False, True,
            [
                None, None,
                [2, 1, None, 1,
                 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn',
                 None, [], 4],
                1, [None, None, []], None, None, None, True
            ],
            username,
        ]

        lookup_results = req(
            self._LOOKUP_URL, lookup_req,
            'Looking up account info', 'Unable to look up account info')

        if lookup_results is False:
            return False

        user_hash = try_get(lookup_results, lambda x: x[0][2], compat_str)
        if not user_hash:
            warn('Unable to extract user hash')
            return False

        challenge_req = [
            user_hash,
            None, 1, None, [1, None, None, None, [password, None, True]],
            [
                None, None, [2, 1, None, 1, 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn', None, [], 4],
                1, [None, None, []], None, None, None, True
            ]]

        challenge_results = req(
            self._CHALLENGE_URL, challenge_req,
            'Logging in', 'Unable to log in')

        if challenge_results is False:
            # Explicit False (not a bare return) to honour the documented contract
            return False

        login_res = try_get(challenge_results, lambda x: x[0][5], list)
        if login_res:
            login_msg = try_get(login_res, lambda x: x[5], compat_str)
            # Parenthesised so the prefix is kept for every message; without
            # the parentheses `%` binds tighter than the conditional expression
            warn(
                'Unable to login: %s' % (
                    'Invalid password'
                    if login_msg == 'INCORRECT_ANSWER_ENTERED' else login_msg))
            return False

        res = try_get(challenge_results, lambda x: x[0][-1], list)
        if not res:
            warn('Unable to extract result entry')
            return False

        login_challenge = try_get(res, lambda x: x[0][0], list)
        if login_challenge:
            challenge_str = try_get(login_challenge, lambda x: x[2], compat_str)
            if challenge_str == 'TWO_STEP_VERIFICATION':
                # SEND_SUCCESS - TFA code has been successfully sent to phone
                # QUOTA_EXCEEDED - reached the limit of TFA codes
                status = try_get(login_challenge, lambda x: x[5], compat_str)
                if status == 'QUOTA_EXCEEDED':
                    warn('Exceeded the limit of TFA codes, try later')
                    return False

                tl = try_get(challenge_results, lambda x: x[1][2], compat_str)
                if not tl:
                    warn('Unable to extract TL')
                    return False

                tfa_code = self._get_tfa_info('2-step verification code')

                if not tfa_code:
                    warn(
                        'Two-factor authentication required. Provide it either interactively or with --twofactor <code> '
                        '(Note that only TOTP (Google Authenticator App) codes work at this time.)')
                    return False

                # Codes may be entered with a leading "G-"; only the rest is submitted
                tfa_code = remove_start(tfa_code, 'G-')

                tfa_req = [
                    user_hash, None, 2, None,
                    [
                        9, None, None, None, None, None, None, None,
                        [None, tfa_code, True, 2]
                    ]]

                tfa_results = req(
                    self._TFA_URL.format(tl), tfa_req,
                    'Submitting TFA code', 'Unable to submit TFA code')

                if tfa_results is False:
                    return False

                tfa_res = try_get(tfa_results, lambda x: x[0][5], list)
                if tfa_res:
                    tfa_msg = try_get(tfa_res, lambda x: x[5], compat_str)
                    # Parenthesised for the same precedence reason as above
                    warn(
                        'Unable to finish TFA: %s' % (
                            'Invalid TFA code'
                            if tfa_msg == 'INCORRECT_ANSWER_ENTERED' else tfa_msg))
                    return False

                check_cookie_url = try_get(
                    tfa_results, lambda x: x[0][-1][2], compat_str)
            else:
                CHALLENGES = {
                    'LOGIN_CHALLENGE': "This device isn't recognized. For your security, Google wants to make sure it's really you.",
                    'USERNAME_RECOVERY': 'Please provide additional information to aid in the recovery process.',
                    'REAUTH': "There is something unusual about your activity. For your security, Google wants to make sure it's really you.",
                }
                challenge = CHALLENGES.get(
                    challenge_str,
                    '%s returned error %s.' % (self.IE_NAME, challenge_str))
                warn('%s\nGo to https://accounts.google.com/, login and solve a challenge.' % challenge)
                return False
        else:
            check_cookie_url = try_get(res, lambda x: x[2], compat_str)

        if not check_cookie_url:
            warn('Unable to extract CheckCookie URL')
            return False

        check_cookie_results = self._download_webpage(
            check_cookie_url, None, 'Checking cookie', fatal=False)

        if check_cookie_results is False:
            return False

        # The login is treated as successful only if this URL appears in the page
        if 'https://myaccount.google.com/' not in check_cookie_results:
            warn('Unable to log in')
            return False

        return True

    def _download_webpage_handle(self, *args, **kwargs):
        """Delegate to the parent implementation with `disable_polymer=true`
        added to the request query.

        The caller's query dict is copied, never mutated.
        """
        # `or {}` also covers an explicit query=None from the caller
        query = (kwargs.get('query') or {}).copy()
        query['disable_polymer'] = 'true'
        kwargs['query'] = query
        return super(YoutubeBaseInfoExtractor, self)._download_webpage_handle(
            *args, **compat_kwargs(kwargs))

    def _real_initialize(self):
        """Set the language cookie and attempt to log in.

        Does nothing when no downloader is attached.
        """
        if self._downloader is None:
            return
        self._set_language()
        if not self._login():
            return
c5e8d7af 281
8377574c 282
class YoutubeEntryListBaseInfoExtractor(YoutubeBaseInfoExtractor):
    """Base for extractors whose listing page continues via a "Load more" button."""

    def _entries(self, page, playlist_id):
        """Yield the entries of *page* and of every follow-up "Load more" page.

        Each chunk of HTML is handed to self._process_page(), which this
        class does not define. Paging stops when no load-more link is found
        or when a follow-up page has empty content.
        """
        current_html = page
        widget_html = page
        max_retries = 3
        for page_num in itertools.count(1):
            for entry in self._process_page(current_html):
                yield entry

            load_more = re.search(
                r'data-uix-load-more-href="/?(?P<more>[^"]+)"', widget_html)
            if load_more is None:
                break

            for attempt in range(max_retries + 1):
                retry_note = ' (retry #%d)' % attempt if attempt else ''
                try:
                    # Downloading page may result in intermittent 5xx HTTP error
                    # that is usually worked around with a retry
                    more = self._download_json(
                        'https://youtube.com/%s' % load_more.group('more'),
                        playlist_id,
                        'Downloading page #%s%s' % (page_num, retry_note),
                        transform_source=uppercase_escape)
                except ExtractorError as err:
                    is_transient = (
                        isinstance(err.cause, compat_HTTPError)
                        and err.cause.code in (500, 503))
                    if is_transient and attempt < max_retries:
                        continue
                    # Not retryable, or the retries are used up
                    raise
                else:
                    break

            current_html = more['content_html']
            if not current_html.strip():
                # Some webpages show a "Load more" button but they don't
                # have more videos
                break
            widget_html = more['load_more_widget_html']
320
061a75ed
S
321
class YoutubePlaylistBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
    """Entry-list base whose pages contain videos."""

    def _process_page(self, content):
        """Yield a 'Youtube' url_result for every video found in *content*."""
        for vid, title in self.extract_videos_from_page(content):
            yield self.url_result(vid, 'Youtube', vid, title)

    def extract_videos_from_page_impl(self, video_re, page, ids_in_page, titles_in_page):
        """Collect video ids and titles matched by *video_re* in *page*.

        Results are accumulated in place in the parallel lists *ids_in_page*
        and *titles_in_page*. An id already present is not added again, but
        a title found later fills in a missing one.
        """
        for match in re.finditer(video_re, page):
            named = match.groupdict()
            # The link with index 0 is not the first video of the playlist (not sure if still actual)
            if 'index' in named and match.group('id') == '0':
                continue
            video_id = match.group('id')

            title = None
            if 'title' in named:
                title = unescapeHTML(match.group('title'))
            if title:
                title = title.strip()
            if title == '► Play all':
                title = None

            try:
                position = ids_in_page.index(video_id)
            except ValueError:
                # First occurrence of this id
                ids_in_page.append(video_id)
                titles_in_page.append(title)
            else:
                if title and not titles_in_page[position]:
                    titles_in_page[position] = title

    def extract_videos_from_page(self, page):
        """Return (video_id, title) pairs found in *page* via self._VIDEO_RE.

        _VIDEO_RE is not defined in this class; presumably subclasses
        provide it.
        """
        found_ids, found_titles = [], []
        self.extract_videos_from_page_impl(
            self._VIDEO_RE, page, found_ids, found_titles)
        return zip(found_ids, found_titles)
353
354
061a75ed
S
class YoutubePlaylistsBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
    """Entry-list base whose pages contain playlists."""

    def _process_page(self, content):
        """Yield a 'YoutubePlaylist' url_result per distinct playlist linked in *content*."""
        linked_ids = re.findall(
            r'<h3[^>]+class="[^"]*yt-lockup-title[^"]*"[^>]*><a[^>]+href="/?playlist\?list=([0-9A-Za-z-_]{10,})"',
            content)
        for list_id in orderedSet(linked_ids):
            playlist_url = 'https://www.youtube.com/playlist?list=%s' % list_id
            yield self.url_result(playlist_url, 'YoutubePlaylist')

    def _real_extract(self, url):
        """Download *url* and return a playlist result of the playlists it lists."""
        list_id = self._match_id(url)
        page = self._download_webpage(url, list_id)
        page_title = self._og_search_title(page, fatal=False)
        entries = self._entries(page, list_id)
        return self.playlist_result(entries, list_id, page_title)
0c148415
S
368
369
360e1ca5 370class YoutubeIE(YoutubeBaseInfoExtractor):
78caa52a 371 IE_DESC = 'YouTube.com'
cb7dfeea 372 _VALID_URL = r"""(?x)^
c5e8d7af 373 (
edb53e2d 374 (?:https?://|//) # http(s):// or protocol-independent URL
66b48727 375 (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com/|
484aaeb2 376 (?:www\.)?deturl\.com/www\.youtube\.com/|
e70dc1d1 377 (?:www\.)?pwnyoutube\.com/|
8b561bfc 378 (?:www\.)?hooktube\.com/|
f7000f3a 379 (?:www\.)?yourepeat\.com/|
e69ae5b9 380 tube\.majestyc\.net/|
ba036333 381 # Invidious instances taken from https://github.com/omarroth/invidious/wiki/Invidious-Instances
77d95677 382 (?:(?:www|dev)\.)?invidio\.us/|
ba036333 383 (?:(?:www|no)\.)?invidiou\.sh/|
384 (?:(?:www|fi|de)\.)?invidious\.snopyta\.org/|
8ae113ca 385 (?:www\.)?invidious\.kabi\.tk/|
ba036333 386 (?:www\.)?invidious\.13ad\.de/|
791d2e81 387 (?:www\.)?invidious\.mastodon\.host/|
494d664e 388 (?:www\.)?invidious\.nixnet\.xyz/|
666d808e 389 (?:www\.)?invidious\.drycat\.fr/|
ba036333 390 (?:www\.)?tube\.poal\.co/|
8ae113ca 391 (?:www\.)?vid\.wxzm\.sx/|
494d664e 392 (?:www\.)?yt\.elukerio\.org/|
894b3826 393 (?:www\.)?yt\.lelux\.fi/|
bff90fc5 394 (?:www\.)?kgg2m7yk5aybusll\.onion/|
395 (?:www\.)?qklhadlycap4cnod\.onion/|
396 (?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion/|
397 (?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion/|
398 (?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion/|
399 (?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion/|
33c1c7d8 400 (?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p/|
e69ae5b9 401 youtube\.googleapis\.com/) # the various hostnames, with wildcard subdomains
c5e8d7af
PH
402 (?:.*?\#/)? # handle anchor (#/) redirect urls
403 (?: # the various things that can precede the ID:
ac7553d0 404 (?:(?:v|embed|e)/(?!videoseries)) # v/ or embed/ or e/
c5e8d7af 405 |(?: # or the v= param in all its forms
f7000f3a 406 (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
c5e8d7af 407 (?:\?|\#!?) # the params delimiter ? or # or #!
040ac686 408 (?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
c5e8d7af
PH
409 v=
410 )
f4b05232 411 ))
cbaed4bb
S
412 |(?:
413 youtu\.be| # just youtu.be/xxxx
6d4fc66b
S
414 vid\.plus| # or vid.plus/xxxx
415 zwearz\.com/watch| # or zwearz.com/watch/xxxx
cbaed4bb 416 )/
edb53e2d 417 |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
f4b05232 418 )
c5e8d7af 419 )? # all until now is optional -> you can pass the naked ID
8963d9c2 420 ([0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
d0ba5587
S
421 (?!.*?\blist=
422 (?:
423 %(playlist_id)s| # combined list/video URLs are handled by the playlist IE
424 WL # WL are handled by the watch later IE
425 )
426 )
c5e8d7af 427 (?(1).+)? # if we found the ID, everything can follow
d0ba5587 428 $""" % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
c5e8d7af 429 _NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
2c62dc26 430 _formats = {
c2d3cb4c 431 '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
432 '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
433 '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
434 '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
435 '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
436 '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
437 '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
438 '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
3834d3e3 439 # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
c2d3cb4c 440 '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
441 '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
442 '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
443 '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
444 '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
445 '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
e1a0bfdf 446 '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
c2d3cb4c 447 '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
448 '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
e1a0bfdf 449
450
451 # 3D videos
c2d3cb4c 452 '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
453 '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
454 '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
455 '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
e1a0bfdf 456 '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
457 '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
458 '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
836a086c 459
96fb5605 460 # Apple HTTP Live Streaming
11f12195 461 '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
c2d3cb4c 462 '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
463 '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
464 '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
465 '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
466 '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
e1a0bfdf 467 '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
468 '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
2c62dc26
PH
469
470 # DASH mp4 video
d23028a8
S
471 '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
472 '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
473 '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
474 '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
475 '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
067aa17e 476 '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
d23028a8
S
477 '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
478 '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
479 '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
480 '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
481 '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
482 '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},
836a086c 483
f6f1fc92 484 # Dash mp4 audio
d23028a8
S
485 '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
486 '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
487 '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
488 '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
489 '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
490 '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
491 '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},
836a086c
AZ
492
493 # Dash webm
d23028a8
S
494 '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
495 '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
496 '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
497 '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
498 '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
499 '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
500 '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
501 '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
502 '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
503 '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
504 '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
505 '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
506 '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
507 '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
508 '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
4c6b4764 509 # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
d23028a8
S
510 '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
511 '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
512 '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
513 '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
514 '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
515 '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
2c62dc26
PH
516
517 # Dash webm audio
d23028a8
S
518 '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
519 '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},
ce6b9a2d 520
0857baad 521 # Dash webm audio with opus inside
d23028a8
S
522 '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
523 '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
524 '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},
0857baad 525
ce6b9a2d
PH
526 # RTMP (unnamed)
527 '_rtmp': {'protocol': 'rtmp'},
b85eae0f
S
528
529 # av01 video only formats sometimes served with "unknown" codecs
530 '394': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
531 '395': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
532 '396': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
533 '397': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
c5e8d7af 534 }
19041a38 535 _SUBTITLE_FORMATS = ('srv1', 'srv2', 'srv3', 'ttml', 'vtt')
836a086c 536
fd5c4aab
S
537 _GEO_BYPASS = False
538
78caa52a 539 IE_NAME = 'youtube'
2eb88d95
PH
540 _TESTS = [
541 {
2d3d2997 542 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
4bc3a23e
PH
543 'info_dict': {
544 'id': 'BaW_jenozKc',
545 'ext': 'mp4',
546 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
547 'uploader': 'Philipp Hagemeister',
548 'uploader_id': 'phihag',
ec85ded8 549 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
dd4c4492
S
550 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
551 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
4bc3a23e
PH
552 'upload_date': '20121002',
553 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
554 'categories': ['Science & Technology'],
000b6b5a 555 'tags': ['youtube-dl'],
556dbe7f 556 'duration': 10,
dbdaaa23 557 'view_count': int,
3e7c1224
PH
558 'like_count': int,
559 'dislike_count': int,
7c80519c 560 'start_time': 1,
297a564b 561 'end_time': 9,
2eb88d95 562 }
0e853ca4 563 },
0e853ca4 564 {
2d3d2997 565 'url': 'https://www.youtube.com/watch?v=UxxajLWwzqY',
4bc3a23e
PH
566 'note': 'Test generic use_cipher_signature video (#897)',
567 'info_dict': {
568 'id': 'UxxajLWwzqY',
569 'ext': 'mp4',
570 'upload_date': '20120506',
571 'title': 'Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]',
0cb58b02 572 'alt_title': 'I Love It (feat. Charli XCX)',
19a107f2 573 'description': 'md5:f3ceb5ef83a08d95b9d146f973157cc8',
000b6b5a
S
574 'tags': ['Icona Pop i love it', 'sweden', 'pop music', 'big beat records', 'big beat', 'charli',
575 'xcx', 'charli xcx', 'girls', 'hbo', 'i love it', "i don't care", 'icona', 'pop',
576 'iconic ep', 'iconic', 'love', 'it'],
556dbe7f 577 'duration': 180,
4bc3a23e
PH
578 'uploader': 'Icona Pop',
579 'uploader_id': 'IconaPop',
ec85ded8 580 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IconaPop',
0cb58b02 581 'creator': 'Icona Pop',
936784b2
S
582 'track': 'I Love It (feat. Charli XCX)',
583 'artist': 'Icona Pop',
2eb88d95 584 }
c108eb73
JMF
585 },
586 {
4bc3a23e
PH
587 'url': 'https://www.youtube.com/watch?v=07FYdnEawAQ',
588 'note': 'Test VEVO video with age protection (#956)',
589 'info_dict': {
590 'id': '07FYdnEawAQ',
591 'ext': 'mp4',
592 'upload_date': '20130703',
4fe54c12 593 'title': 'Justin Timberlake - Tunnel Vision (Official Music Video) (Explicit)',
0cb58b02 594 'alt_title': 'Tunnel Vision',
4fe54c12 595 'description': 'md5:07dab3356cde4199048e4c7cd93471e1',
556dbe7f 596 'duration': 419,
4bc3a23e
PH
597 'uploader': 'justintimberlakeVEVO',
598 'uploader_id': 'justintimberlakeVEVO',
ec85ded8 599 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/justintimberlakeVEVO',
0cb58b02 600 'creator': 'Justin Timberlake',
7e72694b 601 'track': 'Tunnel Vision',
936784b2 602 'artist': 'Justin Timberlake',
34952f09 603 'age_limit': 18,
c108eb73
JMF
604 }
605 },
fccd3771 606 {
4bc3a23e
PH
607 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
608 'note': 'Embed-only video (#1746)',
609 'info_dict': {
610 'id': 'yZIXLfi8CZQ',
611 'ext': 'mp4',
612 'upload_date': '20120608',
613 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
614 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
615 'uploader': 'SET India',
94bfcd23 616 'uploader_id': 'setindia',
ec85ded8 617 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
94bfcd23 618 'age_limit': 18,
fccd3771
PH
619 }
620 },
11b56058 621 {
2d3d2997 622 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=UxxajLWwzqY',
11b56058
PM
623 'note': 'Use the first video ID in the URL',
624 'info_dict': {
625 'id': 'BaW_jenozKc',
626 'ext': 'mp4',
627 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
628 'uploader': 'Philipp Hagemeister',
629 'uploader_id': 'phihag',
ec85ded8 630 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
11b56058
PM
631 'upload_date': '20121002',
632 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
633 'categories': ['Science & Technology'],
634 'tags': ['youtube-dl'],
556dbe7f 635 'duration': 10,
dbdaaa23 636 'view_count': int,
11b56058
PM
637 'like_count': int,
638 'dislike_count': int,
34a7de29
S
639 },
640 'params': {
641 'skip_download': True,
642 },
11b56058 643 },
dd27fd17 644 {
2d3d2997 645 'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
4bc3a23e
PH
646 'note': '256k DASH audio (format 141) via DASH manifest',
647 'info_dict': {
648 'id': 'a9LDPn-MO4I',
649 'ext': 'm4a',
650 'upload_date': '20121002',
651 'uploader_id': '8KVIDEO',
ec85ded8 652 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
4bc3a23e
PH
653 'description': '',
654 'uploader': '8KVIDEO',
655 'title': 'UHDTV TEST 8K VIDEO.mp4'
4919603f 656 },
4bc3a23e
PH
657 'params': {
658 'youtube_include_dash_manifest': True,
659 'format': '141',
4919603f 660 },
de3c7fe0 661 'skip': 'format 141 not served anymore',
dd27fd17 662 },
3489b7d2
JMF
663 # DASH manifest with encrypted signature
664 {
78caa52a
PH
665 'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
666 'info_dict': {
667 'id': 'IB3lcPjvWLA',
668 'ext': 'm4a',
4fe54c12
S
669 'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
670 'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
556dbe7f 671 'duration': 244,
78caa52a
PH
672 'uploader': 'AfrojackVEVO',
673 'uploader_id': 'AfrojackVEVO',
674 'upload_date': '20131011',
3489b7d2 675 },
4bc3a23e 676 'params': {
78caa52a 677 'youtube_include_dash_manifest': True,
de3c7fe0 678 'format': '141/bestaudio[ext=m4a]',
3489b7d2
JMF
679 },
680 },
aaeb86f6
S
681 # JS player signature function name containing $
682 {
683 'url': 'https://www.youtube.com/watch?v=nfWlot6h_JM',
684 'info_dict': {
685 'id': 'nfWlot6h_JM',
686 'ext': 'm4a',
687 'title': 'Taylor Swift - Shake It Off',
19a107f2 688 'description': 'md5:bec2185232c05479482cb5a9b82719bf',
556dbe7f 689 'duration': 242,
aaeb86f6
S
690 'uploader': 'TaylorSwiftVEVO',
691 'uploader_id': 'TaylorSwiftVEVO',
692 'upload_date': '20140818',
19a107f2 693 'creator': 'Taylor Swift',
aaeb86f6
S
694 },
695 'params': {
696 'youtube_include_dash_manifest': True,
de3c7fe0 697 'format': '141/bestaudio[ext=m4a]',
aaeb86f6
S
698 },
699 },
aa79ac0c
PH
700 # Controversy video
701 {
702 'url': 'https://www.youtube.com/watch?v=T4XJQO3qol8',
703 'info_dict': {
704 'id': 'T4XJQO3qol8',
705 'ext': 'mp4',
556dbe7f 706 'duration': 219,
aa79ac0c 707 'upload_date': '20100909',
4fe54c12 708 'uploader': 'Amazing Atheist',
aa79ac0c 709 'uploader_id': 'TheAmazingAtheist',
ec85ded8 710 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheAmazingAtheist',
aa79ac0c
PH
711 'title': 'Burning Everyone\'s Koran',
712 'description': 'SUBSCRIBE: http://www.youtube.com/saturninefilms\n\nEven Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html',
713 }
c522adb1
JMF
714 },
715 # Normal age-gate video (No vevo, embed allowed)
716 {
2d3d2997 717 'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
c522adb1
JMF
718 'info_dict': {
719 'id': 'HtVdAasjOgU',
720 'ext': 'mp4',
721 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
ec85ded8 722 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
556dbe7f 723 'duration': 142,
c522adb1
JMF
724 'uploader': 'The Witcher',
725 'uploader_id': 'WitcherGame',
ec85ded8 726 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
c522adb1 727 'upload_date': '20140605',
34952f09 728 'age_limit': 18,
c522adb1
JMF
729 },
730 },
fccae2b9
S
731 # Age-gate video with encrypted signature
732 {
2d3d2997 733 'url': 'https://www.youtube.com/watch?v=6kLq3WMV1nU',
fccae2b9
S
734 'info_dict': {
735 'id': '6kLq3WMV1nU',
4fe54c12 736 'ext': 'mp4',
fccae2b9
S
737 'title': 'Dedication To My Ex (Miss That) (Lyric Video)',
738 'description': 'md5:33765bb339e1b47e7e72b5490139bb41',
eb6793ba 739 'duration': 246,
fccae2b9
S
740 'uploader': 'LloydVEVO',
741 'uploader_id': 'LloydVEVO',
ec85ded8 742 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/LloydVEVO',
fccae2b9 743 'upload_date': '20110629',
34952f09 744 'age_limit': 18,
fccae2b9
S
745 },
746 },
067aa17e 747 # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
7d02dcfa 748 # YouTube Red ad is not captured for creator
774e208f
PH
749 {
750 'url': '__2ABJjxzNo',
751 'info_dict': {
752 'id': '__2ABJjxzNo',
753 'ext': 'mp4',
556dbe7f 754 'duration': 266,
774e208f
PH
755 'upload_date': '20100430',
756 'uploader_id': 'deadmau5',
ec85ded8 757 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
19a107f2 758 'creator': 'deadmau5',
774e208f
PH
759 'description': 'md5:12c56784b8032162bb936a5f76d55360',
760 'uploader': 'deadmau5',
761 'title': 'Deadmau5 - Some Chords (HD)',
19a107f2 762 'alt_title': 'Some Chords',
774e208f
PH
763 },
764 'expected_warnings': [
765 'DASH manifest missing',
766 ]
e52a40ab 767 },
067aa17e 768 # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
e52a40ab
PH
769 {
770 'url': 'lqQg6PlCWgI',
771 'info_dict': {
772 'id': 'lqQg6PlCWgI',
773 'ext': 'mp4',
556dbe7f 774 'duration': 6085,
90227264 775 'upload_date': '20150827',
cbe2bd91 776 'uploader_id': 'olympic',
ec85ded8 777 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
cbe2bd91 778 'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
be49068d 779 'uploader': 'Olympic',
cbe2bd91
PH
780 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
781 },
782 'params': {
783 'skip_download': 'requires avconv',
e52a40ab 784 }
cbe2bd91 785 },
6271f1ca
PH
786 # Non-square pixels
787 {
788 'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
789 'info_dict': {
790 'id': '_b-2C3KPAM0',
791 'ext': 'mp4',
792 'stretched_ratio': 16 / 9.,
556dbe7f 793 'duration': 85,
6271f1ca
PH
794 'upload_date': '20110310',
795 'uploader_id': 'AllenMeow',
ec85ded8 796 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
6271f1ca 797 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
eb6793ba 798 'uploader': '孫ᄋᄅ',
6271f1ca
PH
799 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
800 },
06b491eb
S
801 },
802 # url_encoded_fmt_stream_map is empty string
803 {
804 'url': 'qEJwOuvDf7I',
805 'info_dict': {
806 'id': 'qEJwOuvDf7I',
f57b7835 807 'ext': 'webm',
06b491eb
S
808 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
809 'description': '',
810 'upload_date': '20150404',
811 'uploader_id': 'spbelect',
812 'uploader': 'Наблюдатели Петербурга',
813 },
814 'params': {
815 'skip_download': 'requires avconv',
e323cf3f
S
816 },
817 'skip': 'This live event has ended.',
06b491eb 818 },
067aa17e 819 # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
da77d856
S
820 {
821 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
822 'info_dict': {
823 'id': 'FIl7x6_3R5Y',
eb6793ba 824 'ext': 'webm',
da77d856
S
825 'title': 'md5:7b81415841e02ecd4313668cde88737a',
826 'description': 'md5:116377fd2963b81ec4ce64b542173306',
556dbe7f 827 'duration': 220,
da77d856
S
828 'upload_date': '20150625',
829 'uploader_id': 'dorappi2000',
ec85ded8 830 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
da77d856 831 'uploader': 'dorappi2000',
eb6793ba 832 'formats': 'mincount:31',
da77d856 833 },
eb6793ba 834 'skip': 'not actual anymore',
2ee8f5d8 835 },
8a1a26ce
YCH
836 # DASH manifest with segment_list
837 {
838 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
839 'md5': '8ce563a1d667b599d21064e982ab9e31',
840 'info_dict': {
841 'id': 'CsmdDsKjzN8',
842 'ext': 'mp4',
17ee98e1 843 'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510
8a1a26ce
YCH
844 'uploader': 'Airtek',
845 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
846 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
847 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
848 },
849 'params': {
850 'youtube_include_dash_manifest': True,
851 'format': '135', # bestvideo
be49068d
S
852 },
853 'skip': 'This live event has ended.',
2ee8f5d8 854 },
cf7e015f
S
855 {
856 # Multifeed videos (multiple cameras), URL is for Main Camera
857 'url': 'https://www.youtube.com/watch?v=jqWvoWXjCVs',
858 'info_dict': {
859 'id': 'jqWvoWXjCVs',
860 'title': 'teamPGP: Rocket League Noob Stream',
861 'description': 'md5:dc7872fb300e143831327f1bae3af010',
862 },
863 'playlist': [{
864 'info_dict': {
865 'id': 'jqWvoWXjCVs',
866 'ext': 'mp4',
867 'title': 'teamPGP: Rocket League Noob Stream (Main Camera)',
868 'description': 'md5:dc7872fb300e143831327f1bae3af010',
556dbe7f 869 'duration': 7335,
cf7e015f
S
870 'upload_date': '20150721',
871 'uploader': 'Beer Games Beer',
872 'uploader_id': 'beergamesbeer',
ec85ded8 873 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
7caf9830 874 'license': 'Standard YouTube License',
cf7e015f
S
875 },
876 }, {
877 'info_dict': {
878 'id': '6h8e8xoXJzg',
879 'ext': 'mp4',
880 'title': 'teamPGP: Rocket League Noob Stream (kreestuh)',
881 'description': 'md5:dc7872fb300e143831327f1bae3af010',
556dbe7f 882 'duration': 7337,
cf7e015f
S
883 'upload_date': '20150721',
884 'uploader': 'Beer Games Beer',
885 'uploader_id': 'beergamesbeer',
ec85ded8 886 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
7caf9830 887 'license': 'Standard YouTube License',
cf7e015f
S
888 },
889 }, {
890 'info_dict': {
891 'id': 'PUOgX5z9xZw',
892 'ext': 'mp4',
893 'title': 'teamPGP: Rocket League Noob Stream (grizzle)',
894 'description': 'md5:dc7872fb300e143831327f1bae3af010',
556dbe7f 895 'duration': 7337,
cf7e015f
S
896 'upload_date': '20150721',
897 'uploader': 'Beer Games Beer',
898 'uploader_id': 'beergamesbeer',
ec85ded8 899 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
7caf9830 900 'license': 'Standard YouTube License',
cf7e015f
S
901 },
902 }, {
903 'info_dict': {
904 'id': 'teuwxikvS5k',
905 'ext': 'mp4',
906 'title': 'teamPGP: Rocket League Noob Stream (zim)',
907 'description': 'md5:dc7872fb300e143831327f1bae3af010',
556dbe7f 908 'duration': 7334,
cf7e015f
S
909 'upload_date': '20150721',
910 'uploader': 'Beer Games Beer',
911 'uploader_id': 'beergamesbeer',
ec85ded8 912 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
7caf9830 913 'license': 'Standard YouTube License',
cf7e015f
S
914 },
915 }],
916 'params': {
917 'skip_download': True,
918 },
4fe54c12 919 'skip': 'This video is not available.',
cbaed4bb 920 },
f9f49d87 921 {
067aa17e 922 # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
f9f49d87
S
923 'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
924 'info_dict': {
925 'id': 'gVfLd0zydlo',
926 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
927 },
928 'playlist_count': 2,
be49068d 929 'skip': 'Not multifeed anymore',
f9f49d87 930 },
cbaed4bb 931 {
2d3d2997 932 'url': 'https://vid.plus/FlRa-iH7PGw',
cbaed4bb 933 'only_matching': True,
0e49d9a6 934 },
6d4fc66b 935 {
2d3d2997 936 'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
6d4fc66b
S
937 'only_matching': True,
938 },
0e49d9a6 939 {
067aa17e 940 # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
a8776b10 941 # Also tests cut-off URL expansion in video description (see
067aa17e
S
942 # https://github.com/ytdl-org/youtube-dl/issues/1892,
943 # https://github.com/ytdl-org/youtube-dl/issues/8164)
0e49d9a6
LL
944 'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
945 'info_dict': {
946 'id': 'lsguqyKfVQg',
947 'ext': 'mp4',
948 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
eb6793ba 949 'alt_title': 'Dark Walk - Position Music',
0e49d9a6 950 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
556dbe7f 951 'duration': 133,
0e49d9a6
LL
952 'upload_date': '20151119',
953 'uploader_id': 'IronSoulElf',
ec85ded8 954 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
0e49d9a6 955 'uploader': 'IronSoulElf',
eb6793ba
S
956 'creator': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
957 'track': 'Dark Walk - Position Music',
958 'artist': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
92bc97d3 959 'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
0e49d9a6
LL
960 },
961 'params': {
962 'skip_download': True,
963 },
964 },
61f92af1 965 {
067aa17e 966 # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
61f92af1
S
967 'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
968 'only_matching': True,
969 },
313dfc45
LL
970 {
971 # Video with yt:stretch=17:0
972 'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
973 'info_dict': {
974 'id': 'Q39EVAstoRM',
975 'ext': 'mp4',
976 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
977 'description': 'md5:ee18a25c350637c8faff806845bddee9',
978 'upload_date': '20151107',
979 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
980 'uploader': 'CH GAMER DROID',
981 },
982 'params': {
983 'skip_download': True,
984 },
be49068d 985 'skip': 'This video does not exist.',
313dfc45 986 },
7caf9830
S
987 {
988 # Video licensed under Creative Commons
989 'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
990 'info_dict': {
991 'id': 'M4gD1WSo5mA',
992 'ext': 'mp4',
993 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
994 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
556dbe7f 995 'duration': 721,
7caf9830
S
996 'upload_date': '20150127',
997 'uploader_id': 'BerkmanCenter',
ec85ded8 998 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
556dbe7f 999 'uploader': 'The Berkman Klein Center for Internet & Society',
7caf9830
S
1000 'license': 'Creative Commons Attribution license (reuse allowed)',
1001 },
1002 'params': {
1003 'skip_download': True,
1004 },
1005 },
fd050249
S
1006 {
1007 # Channel-like uploader_url
1008 'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
1009 'info_dict': {
1010 'id': 'eQcmzGIKrzg',
1011 'ext': 'mp4',
1012 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
1013 'description': 'md5:dda0d780d5a6e120758d1711d062a867',
556dbe7f 1014 'duration': 4060,
fd050249 1015 'upload_date': '20151119',
eb6793ba 1016 'uploader': 'Bernie Sanders',
fd050249 1017 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
ec85ded8 1018 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
fd050249
S
1019 'license': 'Creative Commons Attribution license (reuse allowed)',
1020 },
1021 'params': {
1022 'skip_download': True,
1023 },
1024 },
040ac686
S
1025 {
1026 'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
1027 'only_matching': True,
7f29cf54
S
1028 },
1029 {
067aa17e 1030 # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
7f29cf54
S
1031 'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
1032 'only_matching': True,
6496ccb4
S
1033 },
1034 {
1035 # Rental video preview
1036 'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
1037 'info_dict': {
1038 'id': 'uGpuVWrhIzE',
1039 'ext': 'mp4',
1040 'title': 'Piku - Trailer',
1041 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
1042 'upload_date': '20150811',
1043 'uploader': 'FlixMatrix',
1044 'uploader_id': 'FlixMatrixKaravan',
ec85ded8 1045 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
6496ccb4
S
1046 'license': 'Standard YouTube License',
1047 },
1048 'params': {
1049 'skip_download': True,
1050 },
eb6793ba 1051 'skip': 'This video is not available.',
022a5d66 1052 },
12afdc2a
S
1053 {
1054 # YouTube Red video with episode data
1055 'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
1056 'info_dict': {
1057 'id': 'iqKdEhx-dD4',
1058 'ext': 'mp4',
1059 'title': 'Isolation - Mind Field (Ep 1)',
4fe54c12 1060 'description': 'md5:46a29be4ceffa65b92d277b93f463c0f',
556dbe7f 1061 'duration': 2085,
12afdc2a
S
1062 'upload_date': '20170118',
1063 'uploader': 'Vsauce',
1064 'uploader_id': 'Vsauce',
1065 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
12afdc2a
S
1066 'series': 'Mind Field',
1067 'season_number': 1,
1068 'episode_number': 1,
1069 },
1070 'params': {
1071 'skip_download': True,
1072 },
1073 'expected_warnings': [
1074 'Skipping DASH manifest',
1075 ],
1076 },
c7121fa7
S
1077 {
1078 # The following content has been identified by the YouTube community
1079 # as inappropriate or offensive to some audiences.
1080 'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
1081 'info_dict': {
1082 'id': '6SJNVb0GnPI',
1083 'ext': 'mp4',
1084 'title': 'Race Differences in Intelligence',
1085 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
1086 'duration': 965,
1087 'upload_date': '20140124',
1088 'uploader': 'New Century Foundation',
1089 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
1090 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
c7121fa7
S
1091 },
1092 'params': {
1093 'skip_download': True,
1094 },
1095 },
022a5d66
S
1096 {
1097 # itag 212
1098 'url': '1t24XAntNCY',
1099 'only_matching': True,
fd5c4aab
S
1100 },
1101 {
1102 # geo restricted to JP
1103 'url': 'sJL6WA-aGkQ',
1104 'only_matching': True,
1105 },
d0ba5587
S
1106 {
1107 'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
1108 'only_matching': True,
1109 },
cd5a74a2
S
1110 {
1111 'url': 'https://invidio.us/watch?v=BaW_jenozKc',
1112 'only_matching': True,
1113 },
825cd268
RA
1114 {
1115 # DRM protected
1116 'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
1117 'only_matching': True,
4fe54c12
S
1118 },
1119 {
1120 # Video with unsupported adaptive stream type formats
1121 'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
1122 'info_dict': {
1123 'id': 'Z4Vy8R84T1U',
1124 'ext': 'mp4',
1125 'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
1126 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
1127 'duration': 433,
1128 'upload_date': '20130923',
1129 'uploader': 'Amelia Putri Harwita',
1130 'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',
1131 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',
1132 'formats': 'maxcount:10',
1133 },
1134 'params': {
1135 'skip_download': True,
1136 'youtube_include_dash_manifest': False,
1137 },
5caabd3c 1138 },
1139 {
822b9d9c 1140 # Youtube Music Auto-generated description
5caabd3c 1141 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1142 'info_dict': {
1143 'id': 'MgNrAu2pzNs',
1144 'ext': 'mp4',
1145 'title': 'Voyeur Girl',
1146 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
1147 'upload_date': '20190312',
19a107f2
AG
1148 'uploader': 'Various Artists - Topic',
1149 'uploader_id': 'UCVWKBi1ELZn0QX2CBLSkiyw',
5caabd3c 1150 'artist': 'Stephen',
1151 'track': 'Voyeur Girl',
1152 'album': 'it\'s too much love to know my dear',
1153 'release_date': '20190313',
1154 'release_year': 2019,
1155 },
1156 'params': {
1157 'skip_download': True,
1158 },
1159 },
1160 {
822b9d9c 1161 # Youtube Music Auto-generated description
5caabd3c 1162 # Retrieve 'artist' field from 'Artist:' in video description
1163 # when it is present on youtube music video
5caabd3c 1164 'url': 'https://www.youtube.com/watch?v=k0jLE7tTwjY',
1165 'info_dict': {
1166 'id': 'k0jLE7tTwjY',
1167 'ext': 'mp4',
1168 'title': 'Latch Feat. Sam Smith',
1169 'description': 'md5:3cb1e8101a7c85fcba9b4fb41b951335',
1170 'upload_date': '20150110',
1171 'uploader': 'Various Artists - Topic',
1172 'uploader_id': 'UCNkEcmYdjrH4RqtNgh7BZ9w',
1173 'artist': 'Disclosure',
1174 'track': 'Latch Feat. Sam Smith',
1175 'album': 'Latch Featuring Sam Smith',
1176 'release_date': '20121008',
1177 'release_year': 2012,
1178 },
1179 'params': {
1180 'skip_download': True,
1181 },
1182 },
1183 {
822b9d9c 1184 # Youtube Music Auto-generated description
5caabd3c 1185 # handle multiple artists on youtube music video
1186 'url': 'https://www.youtube.com/watch?v=74qn0eJSjpA',
1187 'info_dict': {
1188 'id': '74qn0eJSjpA',
1189 'ext': 'mp4',
1190 'title': 'Eastside',
1191 'description': 'md5:290516bb73dcbfab0dcc4efe6c3de5f2',
1192 'upload_date': '20180710',
1193 'uploader': 'Benny Blanco - Topic',
1194 'uploader_id': 'UCzqz_ksRu_WkIzmivMdIS7A',
1195 'artist': 'benny blanco, Halsey, Khalid',
1196 'track': 'Eastside',
1197 'album': 'Eastside',
1198 'release_date': '20180713',
1199 'release_year': 2018,
1200 },
1201 'params': {
1202 'skip_download': True,
1203 },
1204 },
1205 {
822b9d9c 1206 # Youtube Music Auto-generated description
5caabd3c 1207 # handle youtube music video with release_year and no release_date
1208 'url': 'https://www.youtube.com/watch?v=-hcAI0g-f5M',
1209 'info_dict': {
1210 'id': '-hcAI0g-f5M',
1211 'ext': 'mp4',
1212 'title': 'Put It On Me',
19a107f2 1213 'description': 'md5:93c55acc682ae7b0c668f2e34e1c069e',
5caabd3c 1214 'upload_date': '20180426',
1215 'uploader': 'Matt Maeson - Topic',
1216 'uploader_id': 'UCnEkIGqtGcQMLk73Kp-Q5LQ',
1217 'artist': 'Matt Maeson',
1218 'track': 'Put It On Me',
1219 'album': 'The Hearse',
1220 'release_date': None,
1221 'release_year': 2018,
1222 },
1223 'params': {
1224 'skip_download': True,
1225 },
1226 },
66b48727
RA
1227 {
1228 'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',
1229 'only_matching': True,
1230 },
2eb88d95
PH
1231 ]
1232
e0df6211
PH
def __init__(self, *args, **kwargs):
    """Initialize the extractor with an empty signature-function cache.

    All positional and keyword arguments are forwarded unchanged to the
    parent class initializer.
    """
    super(YoutubeIE, self).__init__(*args, **kwargs)
    # Filled lazily by _decrypt_signature; keyed by
    # (player_url, signature cache id).
    self._player_cache = dict()
e0df6211 1236
c5e8d7af
PH
def report_video_info_webpage_download(self, video_id):
    """Report that a download of the video info webpage is being attempted."""
    message = '%s: Downloading video info webpage' % video_id
    self.to_screen(message)
c5e8d7af 1240
c5e8d7af
PH
def report_information_extraction(self, video_id):
    """Report that extraction of the video information is being attempted."""
    message = '%s: Extracting video information' % video_id
    self.to_screen(message)
c5e8d7af
PH
1244
def report_unavailable_format(self, video_id, format):
    """Report that the requested format is not available for the video.

    video_id -- identifier printed as the message prefix
    format   -- the format that could not be found (interpolated with %s)
    """
    # NOTE: the parameter name shadows the ``format`` builtin, but it is part
    # of the public signature and is kept for keyword-argument callers.
    self.to_screen('%s: Format %s not available' % (video_id, format))
c5e8d7af
PH
1248
def report_rtmp_download(self):
    """Announce that the download is going to use the RTMP protocol."""
    message = 'RTMP download detected'
    self.to_screen(message)
c5e8d7af 1252
60064c53
PH
def _signature_cache_id(self, example_sig):
    """Return the dot-joined lengths of the dot-separated parts of a signature.

    For example a signature shaped like 'abc.de' yields '3.2'.
    """
    part_lengths = [
        compat_str(len(segment)) for segment in example_sig.split('.')]
    return '.'.join(part_lengths)
60064c53 1256
b827ee92 1257 def _extract_signature_function(self, video_id, player_url, example_sig):
19a107f2
AG
1258 id_m = re.match(
1259 r'.*?-(?P<id>[a-zA-Z0-9_-]+)(?:/watch_as3|/html5player(?:-new)?|(?:/[a-z]{2,3}_[A-Z]{2})?/base)?\.(?P<ext>[a-z]+)$',
1260 player_url)
1261 if not id_m:
1262 raise ExtractorError('Cannot identify player %r' % player_url)
1263 player_type = id_m.group('ext')
1264 player_id = id_m.group('id')
e0df6211 1265
c4417ddb 1266 # Read from filesystem cache
60064c53
PH
1267 func_id = '%s_%s_%s' % (
1268 player_type, player_id, self._signature_cache_id(example_sig))
c4417ddb 1269 assert os.path.basename(func_id) == func_id
a0e07d31 1270
69ea8ca4 1271 cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
a0e07d31 1272 if cache_spec is not None:
78caa52a 1273 return lambda s: ''.join(s[i] for i in cache_spec)
83799698 1274
6d1a55a5
PH
1275 download_note = (
1276 'Downloading player %s' % player_url
1277 if self._downloader.params.get('verbose') else
1278 'Downloading %s player %s' % (player_type, player_id)
1279 )
e0df6211
PH
1280 if player_type == 'js':
1281 code = self._download_webpage(
1282 player_url, video_id,
6d1a55a5 1283 note=download_note,
69ea8ca4 1284 errnote='Download of %s failed' % player_url)
83799698 1285 res = self._parse_sig_js(code)
c4417ddb 1286 elif player_type == 'swf':
e0df6211
PH
1287 urlh = self._request_webpage(
1288 player_url, video_id,
6d1a55a5 1289 note=download_note,
69ea8ca4 1290 errnote='Download of %s failed' % player_url)
e0df6211 1291 code = urlh.read()
83799698 1292 res = self._parse_sig_swf(code)
e0df6211
PH
1293 else:
1294 assert False, 'Invalid player type %r' % player_type
1295
785521bf
PH
1296 test_string = ''.join(map(compat_chr, range(len(example_sig))))
1297 cache_res = res(test_string)
1298 cache_spec = [ord(c) for c in cache_res]
83799698 1299
69ea8ca4 1300 self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec)
83799698
PH
1301 return res
1302
def _print_sig_code(self, func, example_sig):
    """Print Python source code equivalent to a signature function.

    func is probed with a string whose characters encode their own
    positions; the resulting index list is rendered as a sum of index and
    slice expressions on ``s`` and written out with to_screen.
    """
    def gen_sig_code(idxs):
        def _genslice(start, end, step):
            starts = '' if start == 0 else str(start)
            ends = (':%d' % (end + step)) if end + step >= 0 else ':'
            steps = '' if step == 1 else (':%d' % step)
            return 's[%s%s%s]' % (starts, ends, steps)

        step = None
        # Quelch pyflakes warnings - start will be set when step is set
        start = '(Never used)'
        for prev, cur in zip(idxs[:-1], idxs[1:]):
            delta = cur - prev
            if step is None:
                if delta in [-1, 1]:
                    # A run of consecutive indices begins at prev
                    step = delta
                    start = prev
                else:
                    yield 's[%d]' % prev
            elif delta != step:
                # The current run ends at prev; emit it as a slice
                yield _genslice(start, prev, step)
                step = None
        # Emit whatever is still pending for the last index
        if step is None:
            yield 's[%d]' % cur
        else:
            yield _genslice(start, cur, step)

    probe = ''.join(map(compat_chr, range(len(example_sig))))
    index_spec = [ord(c) for c in func(probe)]
    expr_code = ' + '.join(gen_sig_code(index_spec))
    part_lengths = ', '.join(
        compat_str(len(p)) for p in example_sig.split('.'))
    signature_id_tuple = '(%s)' % part_lengths
    code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
            ' return %s\n') % (signature_id_tuple, expr_code)
    self.to_screen('Extracted signature function:\n' + code)
edf3e38e 1341
e0df6211
PH
def _parse_sig_js(self, jscode):
    """Build a signature function from the JavaScript player code.

    The name of the signature function is located with the regular
    expressions below (tried in order), the function is extracted with
    JSInterpreter, and a callable taking the signature string is returned.
    """
    funcname = self._search_regex(
        (r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
         # Obsolete patterns
         r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
         r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         # This pattern used to be listed twice; an identical second copy
         # can never match when the first one did not, so it was dropped.
         r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\('),
        jscode, 'Initial JS player signature function name', group='sig')

    jsi = JSInterpreter(jscode)
    initial_function = jsi.extract_function(funcname)
    # The extracted function takes its arguments as a list
    return lambda s: initial_function([s])
1361
def _parse_sig_swf(self, file_contents):
    """Build a signature function from the contents of a SWF player.

    Extracts the 'decipher' function of the 'SignatureDecipher' class with
    SWFInterpreter and returns a callable taking the signature string.
    """
    interpreter = SWFInterpreter(file_contents)
    decipher_class = interpreter.extract_class('SignatureDecipher')
    decipher = interpreter.extract_function(decipher_class, 'decipher')
    # The extracted function takes its arguments as a list
    return lambda s: decipher([s])
1368
83799698 1369 def _decrypt_signature(self, s, video_id, player_url, age_gate=False):
257a2501 1370 """Turn the encrypted s field into a working signature"""
6b37f0be 1371
c8bf86d5 1372 if player_url is None:
69ea8ca4 1373 raise ExtractorError('Cannot decrypt signature without player_url')
920de7a2 1374
69ea8ca4 1375 if player_url.startswith('//'):
78caa52a 1376 player_url = 'https:' + player_url
3c90cc8b
S
1377 elif not re.match(r'https?://', player_url):
1378 player_url = compat_urlparse.urljoin(
1379 'https://www.youtube.com', player_url)
c8bf86d5 1380 try:
62af3a0e 1381 player_id = (player_url, self._signature_cache_id(s))
c8bf86d5
PH
1382 if player_id not in self._player_cache:
1383 func = self._extract_signature_function(
60064c53 1384 video_id, player_url, s
c8bf86d5
PH
1385 )
1386 self._player_cache[player_id] = func
1387 func = self._player_cache[player_id]
1388 if self._downloader.params.get('youtube_print_sig_code'):
60064c53 1389 self._print_sig_code(func, s)
c8bf86d5
PH
1390 return func(s)
1391 except Exception as e:
1392 tb = traceback.format_exc()
1393 raise ExtractorError(
78caa52a 1394 'Signature extraction failed: ' + tb, cause=e)
e0df6211 1395
360e1ca5 1396 def _get_subtitles(self, video_id, webpage):
de7f3446 1397 try:
60e47a26 1398 subs_doc = self._download_xml(
38c2e5b8 1399 'https://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id,
7fad1c63
JMF
1400 video_id, note=False)
1401 except ExtractorError as err:
9b9c5355 1402 self._downloader.report_warning('unable to download video subtitles: %s' % error_to_compat_str(err))
de7f3446 1403 return {}
de7f3446
JMF
1404
1405 sub_lang_list = {}
60e47a26
JMF
1406 for track in subs_doc.findall('track'):
1407 lang = track.attrib['lang_code']
7e660ac1
LD
1408 if lang in sub_lang_list:
1409 continue
360e1ca5 1410 sub_formats = []
23d17e4b 1411 for ext in self._SUBTITLE_FORMATS:
15707c7e 1412 params = compat_urllib_parse_urlencode({
360e1ca5
JMF
1413 'lang': lang,
1414 'v': video_id,
1415 'fmt': ext,
1416 'name': track.attrib['name'].encode('utf-8'),
1417 })
1418 sub_formats.append({
1419 'url': 'https://www.youtube.com/api/timedtext?' + params,
1420 'ext': ext,
1421 })
1422 sub_lang_list[lang] = sub_formats
de7f3446 1423 if not sub_lang_list:
69ea8ca4 1424 self._downloader.report_warning('video doesn\'t have subtitles')
de7f3446
JMF
1425 return {}
1426 return sub_lang_list
1427
a72778d3
S
1428 def _get_ytplayer_config(self, video_id, webpage):
1429 patterns = (
526b3b07
S
1430 # User data may contain arbitrary character sequences that may affect
1431 # JSON extraction with regex, e.g. when '};' is contained the second
1432 # regex won't capture the whole JSON. Yet working around by trying more
1433 # concrete regex first keeping in mind proper quoted string handling
1434 # to be implemented in future that will replace this workaround (see
067aa17e
S
1435 # https://github.com/ytdl-org/youtube-dl/issues/7468,
1436 # https://github.com/ytdl-org/youtube-dl/pull/7599)
a72778d3
S
1437 r';ytplayer\.config\s*=\s*({.+?});ytplayer',
1438 r';ytplayer\.config\s*=\s*({.+?});',
1439 )
1440 config = self._search_regex(
1441 patterns, webpage, 'ytplayer.config', default=None)
1442 if config:
1443 return self._parse_json(
1444 uppercase_escape(config), video_id, fatal=False)
0e49d9a6 1445
360e1ca5 1446 def _get_automatic_captions(self, video_id, webpage):
de7f3446
JMF
1447 """We need the webpage for getting the captions url, pass it as an
1448 argument to speed up the process."""
69ea8ca4 1449 self.to_screen('%s: Looking for automatic captions' % video_id)
a72778d3 1450 player_config = self._get_ytplayer_config(video_id, webpage)
78caa52a 1451 err_msg = 'Couldn\'t find automatic captions for %s' % video_id
a72778d3 1452 if not player_config:
de7f3446
JMF
1453 self._downloader.report_warning(err_msg)
1454 return {}
de7f3446 1455 try:
0792d563 1456 args = player_config['args']
b78b292f
S
1457 caption_url = args.get('ttsurl')
1458 if caption_url:
1459 timestamp = args['timestamp']
1460 # We get the available subtitles
15707c7e 1461 list_params = compat_urllib_parse_urlencode({
b78b292f
S
1462 'type': 'list',
1463 'tlangs': 1,
1464 'asrs': 1,
1465 })
1466 list_url = caption_url + '&' + list_params
1467 caption_list = self._download_xml(list_url, video_id)
1468 original_lang_node = caption_list.find('track')
1469 if original_lang_node is None:
1470 self._downloader.report_warning('Video doesn\'t have automatic captions')
1471 return {}
1472 original_lang = original_lang_node.attrib['lang_code']
1473 caption_kind = original_lang_node.attrib.get('kind', '')
1474
1475 sub_lang_list = {}
1476 for lang_node in caption_list.findall('target'):
1477 sub_lang = lang_node.attrib['lang_code']
1478 sub_formats = []
1479 for ext in self._SUBTITLE_FORMATS:
15707c7e 1480 params = compat_urllib_parse_urlencode({
b78b292f
S
1481 'lang': original_lang,
1482 'tlang': sub_lang,
1483 'fmt': ext,
1484 'ts': timestamp,
1485 'kind': caption_kind,
1486 })
1487 sub_formats.append({
1488 'url': caption_url + '&' + params,
1489 'ext': ext,
1490 })
1491 sub_lang_list[sub_lang] = sub_formats
1492 return sub_lang_list
1493
ddbb4c5c
S
1494 def make_captions(sub_url, sub_langs):
1495 parsed_sub_url = compat_urllib_parse_urlparse(sub_url)
1496 caption_qs = compat_parse_qs(parsed_sub_url.query)
1497 captions = {}
1498 for sub_lang in sub_langs:
1499 sub_formats = []
1500 for ext in self._SUBTITLE_FORMATS:
1501 caption_qs.update({
1502 'tlang': [sub_lang],
1503 'fmt': [ext],
1504 })
1505 sub_url = compat_urlparse.urlunparse(parsed_sub_url._replace(
1506 query=compat_urllib_parse_urlencode(caption_qs, True)))
1507 sub_formats.append({
1508 'url': sub_url,
1509 'ext': ext,
1510 })
1511 captions[sub_lang] = sub_formats
1512 return captions
1513
1514 # New captions format as of 22.06.2017
1515 player_response = args.get('player_response')
1516 if player_response and isinstance(player_response, compat_str):
1517 player_response = self._parse_json(
1518 player_response, video_id, fatal=False)
1519 if player_response:
1520 renderer = player_response['captions']['playerCaptionsTracklistRenderer']
1521 base_url = renderer['captionTracks'][0]['baseUrl']
1522 sub_lang_list = []
1523 for lang in renderer['translationLanguages']:
1524 lang_code = lang.get('languageCode')
1525 if lang_code:
1526 sub_lang_list.append(lang_code)
1527 return make_captions(base_url, sub_lang_list)
1528
b78b292f
S
1529 # Some videos don't provide ttsurl but rather caption_tracks and
1530 # caption_translation_languages (e.g. 20LmZk1hakA)
ddbb4c5c 1531 # Not used anymore as of 22.06.2017
b78b292f
S
1532 caption_tracks = args['caption_tracks']
1533 caption_translation_languages = args['caption_translation_languages']
1534 caption_url = compat_parse_qs(caption_tracks.split(',')[0])['u'][0]
ddbb4c5c 1535 sub_lang_list = []
b78b292f
S
1536 for lang in caption_translation_languages.split(','):
1537 lang_qs = compat_parse_qs(compat_urllib_parse_unquote_plus(lang))
1538 sub_lang = lang_qs.get('lc', [None])[0]
ddbb4c5c
S
1539 if sub_lang:
1540 sub_lang_list.append(sub_lang)
1541 return make_captions(caption_url, sub_lang_list)
de7f3446
JMF
1542 # An extractor error can be raised by the download process if there are
1543 # no automatic captions but there are subtitles
ddbb4c5c 1544 except (KeyError, IndexError, ExtractorError):
de7f3446
JMF
1545 self._downloader.report_warning(err_msg)
1546 return {}
1547
21c340b8
S
def _mark_watched(self, video_id, video_info, player_response):
    """Request the playback-stats URL so the video gets marked as watched.

    The stats URL is looked up in ``player_response`` first
    (``playbackTracking -> videostatsPlaybackUrl -> baseUrl``) and, failing
    that, in ``video_info['videostats_playback_base_url'][0]``.  When no
    valid URL is found the method returns without making a request.  The
    download itself is issued with ``fatal=False``.
    """
    tracking_url = try_get(
        player_response,
        lambda x: x['playbackTracking']['videostatsPlaybackUrl']['baseUrl'])
    if not tracking_url:
        tracking_url = try_get(
            video_info, lambda x: x['videostats_playback_base_url'][0])
    tracking_url = url_or_none(tracking_url)
    if not tracking_url:
        return

    url_parts = compat_urlparse.urlparse(tracking_url)
    params = compat_urlparse.parse_qs(url_parts.query)

    # cpn generation algorithm is reverse engineered from base.js.
    # In fact it works even with dummy cpn.
    CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
    cpn_chars = [
        CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(0, 16)]

    # Override (or add) the protocol version and the freshly generated cpn.
    params['ver'] = ['2']
    params['cpn'] = [''.join(cpn_chars)]

    new_query = compat_urllib_parse_urlencode(params, True)
    tracking_url = compat_urlparse.urlunparse(
        url_parts._replace(query=new_query))

    self._download_webpage(
        tracking_url, video_id, 'Marking watched',
        'Unable to mark watched', fatal=False)
1573
66c9fa36
S
@staticmethod
def _extract_urls(webpage):
    """Collect YouTube embed references found in the HTML of *webpage*.

    Three sources are scanned, and their results are returned in this
    order in a single list: embedded player URLs (HTML-unescaped),
    lazyYT ``data-youtube-id`` values (HTML-unescaped), and
    ``data-video_id`` values of the Wordpress "YouTube Video Importer"
    plugin (as found).
    """
    # Embedded YouTube player
    # NOTE(review): the ``embedSWF\(?:\s*`` alternative requires a literal
    # colon as written; ``embedSWF\(\s*`` may have been intended - confirm
    # before changing, the pattern is kept untouched here.
    embed_re = r'''(?x)
        (?:
            <iframe[^>]+?src=|
            data-video-url=|
            <embed[^>]+?src=|
            embedSWF\(?:\s*|
            <object[^>]+data=|
            new\s+SWFObject\(
        )
        (["\'])
            (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
            (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
        \1'''
    found = []
    for match in re.finditer(embed_re, webpage):
        found.append(unescapeHTML(match.group('url')))

    # lazyYT YouTube embed
    lazy_ids = re.findall(
        r'class="lazyYT" data-youtube-id="([^"]+)"', webpage)
    found.extend(list(map(unescapeHTML, lazy_ids)))

    # Wordpress "YouTube Video Importer" plugin
    importer_re = r'''(?x)<div[^>]+
        class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
        data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)'''
    for groups in re.findall(importer_re, webpage):
        # findall yields (q1, q2, video_id) tuples; the id is the last item.
        found.append(groups[-1])

    return found
1605
@staticmethod
def _extract_url(webpage):
    """Return the first entry of ``YoutubeIE._extract_urls(webpage)``.

    Returns None when that list is empty.
    """
    found = YoutubeIE._extract_urls(webpage)
    if not found:
        return None
    return found[0]
1610
97665381
PH
@classmethod
def extract_id(cls, url):
    """Return the video id captured by group 2 of ``cls._VALID_URL``.

    The pattern is matched against *url* in verbose mode.

    Raises:
        ExtractorError: if *url* does not match ``cls._VALID_URL``.
    """
    match = re.match(cls._VALID_URL, url, re.VERBOSE)
    if match is not None:
        return match.group(2)
    raise ExtractorError('Invalid URL: %s' % url)
1618
9cafc3fd 1619 @staticmethod
19a107f2 1620 def _extract_chapters(description, duration):
9cafc3fd
S
1621 if not description:
1622 return None
1623 chapter_lines = re.findall(
1624 r'(?:^|<br\s*/>)([^<]*<a[^>]+onclick=["\']yt\.www\.watch\.player\.seekTo[^>]+>(\d{1,2}:\d{1,2}(?::\d{1,2})?)</a>[^>]*)(?=$|<br\s*/>)',
1625 description)
1626 if not chapter_lines:
1627 return None
1628 chapters = []
1629 for next_num, (chapter_line, time_point) in enumerate(
1630 chapter_lines, start=1):
1631 start_time = parse_duration(time_point)
1632 if start_time is None:
1633 continue
39d4c1be
S
1634 if start_time > duration:
1635 break
9cafc3fd
S
1636 end_time = (duration if next_num == len(chapter_lines)
1637 else parse_duration(chapter_lines[next_num][1]))
1638 if end_time is None:
1639 continue
39d4c1be
S
1640 if end_time > duration:
1641 end_time = duration
1642 if start_time > end_time:
1643 break
9cafc3fd
S
1644 chapter_title = re.sub(
1645 r'<a[^>]+>[^<]+</a>', '', chapter_line).strip(' \t-')
1646 chapter_title = re.sub(r'\s+', ' ', chapter_title)
1647 chapters.append({
1648 'start_time': start_time,
1649 'end_time': end_time,
1650 'title': chapter_title,
1651 })
1652 return chapters
1653
c5e8d7af 1654 def _real_extract(self, url):
cf7e015f
S
1655 url, smuggled_data = unsmuggle_url(url, {})
1656
7e8c0af0 1657 proto = (
78caa52a
PH
1658 'http' if self._downloader.params.get('prefer_insecure', False)
1659 else 'https')
7e8c0af0 1660
7c80519c 1661 start_time = None
297a564b 1662 end_time = None
7c80519c
JMF
1663 parsed_url = compat_urllib_parse_urlparse(url)
1664 for component in [parsed_url.fragment, parsed_url.query]:
1665 query = compat_parse_qs(component)
297a564b 1666 if start_time is None and 't' in query:
7c80519c 1667 start_time = parse_duration(query['t'][0])
2929fa0e
JMF
1668 if start_time is None and 'start' in query:
1669 start_time = parse_duration(query['start'][0])
297a564b
JMF
1670 if end_time is None and 'end' in query:
1671 end_time = parse_duration(query['end'][0])
7c80519c 1672
c5e8d7af
PH
1673 # Extract original video URL from URL with redirection, like age verification, using next_url parameter
1674 mobj = re.search(self._NEXT_URL_RE, url)
1675 if mobj:
7fd002c0 1676 url = proto + '://www.youtube.com/' + compat_urllib_parse_unquote(mobj.group(1)).lstrip('/')
97665381 1677 video_id = self.extract_id(url)
c5e8d7af
PH
1678
1679 # Get video webpage
aa79ac0c 1680 url = proto + '://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1&bpctr=9999999999' % video_id
19a107f2 1681 video_webpage = self._download_webpage(url, video_id)
c5e8d7af
PH
1682
1683 # Attempt to extract SWF player URL
e0df6211 1684 mobj = re.search(r'swfConfig.*?"(https?:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage)
c5e8d7af
PH
1685 if mobj is not None:
1686 player_url = re.sub(r'\\(.)', r'\1', mobj.group(1))
1687 else:
1688 player_url = None
1689
d8d24a92
S
1690 dash_mpds = []
1691
1692 def add_dash_mpd(video_info):
1693 dash_mpd = video_info.get('dashmpd')
1694 if dash_mpd and dash_mpd[0] not in dash_mpds:
1695 dash_mpds.append(dash_mpd[0])
1696
561b456e
S
1697 def add_dash_mpd_pr(pl_response):
1698 dash_mpd = url_or_none(try_get(
1699 pl_response, lambda x: x['streamingData']['dashManifestUrl'],
1700 compat_str))
1701 if dash_mpd and dash_mpd not in dash_mpds:
1702 dash_mpds.append(dash_mpd)
1703
c7121fa7
S
1704 is_live = None
1705 view_count = None
1706
1707 def extract_view_count(v_info):
1708 return int_or_none(try_get(v_info, lambda x: x['view_count'][0]))
1709
19a107f2
AG
1710 def extract_token(v_info):
1711 return dict_get(v_info, ('account_playback_token', 'accountPlaybackToken', 'token'))
1712
c2d125d9
S
1713 def extract_player_response(player_response, video_id):
1714 pl_response = str_or_none(player_response)
1715 if not pl_response:
1716 return
1717 pl_response = self._parse_json(pl_response, video_id, fatal=False)
1718 if isinstance(pl_response, dict):
1719 add_dash_mpd_pr(pl_response)
1720 return pl_response
1721
dbdaaa23
S
1722 player_response = {}
1723
c5e8d7af 1724 # Get video info
6449cd80 1725 embed_webpage = None
c108eb73 1726 if re.search(r'player-age-gate-content">', video_webpage) is not None:
c108eb73
JMF
1727 age_gate = True
1728 # We simulate the access to the video from www.youtube.com/v/{video_id}
1729 # this can be viewed without login into Youtube
beb95e77
CL
1730 url = proto + '://www.youtube.com/embed/%s' % video_id
1731 embed_webpage = self._download_webpage(url, video_id, 'Downloading embed webpage')
15707c7e 1732 data = compat_urllib_parse_urlencode({
2c57c7fa
JMF
1733 'video_id': video_id,
1734 'eurl': 'https://youtube.googleapis.com/v/' + video_id,
c084c934 1735 'sts': self._search_regex(
beb95e77 1736 r'"sts"\s*:\s*(\d+)', embed_webpage, 'sts', default=''),
2c57c7fa 1737 })
7e8c0af0 1738 video_info_url = proto + '://www.youtube.com/get_video_info?' + data
19a107f2
AG
1739 video_info_webpage = self._download_webpage(
1740 video_info_url, video_id,
1741 note='Refetching age-gated info webpage',
1742 errnote='unable to download video info webpage')
1743 video_info = compat_parse_qs(video_info_webpage)
1744 pl_response = video_info.get('player_response', [None])[0]
1745 player_response = extract_player_response(pl_response, video_id)
1746 add_dash_mpd(video_info)
1747 view_count = extract_view_count(video_info)
c108eb73
JMF
1748 else:
1749 age_gate = False
19a107f2
AG
1750 video_info = None
1751 sts = None
d8d24a92 1752 # Try looking directly into the video webpage
a72778d3
S
1753 ytplayer_config = self._get_ytplayer_config(video_id, video_webpage)
1754 if ytplayer_config:
4e62ebe2 1755 args = ytplayer_config['args']
4c76aa06 1756 if args.get('url_encoded_fmt_stream_map') or args.get('hlsvp'):
d8d24a92
S
1757 # Convert to the same format returned by compat_parse_qs
1758 video_info = dict((k, [v]) for k, v in args.items())
1759 add_dash_mpd(video_info)
6496ccb4
S
1760 # Rental video is not rented but preview is available (e.g.
1761 # https://www.youtube.com/watch?v=yYr8q0y5Jfg,
067aa17e 1762 # https://github.com/ytdl-org/youtube-dl/issues/10532)
6496ccb4
S
1763 if not video_info and args.get('ypc_vid'):
1764 return self.url_result(
1765 args['ypc_vid'], YoutubeIE.ie_key(), video_id=args['ypc_vid'])
2fe1ff85
JMF
1766 if args.get('livestream') == '1' or args.get('live_playback') == 1:
1767 is_live = True
19a107f2 1768 sts = ytplayer_config.get('sts')
dbdaaa23 1769 if not player_response:
c2d125d9 1770 player_response = extract_player_response(args.get('player_response'), video_id)
0a3cf9ad 1771 if not video_info or self._downloader.params.get('youtube_include_dash_manifest', True):
561b456e 1772 add_dash_mpd_pr(player_response)
19a107f2
AG
1773 # We also try looking in get_video_info since it may contain different dashmpd
1774 # URL that points to a DASH manifest with possibly different itag set (some itags
1775 # are missing from DASH manifest pointed by webpage's dashmpd, some - from DASH
1776 # manifest pointed by get_video_info's dashmpd).
1777 # The general idea is to take a union of itags of both DASH manifests (for example
1778 # video with such 'manifest behavior' see https://github.com/ytdl-org/youtube-dl/issues/6093)
1779 self.report_video_info_webpage_download(video_id)
1780 for el in ('embedded', 'detailpage', 'vevo', ''):
1781 query = {
1782 'video_id': video_id,
1783 'ps': 'default',
1784 'eurl': '',
1785 'gl': 'US',
1786 'hl': 'en',
1787 }
1788 if el:
1789 query['el'] = el
1790 if sts:
1791 query['sts'] = sts
1792 video_info_webpage = self._download_webpage(
1793 '%s://www.youtube.com/get_video_info' % proto,
1794 video_id, note=False,
1795 errnote='unable to download video info webpage',
1796 fatal=False, query=query)
1797 if not video_info_webpage:
1798 continue
1799 get_video_info = compat_parse_qs(video_info_webpage)
1800 if not player_response:
1801 pl_response = get_video_info.get('player_response', [None])[0]
1802 player_response = extract_player_response(pl_response, video_id)
1803 add_dash_mpd(get_video_info)
1804 if view_count is None:
1805 view_count = extract_view_count(get_video_info)
1806 if not video_info:
1807 video_info = get_video_info
1808 get_token = extract_token(get_video_info)
1809 if get_token:
1810 # Different get_video_info requests may report different results, e.g.
1811 # some may report video unavailability, but some may serve it without
1812 # any complaint (see https://github.com/ytdl-org/youtube-dl/issues/7362,
1813 # the original webpage as well as el=info and el=embedded get_video_info
1814 # requests report video unavailability due to geo restriction while
1815 # el=detailpage succeeds and returns valid data). This is probably
1816 # due to YouTube measures against IP ranges of hosting providers.
1817 # Working around by preferring the first succeeded video_info containing
1818 # the token if no such video_info yet was found.
1819 token = extract_token(video_info)
1820 if not token:
1821 video_info = get_video_info
1822 break
bbb7c3f7
YCH
1823
1824 def extract_unavailable_message():
0add33ab
S
1825 messages = []
1826 for tag, kind in (('h1', 'message'), ('div', 'submessage')):
1827 msg = self._html_search_regex(
1828 r'(?s)<{tag}[^>]+id=["\']unavailable-{kind}["\'][^>]*>(.+?)</{tag}>'.format(tag=tag, kind=kind),
1829 video_webpage, 'unavailable %s' % kind, default=None)
1830 if msg:
1831 messages.append(msg)
1832 if messages:
1833 return '\n'.join(messages)
bbb7c3f7 1834
19a107f2 1835 if not video_info:
15be3eb5
RA
1836 unavailable_message = extract_unavailable_message()
1837 if not unavailable_message:
1838 unavailable_message = 'Unable to extract video data'
1839 raise ExtractorError(
1840 'YouTube said: %s' % unavailable_message, expected=True, video_id=video_id)
1841
dbdaaa23
S
1842 video_details = try_get(
1843 player_response, lambda x: x['videoDetails'], dict) or {}
1844
8dbf751a
RA
1845 video_title = video_info.get('title', [None])[0] or video_details.get('title')
1846 if not video_title:
cf7e015f
S
1847 self._downloader.report_warning('Unable to extract video title')
1848 video_title = '_'
1849
9cafc3fd 1850 description_original = video_description = get_element_by_id("eow-description", video_webpage)
cf7e015f 1851 if video_description:
fa4bc6e7
RA
1852
1853 def replace_url(m):
1854 redir_url = compat_urlparse.urljoin(url, m.group(1))
1855 parsed_redir_url = compat_urllib_parse_urlparse(redir_url)
1856 if re.search(r'^(?:www\.)?(?:youtube(?:-nocookie)?\.com|youtu\.be)$', parsed_redir_url.netloc) and parsed_redir_url.path == '/redirect':
1857 qs = compat_parse_qs(parsed_redir_url.query)
1858 q = qs.get('q')
1859 if q and q[0]:
1860 return q[0]
1861 return redir_url
1862
9cafc3fd 1863 description_original = video_description = re.sub(r'''(?x)
cf7e015f 1864 <a\s+
25cb7a0e 1865 (?:[a-zA-Z-]+="[^"]*"\s+)*?
23f13e97 1866 (?:title|href)="([^"]+)"\s+
25cb7a0e 1867 (?:[a-zA-Z-]+="[^"]*"\s+)*?
525cedb9 1868 class="[^"]*"[^>]*>
23f13e97 1869 [^<]+\.{3}\s*
cf7e015f 1870 </a>
fa4bc6e7 1871 ''', replace_url, video_description)
cf7e015f
S
1872 video_description = clean_html(video_description)
1873 else:
19a107f2 1874 video_description = self._html_search_meta('description', video_webpage) or video_details.get('shortDescription')
cf7e015f 1875
8fe10494 1876 if not smuggled_data.get('force_singlefeed', False):
5e1eddb9 1877 if not self._downloader.params.get('noplaylist'):
8fe10494
S
1878 multifeed_metadata_list = try_get(
1879 player_response,
1880 lambda x: x['multicamera']['playerLegacyMulticameraRenderer']['metadataList'],
1881 compat_str) or try_get(
1882 video_info, lambda x: x['multifeed_metadata_list'][0], compat_str)
1883 if multifeed_metadata_list:
1884 entries = []
1885 feed_ids = []
1886 for feed in multifeed_metadata_list.split(','):
1887 # Unquote should take place before split on comma (,) since textual
1888 # fields may contain comma as well (see
067aa17e 1889 # https://github.com/ytdl-org/youtube-dl/issues/8536)
8fe10494
S
1890 feed_data = compat_parse_qs(compat_urllib_parse_unquote_plus(feed))
1891 entries.append({
1892 '_type': 'url_transparent',
1893 'ie_key': 'Youtube',
1894 'url': smuggle_url(
1895 '%s://www.youtube.com/watch?v=%s' % (proto, feed_data['id'][0]),
1896 {'force_singlefeed': True}),
19a107f2 1897 'title': '%s (%s)' % (video_title, feed_data['title'][0]),
8fe10494 1898 })
19a107f2 1899 feed_ids.append(feed_data['id'][0])
8fe10494
S
1900 self.to_screen(
1901 'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
1902 % (', '.join(feed_ids), video_id))
1903 return self.playlist_result(entries, video_id, video_title, video_description)
1904 else:
1905 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
cf7e015f 1906
c7121fa7 1907 if view_count is None:
1c9c8de2 1908 view_count = extract_view_count(video_info)
dbdaaa23
S
1909 if view_count is None and video_details:
1910 view_count = int_or_none(video_details.get('viewCount'))
1d699755 1911
27019dbb 1912 if is_live is None:
898238e9 1913 is_live = bool_or_none(video_details.get('isLive'))
27019dbb 1914
c5e8d7af
PH
1915 # Check for "rental" videos
1916 if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info:
067aa17e 1917 raise ExtractorError('"rental" videos not supported. See https://github.com/ytdl-org/youtube-dl/issues/359 for more information.', expected=True)
c5e8d7af 1918
c63ca0ee
S
1919 def _extract_filesize(media_url):
1920 return int_or_none(self._search_regex(
1921 r'\bclen[=/](\d+)', media_url, 'filesize', default=None))
1922
bf1317d2
S
1923 streaming_formats = try_get(player_response, lambda x: x['streamingData']['formats'], list) or []
1924 streaming_formats.extend(try_get(player_response, lambda x: x['streamingData']['adaptiveFormats'], list) or [])
1925
c5e8d7af
PH
1926 if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
1927 self.report_rtmp_download()
dd27fd17
PH
1928 formats = [{
1929 'format_id': '_rtmp',
1930 'protocol': 'rtmp',
1931 'url': video_info['conn'][0],
1932 'player_url': player_url,
1933 }]
bf1317d2 1934 elif not is_live and (streaming_formats or len(video_info.get('url_encoded_fmt_stream_map', [''])[0]) >= 1 or len(video_info.get('adaptive_fmts', [''])[0]) >= 1):
5f6a1245 1935 encoded_url_map = video_info.get('url_encoded_fmt_stream_map', [''])[0] + ',' + video_info.get('adaptive_fmts', [''])[0]
00fe14fc 1936 if 'rtmpe%3Dyes' in encoded_url_map:
067aa17e 1937 raise ExtractorError('rtmpe downloads are not supported, see https://github.com/ytdl-org/youtube-dl/issues/343 for more information.', expected=True)
bf1317d2 1938 formats = []
3318832e 1939 formats_spec = {}
82156fdb 1940 fmt_list = video_info.get('fmt_list', [''])[0]
1941 if fmt_list:
1942 for fmt in fmt_list.split(','):
1943 spec = fmt.split('/')
3318832e 1944 if len(spec) > 1:
1945 width_height = spec[1].split('x')
1946 if len(width_height) == 2:
1947 formats_spec[spec[0]] = {
1948 'resolution': spec[1],
1949 'width': int_or_none(width_height[0]),
1950 'height': int_or_none(width_height[1]),
1951 }
bf1317d2
S
1952 for fmt in streaming_formats:
1953 itag = str_or_none(fmt.get('itag'))
1954 if not itag:
201e9eaa 1955 continue
bf1317d2
S
1956 quality = fmt.get('quality')
1957 quality_label = fmt.get('qualityLabel') or quality
1958 formats_spec[itag] = {
1959 'asr': int_or_none(fmt.get('audioSampleRate')),
1960 'filesize': int_or_none(fmt.get('contentLength')),
1961 'format_note': quality_label,
1962 'fps': int_or_none(fmt.get('fps')),
1963 'height': int_or_none(fmt.get('height')),
bf1317d2
S
1964 # bitrate for itag 43 is always 2147483647
1965 'tbr': float_or_none(fmt.get('averageBitrate') or fmt.get('bitrate'), 1000) if itag != '43' else None,
1966 'width': int_or_none(fmt.get('width')),
1967 }
1968
1969 for fmt in streaming_formats:
19a107f2 1970 if fmt.get('drm_families'):
bf1317d2
S
1971 continue
1972 url = url_or_none(fmt.get('url'))
1973
1974 if not url:
19a107f2 1975 cipher = fmt.get('cipher')
bf1317d2
S
1976 if not cipher:
1977 continue
1978 url_data = compat_parse_qs(cipher)
1979 url = url_or_none(try_get(url_data, lambda x: x['url'][0], compat_str))
1980 if not url:
1981 continue
1982 else:
1983 cipher = None
1984 url_data = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
1985
2f483bc1
S
1986 stream_type = int_or_none(try_get(url_data, lambda x: x['stream_type'][0]))
1987 # Unsupported FORMAT_STREAM_TYPE_OTF
1988 if stream_type == 3:
1989 continue
6449cd80 1990
bf1317d2
S
1991 format_id = fmt.get('itag') or url_data['itag'][0]
1992 if not format_id:
1993 continue
1994 format_id = compat_str(format_id)
a49eccdf 1995
bf1317d2
S
1996 if cipher:
1997 if 's' in url_data or self._downloader.params.get('youtube_include_dash_manifest', True):
1998 ASSETS_RE = r'"assets":.+?"js":\s*("[^"]+")'
1999 jsplayer_url_json = self._search_regex(
2000 ASSETS_RE,
2001 embed_webpage if age_gate else video_webpage,
2002 'JS player URL (1)', default=None)
2003 if not jsplayer_url_json and not age_gate:
2004 # We need the embed website after all
2005 if embed_webpage is None:
2006 embed_url = proto + '://www.youtube.com/embed/%s' % video_id
2007 embed_webpage = self._download_webpage(
2008 embed_url, video_id, 'Downloading embed webpage')
2009 jsplayer_url_json = self._search_regex(
2010 ASSETS_RE, embed_webpage, 'JS player URL')
2011
2012 player_url = json.loads(jsplayer_url_json)
cf010131 2013 if player_url is None:
bf1317d2
S
2014 player_url_json = self._search_regex(
2015 r'ytplayer\.config.*?"url"\s*:\s*("[^"]+")',
2016 video_webpage, 'age gate player URL')
2017 player_url = json.loads(player_url_json)
2018
2019 if 'sig' in url_data:
2020 url += '&signature=' + url_data['sig'][0]
2021 elif 's' in url_data:
2022 encrypted_sig = url_data['s'][0]
2023
2024 if self._downloader.params.get('verbose'):
2025 if player_url is None:
19a107f2 2026 player_version = 'unknown'
bf1317d2 2027 player_desc = 'unknown'
cf010131 2028 else:
19a107f2
AG
2029 if player_url.endswith('swf'):
2030 player_version = self._search_regex(
2031 r'-(.+?)(?:/watch_as3)?\.swf$', player_url,
2032 'flash player', fatal=False)
2033 player_desc = 'flash player %s' % player_version
2034 else:
2035 player_version = self._search_regex(
2036 [r'html5player-([^/]+?)(?:/html5player(?:-new)?)?\.js',
2037 r'(?:www|player(?:_ias)?)-([^/]+)(?:/[a-z]{2,3}_[A-Z]{2})?/base\.js'],
2038 player_url,
2039 'html5 player', fatal=False)
2040 player_desc = 'html5 player %s' % player_version
2041
bf1317d2
S
2042 parts_sizes = self._signature_cache_id(encrypted_sig)
2043 self.to_screen('{%s} signature length %s, %s' %
2044 (format_id, parts_sizes, player_desc))
2045
2046 signature = self._decrypt_signature(
2047 encrypted_sig, video_id, player_url, age_gate)
2048 sp = try_get(url_data, lambda x: x['sp'][0], compat_str) or 'signature'
2049 url += '&%s=%s' % (sp, signature)
201e9eaa
PH
2050 if 'ratebypass' not in url:
2051 url += '&ratebypass=yes'
c9afb51c 2052
94278f72
YCH
2053 dct = {
2054 'format_id': format_id,
2055 'url': url,
2056 'player_url': player_url,
2057 }
2058 if format_id in self._formats:
2059 dct.update(self._formats[format_id])
3318832e 2060 if format_id in formats_spec:
2061 dct.update(formats_spec[format_id])
94278f72 2062
aabc2be6 2063 # Some itags are not included in DASH manifest thus corresponding formats will
067aa17e 2064 # lack metadata (see https://github.com/ytdl-org/youtube-dl/pull/5993).
aabc2be6
S
2065 # Trying to extract metadata from url_encoded_fmt_stream_map entry.
2066 mobj = re.search(r'^(?P<width>\d+)[xX](?P<height>\d+)$', url_data.get('size', [''])[0])
2067 width, height = (int(mobj.group('width')), int(mobj.group('height'))) if mobj else (None, None)
94278f72 2068
bf1317d2
S
2069 if width is None:
2070 width = int_or_none(fmt.get('width'))
2071 if height is None:
2072 height = int_or_none(fmt.get('height'))
2073
c63ca0ee
S
2074 filesize = int_or_none(url_data.get(
2075 'clen', [None])[0]) or _extract_filesize(url)
2076
bf1317d2
S
2077 quality = url_data.get('quality', [None])[0] or fmt.get('quality')
2078 quality_label = url_data.get('quality_label', [None])[0] or fmt.get('qualityLabel')
2079
4878759f
S
2080 tbr = (float_or_none(url_data.get('bitrate', [None])[0], 1000)
2081 or float_or_none(fmt.get('bitrate'), 1000)) if format_id != '43' else None
bf1317d2 2082 fps = int_or_none(url_data.get('fps', [None])[0]) or int_or_none(fmt.get('fps'))
54fc90aa 2083
94278f72 2084 more_fields = {
c63ca0ee 2085 'filesize': filesize,
bf1317d2 2086 'tbr': tbr,
c9afb51c
AH
2087 'width': width,
2088 'height': height,
bf1317d2
S
2089 'fps': fps,
2090 'format_note': quality_label or quality,
c9afb51c 2091 }
94278f72
YCH
2092 for key, value in more_fields.items():
2093 if value:
2094 dct[key] = value
bf1317d2 2095 type_ = url_data.get('type', [None])[0] or fmt.get('mimeType')
aabc2be6
S
2096 if type_:
2097 type_split = type_.split(';')
2098 kind_ext = type_split[0].split('/')
2099 if len(kind_ext) == 2:
94278f72
YCH
2100 kind, _ = kind_ext
2101 dct['ext'] = mimetype2ext(type_split[0])
aabc2be6
S
2102 if kind in ('audio', 'video'):
2103 codecs = None
2104 for mobj in re.finditer(
2105 r'(?P<key>[a-zA-Z_-]+)=(?P<quote>["\']?)(?P<val>.+?)(?P=quote)(?:;|$)', type_):
2106 if mobj.group('key') == 'codecs':
2107 codecs = mobj.group('val')
2108 break
2109 if codecs:
6310acf5 2110 dct.update(parse_codecs(codecs))
e4a60912
S
2111 if dct.get('acodec') == 'none' or dct.get('vcodec') == 'none':
2112 dct['downloader_options'] = {
2113 # Youtube throttles chunks >~10M
2114 'http_chunk_size': 10485760,
2115 }
aabc2be6 2116 formats.append(dct)
c5e8d7af 2117 else:
c3e54389
S
2118 manifest_url = (
2119 url_or_none(try_get(
2120 player_response,
2121 lambda x: x['streamingData']['hlsManifestUrl'],
3089bc74
S
2122 compat_str))
2123 or url_or_none(try_get(
c3e54389
S
2124 video_info, lambda x: x['hlsvp'][0], compat_str)))
2125 if manifest_url:
2126 formats = []
2127 m3u8_formats = self._extract_m3u8_formats(
2128 manifest_url, video_id, 'mp4', fatal=False)
2129 for a_format in m3u8_formats:
2130 itag = self._search_regex(
2131 r'/itag/(\d+)/', a_format['url'], 'itag', default=None)
2132 if itag:
2133 a_format['format_id'] = itag
2134 if itag in self._formats:
2135 dct = self._formats[itag].copy()
2136 dct.update(a_format)
2137 a_format = dct
2138 a_format['player_url'] = player_url
2139 # Accept-Encoding header causes failures in live streams on Youtube and Youtube Gaming
2140 a_format.setdefault('http_headers', {})['Youtubedl-no-compression'] = 'True'
2141 formats.append(a_format)
2142 else:
13577349 2143 error_message = extract_unavailable_message()
c3e54389 2144 if not error_message:
13577349
S
2145 error_message = clean_html(try_get(
2146 player_response, lambda x: x['playabilityStatus']['reason'],
2147 compat_str))
2148 if not error_message:
2149 error_message = clean_html(
2150 try_get(video_info, lambda x: x['reason'][0], compat_str))
c3e54389
S
2151 if error_message:
2152 raise ExtractorError(error_message, expected=True)
2153 raise ExtractorError('no conn, hlsvp, hlsManifestUrl or url_encoded_fmt_stream_map information found in video info')
c5e8d7af 2154
7e72694b 2155 # uploader
dbdaaa23
S
2156 video_uploader = try_get(
2157 video_info, lambda x: x['author'][0],
2158 compat_str) or str_or_none(video_details.get('author'))
7e72694b
S
2159 if video_uploader:
2160 video_uploader = compat_urllib_parse_unquote_plus(video_uploader)
2161 else:
2162 self._downloader.report_warning('unable to extract uploader name')
2163
2164 # uploader_id
2165 video_uploader_id = None
2166 video_uploader_url = None
2167 mobj = re.search(
2168 r'<link itemprop="url" href="(?P<uploader_url>https?://www\.youtube\.com/(?:user|channel)/(?P<uploader_id>[^"]+))">',
2169 video_webpage)
2170 if mobj is not None:
2171 video_uploader_id = mobj.group('uploader_id')
2172 video_uploader_url = mobj.group('uploader_url')
2173 else:
19a107f2 2174 self._downloader.report_warning('unable to extract uploader nickname')
7e72694b 2175
b45a9e69 2176 channel_id = (
3089bc74
S
2177 str_or_none(video_details.get('channelId'))
2178 or self._html_search_meta(
2179 'channelId', video_webpage, 'channel id', default=None)
2180 or self._search_regex(
b45a9e69 2181 r'data-channel-external-id=(["\'])(?P<id>(?:(?!\1).)+)\1',
2182 video_webpage, 'channel id', default=None, group='id'))
dd4c4492
S
2183 channel_url = 'http://www.youtube.com/channel/%s' % channel_id if channel_id else None
2184
19a107f2
AG
2185 # thumbnail image
2186 # We try first to get a high quality image:
2187 m_thumb = re.search(r'<span itemprop="thumbnail".*?href="(.*?)">',
2188 video_webpage, re.DOTALL)
2189 if m_thumb is not None:
2190 video_thumbnail = m_thumb.group(1)
2191 elif 'thumbnail_url' not in video_info:
2192 self._downloader.report_warning('unable to extract video thumbnail')
7e72694b 2193 video_thumbnail = None
19a107f2
AG
2194 else: # don't panic if we can't find it
2195 video_thumbnail = compat_urllib_parse_unquote_plus(video_info['thumbnail_url'][0])
7e72694b
S
2196
2197 # upload date
2198 upload_date = self._html_search_meta(
2199 'datePublished', video_webpage, 'upload date', default=None)
2200 if not upload_date:
2201 upload_date = self._search_regex(
2202 [r'(?s)id="eow-date.*?>(.*?)</span>',
2203 r'(?:id="watch-uploader-info".*?>.*?|["\']simpleText["\']\s*:\s*["\'])(?:Published|Uploaded|Streamed live|Started) on (.+?)[<"\']'],
2204 video_webpage, 'upload date', default=None)
2205 upload_date = unified_strdate(upload_date)
2206
2207 video_license = self._html_search_regex(
2208 r'<h4[^>]+class="title"[^>]*>\s*License\s*</h4>\s*<ul[^>]*>\s*<li>(.+?)</li',
2209 video_webpage, 'license', default=None)
2210
2211 m_music = re.search(
2212 r'''(?x)
2213 <h4[^>]+class="title"[^>]*>\s*Music\s*</h4>\s*
2214 <ul[^>]*>\s*
2215 <li>(?P<title>.+?)
2216 by (?P<creator>.+?)
2217 (?:
2218 \(.+?\)|
2219 <a[^>]*
2220 (?:
2221 \bhref=["\']/red[^>]*>| # drop possible
2222 >\s*Listen ad-free with YouTube Red # YouTube Red ad
2223 )
2224 .*?
2225 )?</li
2226 ''',
2227 video_webpage)
2228 if m_music:
2229 video_alt_title = remove_quotes(unescapeHTML(m_music.group('title')))
2230 video_creator = clean_html(m_music.group('creator'))
2231 else:
2232 video_alt_title = video_creator = None
2233
2234 def extract_meta(field):
2235 return self._html_search_regex(
2236 r'<h4[^>]+class="title"[^>]*>\s*%s\s*</h4>\s*<ul[^>]*>\s*<li>(.+?)</li>\s*' % field,
2237 video_webpage, field, default=None)
2238
2239 track = extract_meta('Song')
2240 artist = extract_meta('Artist')
92bc97d3 2241 album = extract_meta('Album')
822b9d9c
RA
2242
2243 # Youtube Music Auto-generated description
92bc97d3 2244 release_date = release_year = None
822b9d9c
RA
2245 if video_description:
2246 mobj = re.search(r'(?s)Provided to YouTube by [^\n]+\n+(?P<track>[^·]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?', video_description)
2247 if mobj:
2248 if not track:
2249 track = mobj.group('track').strip()
2250 if not artist:
2251 artist = mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·'))
92bc97d3
RA
2252 if not album:
2253 album = mobj.group('album'.strip())
822b9d9c
RA
2254 release_year = mobj.group('release_year')
2255 release_date = mobj.group('release_date')
2256 if release_date:
2257 release_date = release_date.replace('-', '')
2258 if not release_year:
2259 release_year = int(release_date[:4])
2260 if release_year:
2261 release_year = int(release_year)
7e72694b
S
2262
2263 m_episode = re.search(
2264 r'<div[^>]+id="watch7-headline"[^>]*>\s*<span[^>]*>.*?>(?P<series>[^<]+)</a></b>\s*S(?P<season>\d+)\s*•\s*E(?P<episode>\d+)</span>',
2265 video_webpage)
2266 if m_episode:
c2dd2dc0 2267 series = unescapeHTML(m_episode.group('series'))
7e72694b
S
2268 season_number = int(m_episode.group('season'))
2269 episode_number = int(m_episode.group('episode'))
2270 else:
2271 series = season_number = episode_number = None
2272
2273 m_cat_container = self._search_regex(
2274 r'(?s)<h4[^>]*>\s*Category\s*</h4>\s*<ul[^>]*>(.*?)</ul>',
2275 video_webpage, 'categories', default=None)
2276 if m_cat_container:
2277 category = self._html_search_regex(
2278 r'(?s)<a[^<]+>(.*?)</a>', m_cat_container, 'category',
2279 default=None)
19a107f2
AG
2280 video_categories = None if category is None else [category]
2281 else:
2282 video_categories = None
7e72694b
S
2283
2284 video_tags = [
2285 unescapeHTML(m.group('content'))
2286 for m in re.finditer(self._meta_regex('og:video:tag'), video_webpage)]
2287
2288 def _extract_count(count_name):
2289 return str_to_int(self._search_regex(
2290 r'-%s-button[^>]+><span[^>]+class="yt-uix-button-content"[^>]*>([\d,]+)</span>'
2291 % re.escape(count_name),
2292 video_webpage, count_name, default=None))
2293
2294 like_count = _extract_count('like')
2295 dislike_count = _extract_count('dislike')
2296
dbdaaa23
S
2297 if view_count is None:
2298 view_count = str_to_int(self._search_regex(
2299 r'<[^>]+class=["\']watch-view-count[^>]+>\s*([\d,\s]+)', video_webpage,
2300 'view count', default=None))
2301
bf3c9326
S
2302 average_rating = (
2303 float_or_none(video_details.get('averageRating'))
2304 or try_get(video_info, lambda x: float_or_none(x['avg_rating'][0])))
2305
7e72694b
S
2306 # subtitles
2307 video_subtitles = self.extract_subtitles(video_id, video_webpage)
2308 automatic_captions = self.extract_automatic_captions(video_id, video_webpage)
2309
2310 video_duration = try_get(
2311 video_info, lambda x: int_or_none(x['length_seconds'][0]))
dbdaaa23
S
2312 if not video_duration:
2313 video_duration = int_or_none(video_details.get('lengthSeconds'))
7e72694b
S
2314 if not video_duration:
2315 video_duration = parse_duration(self._html_search_meta(
2316 'duration', video_webpage, 'video duration'))
2317
2318 # annotations
2319 video_annotations = None
2320 if self._downloader.params.get('writeannotations', False):
64b6a4e9
RA
2321 xsrf_token = self._search_regex(
2322 r'([\'"])XSRF_TOKEN\1\s*:\s*([\'"])(?P<xsrf_token>[A-Za-z0-9+/=]+)\2',
2323 video_webpage, 'xsrf token', group='xsrf_token', fatal=False)
2324 invideo_url = try_get(
2325 player_response, lambda x: x['annotations'][0]['playerAnnotationsUrlsRenderer']['invideoUrl'], compat_str)
2326 if xsrf_token and invideo_url:
2327 xsrf_field_name = self._search_regex(
2328 r'([\'"])XSRF_FIELD_NAME\1\s*:\s*([\'"])(?P<xsrf_field_name>\w+)\2',
2329 video_webpage, 'xsrf field name',
2330 group='xsrf_field_name', default='session_token')
2331 video_annotations = self._download_webpage(
2332 self._proto_relative_url(invideo_url),
2333 video_id, note='Downloading annotations',
2334 errnote='Unable to download video annotations', fatal=False,
2335 data=urlencode_postdata({xsrf_field_name: xsrf_token}))
7e72694b 2336
19a107f2 2337 chapters = self._extract_chapters(description_original, video_duration)
7e72694b 2338
dd27fd17 2339 # Look for the DASH manifest
203fb43f 2340 if self._downloader.params.get('youtube_include_dash_manifest', True):
77c6fb5b 2341 dash_mpd_fatal = True
8ff648e4 2342 for mpd_url in dash_mpds:
d8d24a92 2343 dash_formats = {}
774e208f 2344 try:
05d0d131
YCH
2345 def decrypt_sig(mobj):
2346 s = mobj.group(1)
2347 dec_s = self._decrypt_signature(s, video_id, player_url, age_gate)
2348 return '/signature/%s' % dec_s
2349
8ff648e4 2350 mpd_url = re.sub(r'/s/([a-fA-F0-9\.]+)', decrypt_sig, mpd_url)
2d2fa82d 2351
8ff648e4 2352 for df in self._extract_mpd_formats(
2353 mpd_url, video_id, fatal=dash_mpd_fatal,
2354 formats_dict=self._formats):
c63ca0ee
S
2355 if not df.get('filesize'):
2356 df['filesize'] = _extract_filesize(df['url'])
d8d24a92
S
2357 # Do not overwrite DASH format found in some previous DASH manifest
2358 if df['format_id'] not in dash_formats:
2359 dash_formats[df['format_id']] = df
77c6fb5b
S
2360 # Additional DASH manifests may end up in HTTP Error 403 therefore
2361 # allow them to fail without bug report message if we already have
2362 # some DASH manifest succeeded. This is temporary workaround to reduce
2363 # burst of bug reports until we figure out the reason and whether it
2364 # can be fixed at all.
2365 dash_mpd_fatal = False
774e208f
PH
2366 except (ExtractorError, KeyError) as e:
2367 self.report_warning(
2368 'Skipping DASH manifest: %r' % e, video_id)
d8d24a92 2369 if dash_formats:
04b3b3df
JMF
2370 # Remove the formats we found through non-DASH, they
2371 # contain less info and it can be wrong, because we use
2372 # fixed values (for example the resolution). See
067aa17e 2373 # https://github.com/ytdl-org/youtube-dl/issues/5774 for an
04b3b3df 2374 # example.
d80265cc 2375 formats = [f for f in formats if f['format_id'] not in dash_formats.keys()]
d8d24a92 2376 formats.extend(dash_formats.values())
d80044c2 2377
6271f1ca
PH
2378 # Check for malformed aspect ratio
2379 stretched_m = re.search(
2380 r'<meta\s+property="og:video:tag".*?content="yt:stretch=(?P<w>[0-9]+):(?P<h>[0-9]+)">',
2381 video_webpage)
2382 if stretched_m:
313dfc45
LL
2383 w = float(stretched_m.group('w'))
2384 h = float(stretched_m.group('h'))
5faf9fed
S
2385 # yt:stretch may hold invalid ratio data (e.g. for Q39EVAstoRM ratio is 17:0).
2386 # We will only process correct ratios.
313dfc45 2387 if w > 0 and h > 0:
41f24c32 2388 ratio = w / h
313dfc45
LL
2389 for f in formats:
2390 if f.get('vcodec') != 'none':
2391 f['stretched_ratio'] = ratio
6271f1ca 2392
026fbedc 2393 if not formats:
19a107f2
AG
2394 token = extract_token(video_info)
2395 if not token:
2396 if 'reason' in video_info:
2397 if 'The uploader has not made this video available in your country.' in video_info['reason']:
2398 regions_allowed = self._html_search_meta(
2399 'regionsAllowed', video_webpage, default=None)
2400 countries = regions_allowed.split(',') if regions_allowed else None
2401 self.raise_geo_restricted(
2402 msg=video_info['reason'][0], countries=countries)
2403 reason = video_info['reason'][0]
2404 if 'Invalid parameters' in reason:
2405 unavailable_message = extract_unavailable_message()
2406 if unavailable_message:
2407 reason = unavailable_message
2408 raise ExtractorError(
2409 'YouTube said: %s' % reason,
2410 expected=True, video_id=video_id)
2411 else:
2412 raise ExtractorError(
2413 '"token" parameter not in video info for unknown reason',
2414 video_id=video_id)
2415
2416 if not formats and (video_info.get('license_info') or try_get(player_response, lambda x: x['streamingData']['licenseInfos'])):
2417 raise ExtractorError('This video is DRM protected.', expected=True)
0d297518 2418
4bcc7bd1 2419 self._sort_formats(formats)
4ea3be0a 2420
21c340b8 2421 self.mark_watched(video_id, video_info, player_response)
d77ab8e2 2422
4ea3be0a 2423 return {
8bcc8756
JW
2424 'id': video_id,
2425 'uploader': video_uploader,
2426 'uploader_id': video_uploader_id,
fd050249 2427 'uploader_url': video_uploader_url,
dd4c4492
S
2428 'channel_id': channel_id,
2429 'channel_url': channel_url,
8bcc8756 2430 'upload_date': upload_date,
7caf9830 2431 'license': video_license,
936784b2 2432 'creator': video_creator or artist,
8bcc8756 2433 'title': video_title,
936784b2 2434 'alt_title': video_alt_title or track,
19a107f2 2435 'thumbnail': video_thumbnail,
8bcc8756
JW
2436 'description': video_description,
2437 'categories': video_categories,
000b6b5a 2438 'tags': video_tags,
8bcc8756 2439 'subtitles': video_subtitles,
360e1ca5 2440 'automatic_captions': automatic_captions,
8bcc8756
JW
2441 'duration': video_duration,
2442 'age_limit': 18 if age_gate else 0,
2443 'annotations': video_annotations,
9cafc3fd 2444 'chapters': chapters,
7e8c0af0 2445 'webpage_url': proto + '://www.youtube.com/watch?v=%s' % video_id,
8bcc8756 2446 'view_count': view_count,
4ea3be0a 2447 'like_count': like_count,
2448 'dislike_count': dislike_count,
bf3c9326 2449 'average_rating': average_rating,
8bcc8756 2450 'formats': formats,
2fe1ff85 2451 'is_live': is_live,
7c80519c 2452 'start_time': start_time,
297a564b 2453 'end_time': end_time,
12afdc2a
S
2454 'series': series,
2455 'season_number': season_number,
2456 'episode_number': episode_number,
936784b2
S
2457 'track': track,
2458 'artist': artist,
5caabd3c 2459 'album': album,
2460 'release_date': release_date,
2461 'release_year': release_year,
4ea3be0a 2462 }
c5e8d7af 2463
5f6a1245 2464
8e7aad20 2465class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
78caa52a 2466 IE_DESC = 'YouTube.com playlists'
d67cc9fa 2467 _VALID_URL = r"""(?x)(?:
c5e8d7af
PH
2468 (?:https?://)?
2469 (?:\w+\.)?
c5e8d7af 2470 (?:
c0345b82 2471 (?:
66b48727 2472 youtube(?:kids)?\.com|
c0345b82
S
2473 invidio\.us
2474 )
2475 /
feaa5ad7 2476 (?:
87dadd45 2477 (?:course|view_play_list|my_playlists|artist|playlist|watch|embed/(?:videoseries|[0-9A-Za-z_-]{11}))
feaa5ad7
S
2478 \? (?:.*?[&;])*? (?:p|a|list)=
2479 | p/
2480 )|
2481 youtu\.be/[0-9A-Za-z_-]{11}\?.*?\blist=
c5e8d7af 2482 )
d67cc9fa 2483 (
66b48727 2484 (?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)?[0-9A-Za-z-_]{10,}
5f6a1245 2485 # Top tracks, they can also include dots
d67cc9fa
JMF
2486 |(?:MC)[\w\.]*
2487 )
c5e8d7af
PH
2488 .*
2489 |
d0ba5587
S
2490 (%(playlist_id)s)
2491 )""" % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
8d81f3e3 2492 _TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s'
351f37c0
S
2493 _VIDEO_RE_TPL = r'href="\s*/watch\?v=%s(?:&amp;(?:[^"]*?index=(?P<index>\d+))?(?:[^>]+>(?P<title>[^<]+))?)?'
2494 _VIDEO_RE = _VIDEO_RE_TPL % r'(?P<id>[0-9A-Za-z_-]{11})'
78caa52a 2495 IE_NAME = 'youtube:playlist'
81127aa5 2496 _TESTS = [{
19a107f2 2497 'url': 'https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re',
81127aa5 2498 'info_dict': {
19a107f2
AG
2499 'title': 'ytdl test PL',
2500 'id': 'PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re',
81127aa5 2501 },
19a107f2 2502 'playlist_count': 3,
9291475f 2503 }, {
19a107f2 2504 'url': 'https://www.youtube.com/playlist?list=PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx',
9291475f 2505 'info_dict': {
19a107f2
AG
2506 'id': 'PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx',
2507 'title': 'YDL_Empty_List',
9291475f
PH
2508 },
2509 'playlist_count': 0,
19a107f2 2510 'skip': 'This playlist is private',
9291475f
PH
2511 }, {
2512 'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
2513 'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
2514 'info_dict': {
2515 'title': '29C3: Not my department',
acf757f4 2516 'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
13a75688
S
2517 'uploader': 'Christiaan008',
2518 'uploader_id': 'ChRiStIaAn008',
9291475f 2519 },
19a107f2 2520 'playlist_count': 95,
9291475f
PH
2521 }, {
2522 'note': 'issue #673',
2523 'url': 'PLBB231211A4F62143',
2524 'info_dict': {
f46a8702 2525 'title': '[OLD]Team Fortress 2 (Class-based LP)',
acf757f4 2526 'id': 'PLBB231211A4F62143',
13a75688
S
2527 'uploader': 'Wickydoo',
2528 'uploader_id': 'Wickydoo',
9291475f
PH
2529 },
2530 'playlist_mincount': 26,
2531 }, {
2532 'note': 'Large playlist',
2533 'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
2534 'info_dict': {
2535 'title': 'Uploads from Cauchemar',
acf757f4 2536 'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
13a75688
S
2537 'uploader': 'Cauchemar',
2538 'uploader_id': 'Cauchemar89',
9291475f
PH
2539 },
2540 'playlist_mincount': 799,
2541 }, {
2542 'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
2543 'info_dict': {
2544 'title': 'YDL_safe_search',
acf757f4 2545 'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
9291475f
PH
2546 },
2547 'playlist_count': 2,
4201ba13 2548 'skip': 'This playlist is private',
ac7553d0
PH
2549 }, {
2550 'note': 'embedded',
2d3d2997 2551 'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
ac7553d0
PH
2552 'playlist_count': 4,
2553 'info_dict': {
2554 'title': 'JODA15',
acf757f4 2555 'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
13a75688
S
2556 'uploader': 'milan',
2557 'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
ac7553d0 2558 }
87dadd45
S
2559 }, {
2560 'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
2561 'playlist_mincount': 485,
2562 'info_dict': {
13a75688 2563 'title': '2018 Chinese New Singles (11/6 updated)',
87dadd45 2564 'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
13a75688
S
2565 'uploader': 'LBK',
2566 'uploader_id': 'sdragonfang',
87dadd45 2567 }
6b08cdf6
PH
2568 }, {
2569 'note': 'Embedded SWF player',
2d3d2997 2570 'url': 'https://www.youtube.com/p/YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ?hl=en_US&fs=1&rel=0',
6b08cdf6
PH
2571 'playlist_count': 4,
2572 'info_dict': {
2573 'title': 'JODA7',
acf757f4 2574 'id': 'YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ',
13a75688
S
2575 },
2576 'skip': 'This playlist does not exist',
4b7df0d3
JMF
2577 }, {
2578 'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
2579 'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
2580 'info_dict': {
acf757f4
PH
2581 'title': 'Uploads from Interstellar Movie',
2582 'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
13a75688
S
2583 'uploader': 'Interstellar Movie',
2584 'uploader_id': 'InterstellarMovie1',
4b7df0d3 2585 },
481cc733 2586 'playlist_mincount': 21,
dacb3a86
S
2587 }, {
2588 # Playlist URL that does not actually serve a playlist
2589 'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
2590 'info_dict': {
2591 'id': 'FqZTN594JQw',
2592 'ext': 'webm',
2593 'title': "Smiley's People 01 detective, Adventure Series, Action",
2594 'uploader': 'STREEM',
2595 'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
ec85ded8 2596 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
dacb3a86
S
2597 'upload_date': '20150526',
2598 'license': 'Standard YouTube License',
2599 'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
2600 'categories': ['People & Blogs'],
2601 'tags': list,
dbdaaa23 2602 'view_count': int,
dacb3a86
S
2603 'like_count': int,
2604 'dislike_count': int,
2605 },
2606 'params': {
2607 'skip_download': True,
2608 },
13a75688 2609 'skip': 'This video is not available.',
dacb3a86 2610 'add_ie': [YoutubeIE.ie_key()],
481cc733
S
2611 }, {
2612 'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
2613 'info_dict': {
2614 'id': 'yeWKywCrFtk',
2615 'ext': 'mp4',
2616 'title': 'Small Scale Baler and Braiding Rugs',
2617 'uploader': 'Backus-Page House Museum',
2618 'uploader_id': 'backuspagemuseum',
ec85ded8 2619 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
481cc733 2620 'upload_date': '20161008',
481cc733
S
2621 'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
2622 'categories': ['Nonprofits & Activism'],
2623 'tags': list,
2624 'like_count': int,
2625 'dislike_count': int,
2626 },
2627 'params': {
2628 'noplaylist': True,
2629 'skip_download': True,
2630 },
2e18adec
S
2631 }, {
2632 # https://github.com/ytdl-org/youtube-dl/issues/21844
2633 'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
2634 'info_dict': {
2635 'title': 'Data Analysis with Dr Mike Pound',
2636 'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
2637 'uploader_id': 'Computerphile',
2638 'uploader': 'Computerphile',
2639 },
2640 'playlist_mincount': 11,
feaa5ad7
S
2641 }, {
2642 'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
2643 'only_matching': True,
a6857510
S
2644 }, {
2645 'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
2646 'only_matching': True,
409b9324
S
2647 }, {
2648 # music album playlist
2649 'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
2650 'only_matching': True,
c0345b82
S
2651 }, {
2652 'url': 'https://invidio.us/playlist?list=PLDIoUOhQQPlXr63I_vwF9GD8sAKh77dWU',
2653 'only_matching': True,
66b48727
RA
2654 }, {
2655 'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',
2656 'only_matching': True,
81127aa5 2657 }]
c5e8d7af 2658
880e1c52
JMF
def _real_initialize(self):
    """Initialization hook for this extractor: delegates to ``self._login()``."""
    self._login()
2661
351f37c0
S
def extract_videos_from_page(self, page):
    """Collect video ids and titles referenced by an HTML page.

    Tags carrying a ``data-video-id`` attribute are parsed first; the
    accumulated lists are then handed to
    ``self.extract_videos_from_page_impl`` with progressively more relaxed
    patterns (presumably it appends matches not collected yet -- its body
    is not visible from here).

    Returns the ``zip`` of the collected ids with their titles; a title is
    whatever ``data-title`` held (stripped), so it may be ``None``.
    """
    found_ids, found_titles = [], []

    tag_re = r'(<[^>]*\bdata-video-id\s*=\s*["\'][0-9A-Za-z_-]{11}[^>]+>)'
    for tag in re.findall(tag_re, page):
        tag_attrs = extract_attributes(tag)
        found_ids.append(tag_attrs['data-video-id'])
        title = unescapeHTML(tag_attrs.get('data-title'))
        # Only strip real titles; a missing or empty title is stored as is.
        found_titles.append(title.strip() if title else title)

    fallback_patterns = (
        # Fallback with old _VIDEO_RE
        self._VIDEO_RE,
        # Relaxed fallbacks
        r'href="\s*/watch\?v\s*=\s*(?P<id>[0-9A-Za-z_-]{11})',
        r'data-video-ids\s*=\s*["\'](?P<id>[0-9A-Za-z_-]{11})',
    )
    for pattern in fallback_patterns:
        self.extract_videos_from_page_impl(
            pattern, page, found_ids, found_titles)

    return zip(found_ids, found_titles)
2689
def _extract_mix(self, playlist_id):
    """Extract a mix playlist by walking its watch pages.

    A mix is generated from a single video: the last 11 characters of the
    playlist id are used as the first video id.  Each watch page contributes
    the not-yet-seen video ids it links to, and the last collected id seeds
    the next request.  The walk stops at the first page that adds nothing.

    Returns the ``playlist_result`` built from the collected ids, titled
    with the text found on the last downloaded page.
    """
    # Verbose-mode pattern; whitespace inside it is ignored by the regex engine.
    entry_re = r'''(?xs)data-video-username=".*?".*?
               href="/watch\?v=([0-9A-Za-z_-]{11})&amp;[^"]*?list=%s''' % re.escape(playlist_id)

    collected = []
    seed_id = playlist_id[-11:]
    page_num = 0
    while True:
        page_num += 1
        watch_url = 'https://youtube.com/watch?v=%s&list=%s' % (seed_id, playlist_id)
        webpage = self._download_webpage(
            watch_url, playlist_id,
            'Downloading page {0} of Youtube mix'.format(page_num))
        # Fetch new pages until all the videos are repeated, it seems that
        # there are always 51 unique videos.
        fresh = [
            candidate
            for candidate in orderedSet(re.findall(entry_re, webpage))
            if candidate not in collected]
        if not fresh:
            break
        collected.extend(fresh)
        seed_id = collected[-1]

    url_results = self._ids_to_results(collected)

    # Try the known title containers in order and keep the first non-empty one.
    title_html = None
    for css_class in ('playlist-title', 'title long-title', 'title'):
        title_html = get_element_by_attribute('class', css_class, webpage)
        if title_html:
            break

    return self.playlist_result(url_results, playlist_id, clean_html(title_html))
2721
def _extract_playlist(self, playlist_id):
    """Download a playlist page and turn it into a playlist result.

    Returns a ``(has_videos, playlist)`` tuple.  ``has_videos`` is False
    only when the page has no playlist title and ``self._entries`` yields
    nothing for it.  ``playlist`` is the ``playlist_result`` dict with
    ``uploader``, ``uploader_id`` and ``uploader_url`` added.

    Raises ExtractorError (expected) when an alert on the page reports
    that the playlist does not exist, is private, or that the parameters
    are invalid.
    """
    url = self._TEMPLATE_URL % playlist_id
    page = self._download_webpage(url, playlist_id)

    # the yt-alert-message now has tabindex attribute (see https://github.com/ytdl-org/youtube-dl/issues/11604)
    for alert in re.findall(r'<div class="yt-alert-message"[^>]*>([^<]+)</div>', page):
        alert = alert.strip()
        # Check if the playlist exists or is private
        unavailable = re.match(
            r'[^<]*(?:The|This) playlist (?P<reason>does not exist|is private)[^<]*', alert)
        if unavailable:
            reason = unavailable.group('reason')
            message = 'This playlist %s' % reason
            if 'private' in reason:
                message += ', use --username or --netrc to access it'
            raise ExtractorError(message + '.', expected=True)
        if re.match(r'[^<]*Invalid parameters[^<]*', alert):
            raise ExtractorError(
                'Invalid parameters. Maybe URL is incorrect.',
                expected=True)
        if re.match(r'[^<]*Choose your language[^<]*', alert):
            # Language chooser banner, not an error.
            continue
        self.report_warning('Youtube gives an alert message: ' + alert)

    playlist_title = self._html_search_regex(
        r'(?s)<h1 class="pl-header-title[^"]*"[^>]*>\s*(.*?)\s*</h1>',
        page, 'title', default=None)

    # Common prefix locating the uploader link in the playlist header.
    uploader_anchor = r'class=["\']pl-header-details[^>]+>\s*<li>\s*<a[^>]+\bhref='
    uploader = self._html_search_regex(
        r'%s["\']/(?:user|channel)/[^>]+>([^<]+)' % uploader_anchor,
        page, 'uploader', default=None)

    uploader_id = uploader_url = None
    uploader_link = re.search(
        r'%s(["\'])(?P<path>/(?:user|channel)/(?P<uploader_id>.+?))\1' % uploader_anchor,
        page)
    if uploader_link:
        uploader_id = uploader_link.group('uploader_id')
        uploader_url = compat_urlparse.urljoin(url, uploader_link.group('path'))

    has_videos = True
    if not playlist_title:
        # Some playlist URLs don't actually serve a playlist (e.g.
        # https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4)
        try:
            next(self._entries(page, playlist_id))
        except StopIteration:
            has_videos = False

    playlist = self.playlist_result(
        self._entries(page, playlist_id), playlist_id, playlist_title)
    playlist['uploader'] = uploader
    playlist['uploader_id'] = uploader_id
    playlist['uploader_url'] = uploader_url

    return has_videos, playlist
c5e8d7af 2783
def _check_download_just_video(self, url, playlist_id):
    """Decide whether a playlist URL should resolve to a single video.

    Returns ``(video_id, result)``:
      * ``(None, None)`` when the URL names no video;
      * ``(video_id, url_result)`` when it does and the ``noplaylist``
        downloader param is set;
      * ``(video_id, None)`` when it does but the playlist is wanted.
    """
    # Check if it's a video-specific URL
    query = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
    video_id = query.get('v', [None])[0]
    if not video_id:
        # No ?v= parameter: look for a youtu.be / embed style id instead.
        video_id = self._search_regex(
            r'(?:(?:^|//)youtu\.be/|youtube\.com/embed/(?!videoseries))([0-9A-Za-z_-]{11})', url,
            'video id', default=None)
    if not video_id:
        return None, None

    if not self._downloader.params.get('noplaylist'):
        self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))
        return video_id, None

    self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
    return video_id, self.url_result(video_id, 'Youtube', video_id=video_id)
448830ce 2798
ebf1b291
S
def _real_extract(self, url):
    """Resolve a playlist URL or bare playlist id into an extraction result.

    Depending on the input this returns a single-video ``url_result``, a
    mix playlist, or a regular playlist.  Raises ExtractorError when the
    URL does not match ``_VALID_URL``.
    """
    # Extract playlist id
    url_match = re.match(self._VALID_URL, url)
    if url_match is None:
        raise ExtractorError('Invalid URL: %s' % url)
    playlist_id = url_match.group(1) or url_match.group(2)

    video_id, single_video = self._check_download_just_video(url, playlist_id)
    if single_video:
        return single_video

    if playlist_id.startswith(('RD', 'UL', 'PU')):
        # Mixes require a custom extraction process
        return self._extract_mix(playlist_id)

    has_videos, playlist = self._extract_playlist(playlist_id)
    if video_id and not has_videos:
        # Some playlist URLs don't actually serve a playlist (see
        # https://github.com/ytdl-org/youtube-dl/issues/10537).
        # Fallback to plain video extraction if there is a video id
        # along with playlist id.
        return self.url_result(video_id, 'Youtube', video_id=video_id)
    return playlist
448830ce 2823
c5e8d7af 2824
648e6a1f 2825class YoutubeChannelIE(YoutubePlaylistBaseInfoExtractor):
78caa52a 2826 IE_DESC = 'YouTube.com channels'
66b48727 2827 _VALID_URL = r'https?://(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie|kids)?\.com|(?:www\.)?invidio\.us)/channel/(?P<id>[0-9A-Za-z_-]+)'
eb0f3e7e 2828 _TEMPLATE_URL = 'https://www.youtube.com/channel/%s/videos'
648e6a1f 2829 _VIDEO_RE = r'(?:title="(?P<title>[^"]+)"[^>]+)?href="/watch\?v=(?P<id>[0-9A-Za-z_-]+)&?'
78caa52a 2830 IE_NAME = 'youtube:channel'
cdc628a4
PH
2831 _TESTS = [{
2832 'note': 'paginated channel',
2833 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
2834 'playlist_mincount': 91,
acf757f4 2835 'info_dict': {
9170ca5b
JMF
2836 'id': 'UUKfVa3S1e4PHvxWcwyMMg8w',
2837 'title': 'Uploads from lex will',
13a75688
S
2838 'uploader': 'lex will',
2839 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
acf757f4 2840 }
5c43afd4
JMF
2841 }, {
2842 'note': 'Age restricted channel',
2843 # from https://www.youtube.com/user/DeusExOfficial
2844 'url': 'https://www.youtube.com/channel/UCs0ifCMCm1icqRbqhUINa0w',
2845 'playlist_mincount': 64,
2846 'info_dict': {
2847 'id': 'UUs0ifCMCm1icqRbqhUINa0w',
2848 'title': 'Uploads from Deus Ex',
13a75688
S
2849 'uploader': 'Deus Ex',
2850 'uploader_id': 'DeusExOfficial',
5c43afd4 2851 },
cd5a74a2
S
2852 }, {
2853 'url': 'https://invidio.us/channel/UC23qupoDRn9YOAVzeoxjOQA',
2854 'only_matching': True,
66b48727
RA
2855 }, {
2856 'url': 'https://www.youtubekids.com/channel/UCyu8StPfZWapR6rfW_JgqcA',
2857 'only_matching': True,
cdc628a4 2858 }]
c5e8d7af 2859
e462474e
S
@classmethod
def suitable(cls, url):
    """Return whether this extractor should handle ``url``.

    URLs accepted by YoutubePlaylistsIE or YoutubeLiveIE are always
    rejected here; everything else falls through to the parent check.
    """
    if YoutubePlaylistsIE.suitable(url) or YoutubeLiveIE.suitable(url):
        return False
    return super(YoutubeChannelIE, cls).suitable(url)
e462474e 2864
9558dcec
S
2865 def _build_template_url(self, url, channel_id):
2866 return self._TEMPLATE_URL % channel_id
2867
def _real_extract(self, url):
    """Extract the videos of a channel.

    When a ``UC...`` channel id can be read from the channel page, the
    extraction is delegated to the matching ``UU...`` uploads playlist.
    Otherwise the channel page itself is parsed.

    Raises ExtractorError (expected) when the page lists no entries and
    shows an alert message.
    """
    channel_id = self._match_id(url)
    url = self._build_template_url(url, channel_id)

    # Channel by page listing is restricted to 35 pages of 30 items, i.e. 1050 videos total (see #5778)
    # Workaround by extracting as a playlist if managed to obtain channel playlist URL
    # otherwise fallback on channel by page extraction
    probe_page = self._download_webpage(
        url + '?view=57', channel_id,
        'Downloading channel page', fatal=False)

    channel_playlist_id = False
    if probe_page is not False:
        channel_playlist_id = self._html_search_meta(
            'channelId', probe_page, 'channel id', default=None)
        if not channel_playlist_id:
            # No channelId meta tag: try the app deep-link meta tags instead.
            app_url = self._html_search_meta(
                ('al:ios:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad'),
                probe_page, 'channel url', default=None)
            if app_url:
                channel_playlist_id = self._search_regex(
                    r'vnd\.youtube://user/([0-9A-Za-z_-]+)',
                    app_url, 'channel id', default=None)

    if channel_playlist_id and channel_playlist_id.startswith('UC'):
        # Swap the 'UC' prefix for 'UU' to get the uploads playlist id.
        uploads_id = 'UU' + channel_playlist_id[2:]
        return self.url_result(
            compat_urlparse.urljoin(url, '/playlist?list=%s' % uploads_id), 'YoutubePlaylist')

    channel_page = self._download_webpage(url, channel_id, 'Downloading page #1')
    autogenerated_marker = re.search(r'''(?x)
            class="[^"]*?(?:
                channel-header-autogenerated-label|
                yt-channel-title-autogenerated
            )[^"]*"''', channel_page)

    if autogenerated_marker is not None:
        # The videos are contained in a single page
        # the ajax pages can't be used, they are empty
        entries = []
        for video_id, video_title in self.extract_videos_from_page(channel_page):
            entries.append(self.url_result(
                video_id, 'Youtube', video_id=video_id,
                video_title=video_title))
        return self.playlist_result(entries, channel_id)

    try:
        next(self._entries(channel_page, channel_id))
    except StopIteration:
        # Nothing listed: surface the alert shown on the page, if there is one.
        alert_message = self._html_search_regex(
            r'(?s)<div[^>]+class=(["\']).*?\byt-alert-message\b.*?\1[^>]*>(?P<alert>[^<]+)</div>',
            channel_page, 'alert', default=None, group='alert')
        if alert_message:
            raise ExtractorError('Youtube said: %s' % alert_message, expected=True)

    return self.playlist_result(self._entries(channel_page, channel_id), channel_id)
c5e8d7af
PH
2924
2925
eb0f3e7e 2926class YoutubeUserIE(YoutubeChannelIE):
78caa52a 2927 IE_DESC = 'YouTube.com user videos (URL or "ytuser" keyword)'
ea696249 2928 _VALID_URL = r'(?:(?:https?://(?:\w+\.)?youtube\.com/(?:(?P<user>user|c)/)?(?!(?:attribution_link|watch|results|shared)(?:$|[^a-z_A-Z0-9-])))|ytuser:)(?!feed/)(?P<id>[A-Za-z0-9_-]+)'
9558dcec 2929 _TEMPLATE_URL = 'https://www.youtube.com/%s/%s/videos'
78caa52a 2930 IE_NAME = 'youtube:user'
c5e8d7af 2931
cdc628a4
PH
2932 _TESTS = [{
2933 'url': 'https://www.youtube.com/user/TheLinuxFoundation',
2934 'playlist_mincount': 320,
2935 'info_dict': {
73c4ac2c
S
2936 'id': 'UUfX55Sx5hEFjoC3cNs6mCUQ',
2937 'title': 'Uploads from The Linux Foundation',
13a75688
S
2938 'uploader': 'The Linux Foundation',
2939 'uploader_id': 'TheLinuxFoundation',
cdc628a4 2940 }
9558dcec
S
2941 }, {
2942 # Only available via https://www.youtube.com/c/12minuteathlete/videos
2943 # but not https://www.youtube.com/user/12minuteathlete/videos
2944 'url': 'https://www.youtube.com/c/12minuteathlete/videos',
2945 'playlist_mincount': 249,
2946 'info_dict': {
2947 'id': 'UUVjM-zV6_opMDx7WYxnjZiQ',
2948 'title': 'Uploads from 12 Minute Athlete',
13a75688
S
2949 'uploader': '12 Minute Athlete',
2950 'uploader_id': 'the12minuteathlete',
9558dcec 2951 }
cdc628a4
PH
2952 }, {
2953 'url': 'ytuser:phihag',
2954 'only_matching': True,
daa0df9e
YCH
2955 }, {
2956 'url': 'https://www.youtube.com/c/gametrailers',
2957 'only_matching': True,
9558dcec
S
2958 }, {
2959 'url': 'https://www.youtube.com/gametrailers',
2960 'only_matching': True,
73c4ac2c 2961 }, {
0e879f43 2962 # This channel is not available, geo restricted to JP
73c4ac2c
S
2963 'url': 'https://www.youtube.com/user/kananishinoSMEJ/videos',
2964 'only_matching': True,
cdc628a4
PH
2965 }]
2966
@classmethod
def suitable(cls, url):
    """Return whether this extractor should handle ``url``.

    The URL pattern of this extractor is too permissive and would also
    match URLs meant for other YouTube extractors, so the URL is rejected
    as soon as any other module-level ``Youtube*IE`` class accepts it.
    """
    for name, klass in globals().items():
        if klass is cls:
            continue
        if not (name.startswith('Youtube') and name.endswith('IE')):
            continue
        if klass.suitable(url):
            return False
    return super(YoutubeUserIE, cls).suitable(url)
f4b05232 2976
9558dcec
S
2977 def _build_template_url(self, url, channel_id):
2978 mobj = re.match(self._VALID_URL, url)
2979 return self._TEMPLATE_URL % (mobj.group('user') or 'user', mobj.group('id'))
2980
b05654f0 2981
f07e276a
S
2982class YoutubeLiveIE(YoutubeBaseInfoExtractor):
2983 IE_DESC = 'YouTube.com live streams'
073d5bf5 2984 _VALID_URL = r'(?P<base_url>https?://(?:\w+\.)?youtube\.com/(?:(?:user|channel|c)/)?(?P<id>[^/]+))/live'
f07e276a
S
2985 IE_NAME = 'youtube:live'
2986
2987 _TESTS = [{
2d3d2997 2988 'url': 'https://www.youtube.com/user/TheYoungTurks/live',
f07e276a
S
2989 'info_dict': {
2990 'id': 'a48o2S1cPoo',
2991 'ext': 'mp4',
2992 'title': 'The Young Turks - Live Main Show',
2993 'uploader': 'The Young Turks',
2994 'uploader_id': 'TheYoungTurks',
ec85ded8 2995 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
f07e276a
S
2996 'upload_date': '20150715',
2997 'license': 'Standard YouTube License',
2998 'description': 'md5:438179573adcdff3c97ebb1ee632b891',
2999 'categories': ['News & Politics'],
3000 'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
3001 'like_count': int,
3002 'dislike_count': int,
3003 },
3004 'params': {
3005 'skip_download': True,
3006 },
3007 }, {
2d3d2997 3008 'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
f07e276a 3009 'only_matching': True,
c1b2a085
S
3010 }, {
3011 'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
3012 'only_matching': True,
073d5bf5
S
3013 }, {
3014 'url': 'https://www.youtube.com/TheYoungTurks/live',
3015 'only_matching': True,
f07e276a
S
3016 }]
3017
def _real_extract(self, url):
    """Resolve a ``/live`` URL to the video it currently shows.

    When the page declares a video page type and carries an 11-character
    ``videoId`` meta value, that video is handed to YoutubeIE.  In every
    other case (download failed, no usable id) the URL without the
    ``/live`` suffix is returned for further extraction.
    """
    url_match = re.match(self._VALID_URL, url)
    channel_id = url_match.group('id')
    base_url = url_match.group('base_url')

    webpage = self._download_webpage(url, channel_id, fatal=False)
    if not webpage:
        return self.url_result(base_url)

    page_type = self._og_search_property(
        'type', webpage, 'page type', default='')
    video_id = self._html_search_meta(
        'videoId', webpage, 'video id', default=None)

    is_video_page = page_type.startswith('video')
    if is_video_page and video_id and re.match(r'^[0-9A-Za-z_-]{11}$', video_id):
        return self.url_result(video_id, YoutubeIE.ie_key())
    return self.url_result(base_url)
3032
3033
e462474e
S
class YoutubePlaylistsIE(YoutubePlaylistsBaseInfoExtractor):
    """Extractor configuration for a user's or channel's ``/playlists`` page.

    Only class-level settings are declared here; no methods are defined in
    this class, so behaviour comes from ``YoutubePlaylistsBaseInfoExtractor``.
    """
    IE_NAME = 'youtube:playlists'
    IE_DESC = 'YouTube.com user/channel playlists'
    # ``id`` captures the user name or channel id preceding ``/playlists``.
    _VALID_URL = r'https?://(?:\w+\.)?youtube\.com/(?:user|channel)/(?P<id>[^/]+)/playlists'

    _TESTS = [{
        'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
        'playlist_mincount': 4,
        'info_dict': {
            'id': 'ThirstForScience',
            'title': 'ThirstForScience',
        },
    }, {
        # with "Load more" button
        'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
        'playlist_mincount': 70,
        'info_dict': {
            'id': 'igorkle1',
            'title': 'Игорь Клейнер',
        },
    }, {
        'url': 'https://www.youtube.com/channel/UCiU1dHvZObB2iP6xkJ__Icw/playlists',
        'playlist_mincount': 17,
        'info_dict': {
            'id': 'UCiU1dHvZObB2iP6xkJ__Icw',
            'title': 'Chem Player',
        },
        'skip': 'Blocked',
    }]
0c148415
S
3063
3064
870f3bfc
S
class YoutubeSearchBaseInfoExtractor(YoutubePlaylistBaseInfoExtractor):
    """Shared base for the search extractors; supplies the video-link regex."""
    # Matches a ``/watch?v=<11-char id>`` href and, when present later in the
    # same tag, its ``title`` attribute. The two adjacent raw literals
    # concatenate into a single pattern.
    _VIDEO_RE = (
        r'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})'
        r'(?:[^"]*"[^>]+\btitle="(?P<title>[^"]+))?'
    )
3067
3068
class YoutubeSearchIE(SearchInfoExtractor, YoutubeSearchBaseInfoExtractor):
    """Search YouTube and return the first ``n`` matching videos.

    Result pages are requested with ``spf=navigate`` and followed through
    their "Next" links until enough videos have been collected, a page
    yields no videos, or no further link is found.
    """
    IE_DESC = 'YouTube.com searches'
    # there doesn't appear to be a real limit, for example if you search for
    # 'python' you get more than 8.000.000 results
    _MAX_RESULTS = float('inf')
    IE_NAME = 'youtube:search'
    _SEARCH_KEY = 'ytsearch'
    # Extra query-string parameters merged into the first results URL;
    # subclasses override this to change e.g. the sort order.
    _EXTRA_QUERY_ARGS = {}
    _TESTS = []

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query.

        ``query`` is the search text and ``n`` the maximum number of entries
        to return (it may be ``float('inf')``, see ``_MAX_RESULTS``).
        Returns a playlist result titled after ``query``. Raises
        ``ExtractorError`` (expected) when a page carries the
        "search-message" marker, i.e. no video results.
        """
        videos = []

        url_query = {
            'search_query': query.encode('utf-8'),
        }
        url_query.update(self._EXTRA_QUERY_ARGS)
        result_url = 'https://www.youtube.com/results?' + compat_urllib_parse_urlencode(url_query)

        for pagenum in itertools.count(1):
            data = self._download_json(
                result_url, video_id='query "%s"' % query,
                note='Downloading page %s' % pagenum,
                errnote='Unable to download API page',
                query={'spf': 'navigate'})
            html_content = data[1]['body']['content']

            if 'class="search-message' in html_content:
                raise ExtractorError(
                    '[youtube] No video results', expected=True)

            new_videos = list(self._process_page(html_content))
            videos += new_videos
            # Stop as soon as the quota is met. Using ">=" rather than ">"
            # avoids fetching one more page when exactly n results are
            # already in hand; that extra request was wasted and could turn
            # a successful search into an error.
            if not new_videos or len(videos) >= n:
                break
            next_link = self._html_search_regex(
                r'href="(/results\?[^"]*\bsp=[^"]+)"[^>]*>\s*<span[^>]+class="[^"]*\byt-uix-button-content\b[^"]*"[^>]*>Next',
                html_content, 'next link', default=None)
            if next_link is None:
                break
            result_url = compat_urlparse.urljoin('https://www.youtube.com/', next_link)

        # Guard before slicing: n may be float('inf'), which is not a valid
        # slice bound but compares fine against a length.
        if len(videos) > n:
            videos = videos[:n]
        return self.playlist_result(videos, query)
75dff0ee 3117
c9ae7b95 3118
class YoutubeSearchDateIE(YoutubeSearchIE):
    """Variant of ``YoutubeSearchIE`` that asks for results sorted by upload date."""
    IE_NAME = '%s:date' % YoutubeSearchIE.IE_NAME
    IE_DESC = 'YouTube.com searches, newest videos first'
    _SEARCH_KEY = 'ytsearchdate'
    # Merged into the results query string by the inherited search routine.
    _EXTRA_QUERY_ARGS = {'search_sort': 'video_date_uploaded'}
75dff0ee 3124
c9ae7b95 3125
class YoutubeSearchURLIE(YoutubeSearchBaseInfoExtractor):
    """Extract the videos listed on a YouTube search results URL.

    Only the page at the given URL is downloaded and scanned; the decoded
    search query is used as the playlist title.
    """
    IE_NAME = 'youtube:search_url'
    IE_DESC = 'YouTube.com search URLs'
    # ``query`` captures the still URL-encoded value of ``search_query``/``q``.
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?P<query>[^&]+)(?:[&]|$)'
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'title': 'youtube-dl test video',
        }
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Download the results page and wrap its videos in a playlist result."""
        encoded_query = re.match(self._VALID_URL, url).group('query')
        query = compat_urllib_parse_unquote_plus(encoded_query)
        webpage = self._download_webpage(url, query)
        entries = self._process_page(webpage)
        return self.playlist_result(entries, playlist_title=query)
c9ae7b95
PH
3146
3147
class YoutubeShowIE(YoutubePlaylistsBaseInfoExtractor):
    """Extract a YouTube show by delegating to its ``/playlists`` page."""
    IE_NAME = 'youtube:show'
    IE_DESC = 'YouTube.com (multi-season) shows'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/show/(?P<id>[^?#]*)'
    _TESTS = [{
        'url': 'https://www.youtube.com/show/airdisasters',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'airdisasters',
            'title': 'Air Disasters',
        }
    }]

    def _real_extract(self, url):
        """Rewrite the show URL to its playlists listing and reuse the parent logic."""
        show_id = self._match_id(url)
        playlists_url = 'https://www.youtube.com/show/%s/playlists' % show_id
        return super(YoutubeShowIE, self)._real_extract(playlists_url)
04cc9617
JMF
3165
3166
class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
    """
    Base class for feed extractors
    Subclasses must define the _FEED_NAME and _PLAYLIST_TITLE properties.
    """
    _LOGIN_REQUIRED = True

    @property
    def IE_NAME(self):
        """Extractor name derived from the subclass's ``_FEED_NAME``."""
        return 'youtube:%s' % self._FEED_NAME

    def _real_initialize(self):
        self._login()

    def _entries(self, page):
        """Yield one result per distinct video id found in the feed.

        ``page`` is the HTML of the first feed page. Further portions are
        fetched through the ``data-uix-load-more-href`` link for as long as
        one is present and the latest portion contributed unseen ids.
        """
        # The extraction process is the same as for playlists, but the regex
        # for the video ids doesn't contain an index
        # A set keeps the duplicate check constant-time however long the
        # feed grows; ordering is carried by ``new_ids``, not by this set.
        seen_ids = set()
        more_widget_html = content_html = page
        for page_num in itertools.count(1):
            matches = re.findall(r'href="\s*/watch\?v=([0-9A-Za-z_-]{11})', content_html)

            # 'recommended' feed has infinite 'load more' and each new portion spins
            # the same videos in (sometimes) slightly different order, so we'll check
            # for unicity and break when portion has no new videos
            new_ids = [
                video_id for video_id in orderedSet(matches)
                if video_id not in seen_ids]
            if not new_ids:
                break

            seen_ids.update(new_ids)

            for entry in self._ids_to_results(new_ids):
                yield entry

            mobj = re.search(r'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html)
            if not mobj:
                break

            more = self._download_json(
                'https://youtube.com/%s' % mobj.group('more'), self._PLAYLIST_TITLE,
                'Downloading page #%s' % page_num,
                transform_source=uppercase_escape)
            content_html = more['content_html']
            more_widget_html = more['load_more_widget_html']

    def _real_extract(self, url):
        """Download ``/feed/<_FEED_NAME>`` and return its videos as a playlist."""
        page = self._download_webpage(
            'https://www.youtube.com/feed/%s' % self._FEED_NAME,
            self._PLAYLIST_TITLE)
        return self.playlist_result(
            self._entries(page), playlist_title=self._PLAYLIST_TITLE)
25f14e9f
S
3218
3219
class YoutubeWatchLaterIE(YoutubePlaylistIE):
    """Extract the "Watch later" list, i.e. the playlist with id ``WL``."""
    IE_NAME = 'youtube:watchlater'
    IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/(?:feed/watch_later|(?:playlist|watch)\?(?:.+&)?list=WL)|:ytwatchlater'

    _TESTS = [{
        'url': 'https://www.youtube.com/playlist?list=WL',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?v=bCNU9TrbiRk&index=1&list=WL',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return the single-video result if one applies, else the WL playlist."""
        # Both helpers return a pair; only the second element is used here.
        _ignored, single_video = self._check_download_just_video(url, 'WL')
        if single_video:
            return single_video
        _ignored, watch_later = self._extract_playlist('WL')
        return watch_later
f459d170 3239
5f6a1245 3240
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    """Resolve the favourites page to its underlying playlist."""
    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube.com favourite videos, ":ytfav" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/my_favorites|:ytfav(?:ou?rites)?'
    _LOGIN_REQUIRED = True

    def _real_extract(self, url):
        """Find the ``list=`` id on the favourites page and delegate to the playlist extractor."""
        favourites_page = self._download_webpage(
            'https://www.youtube.com/my_favorites', 'Youtube Favourites videos')
        playlist_id = self._search_regex(
            r'list=(.+?)["&]', favourites_page, 'favourites playlist id')
        return self.url_result(playlist_id, 'YoutubePlaylist')
15870e90
PH
3251
3252
25f14e9f
S
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    """Feed extractor configured for the ``recommended`` feed."""
    _FEED_NAME = 'recommended'
    _PLAYLIST_TITLE = 'Youtube Recommended videos'
    IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/recommended|:ytrec(?:ommended)?'
1ed5b5c9 3258
1ed5b5c9 3259
25f14e9f
S
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    """Feed extractor configured for the ``subscriptions`` feed."""
    _FEED_NAME = 'subscriptions'
    _PLAYLIST_TITLE = 'Youtube Subscriptions'
    IE_DESC = 'YouTube.com subscriptions feed, "ytsubs" keyword (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/subscriptions|:ytsubs(?:criptions)?'
1ed5b5c9 3265
1ed5b5c9 3266
25f14e9f
S
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    """Feed extractor configured for the ``history`` feed."""
    _FEED_NAME = 'history'
    _PLAYLIST_TITLE = 'Youtube History'
    IE_DESC = 'Youtube watch history, ":ythistory" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/history|:ythistory'
1ed5b5c9
JMF
3272
3273
15870e90
PH
class YoutubeTruncatedURLIE(InfoExtractor):
    """Catch watch/attribution URLs that carry no video id.

    Such URLs typically result from an unquoted ``&`` on the command line;
    extraction always fails with an explanatory, expected error.
    """
    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    # Verbose pattern: whitespace inside it is not significant.
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError advising to quote the URL."""
        quoting_hint = (
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like youtube-dl '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply youtube-dl BaW_jenozKc .')
        raise ExtractorError(quoting_hint, expected=True)
772fd5cc
PH
3321
3322
class YoutubeTruncatedIDIE(InfoExtractor):
    """Catch watch URLs whose ``v=`` value is shorter than 11 characters."""
    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    # Matches ids of 1 to 10 characters at the very end of the URL.
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError naming the incomplete id."""
        partial_id = self._match_id(url)
        message = 'Incomplete YouTube ID %s. URL %s looks truncated.' % (partial_id, url)
        raise ExtractorError(message, expected=True)