]> jfr.im git - yt-dlp.git/blame - youtube_dl/extractor/youtube.py
[youtube] Fix age-gated videos support without login (closes #24248)
[yt-dlp.git] / youtube_dl / extractor / youtube.py
CommitLineData
c5e8d7af 1# coding: utf-8
c5e8d7af 2
78caa52a
PH
3from __future__ import unicode_literals
4
5
0ca96d48 6import itertools
c5e8d7af 7import json
c4417ddb 8import os.path
d77ab8e2 9import random
c5e8d7af 10import re
42939b61 11import time
e0df6211 12import traceback
c5e8d7af 13
b05654f0 14from .common import InfoExtractor, SearchInfoExtractor
2b25cb5d 15from ..jsinterp import JSInterpreter
54256267 16from ..swfinterp import SWFInterpreter
4bb4a188 17from ..compat import (
edf3e38e 18 compat_chr,
f8c55c66 19 compat_HTTPError,
8d81f3e3 20 compat_kwargs,
c5e8d7af 21 compat_parse_qs,
7fd002c0
S
22 compat_urllib_parse_unquote,
23 compat_urllib_parse_unquote_plus,
15707c7e 24 compat_urllib_parse_urlencode,
7c80519c 25 compat_urllib_parse_urlparse,
7c61bd36 26 compat_urlparse,
c5e8d7af 27 compat_str,
4bb4a188
PH
28)
29from ..utils import (
27019dbb 30 bool_or_none,
c5e8d7af 31 clean_html,
026fbedc 32 dict_get,
9b9c5355 33 error_to_compat_str,
351f37c0 34 extract_attributes,
c5e8d7af 35 ExtractorError,
2d30521a 36 float_or_none,
4bb4a188
PH
37 get_element_by_attribute,
38 get_element_by_id,
dd27fd17 39 int_or_none,
94278f72 40 mimetype2ext,
4bb4a188 41 orderedSet,
6310acf5 42 parse_codecs,
7c80519c 43 parse_duration,
0cb58b02 44 remove_quotes,
3995d37d 45 remove_start,
cf7e015f 46 smuggle_url,
dbdaaa23 47 str_or_none,
c93d53f5 48 str_to_int,
556dbe7f 49 try_get,
c5e8d7af
PH
50 unescapeHTML,
51 unified_strdate,
cf7e015f 52 unsmuggle_url,
81c2f20b 53 uppercase_escape,
21c340b8 54 url_or_none,
6e6bc8da 55 urlencode_postdata,
c5e8d7af
PH
56)
57
5f6a1245 58
class YoutubeBaseInfoExtractor(InfoExtractor):
    """Provide base functions for Youtube extractors"""
    _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
    _TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge'

    _LOOKUP_URL = 'https://accounts.google.com/_/signin/sl/lookup'
    _CHALLENGE_URL = 'https://accounts.google.com/_/signin/sl/challenge'
    # '{0}' is filled with the TL token taken from the challenge response
    _TFA_URL = 'https://accounts.google.com/_/signin/challenge?hl=en&TL={0}'

    _NETRC_MACHINE = 'youtube'
    # If True it will raise an error if no login info is provided
    _LOGIN_REQUIRED = False

    _PLAYLIST_ID_RE = r'(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}'

    def _set_language(self):
        """Set the PREF cookie (containing hl=en) on .youtube.com."""
        # NOTE(review): presumably this forces English pages so that the
        # text-based extraction keeps working - confirm against callers.
        self._set_cookie(
            '.youtube.com', 'PREF', 'f1=50000000&hl=en',
            # YouTube sets the expire time to about two months
            expire_time=time.time() + 2 * 30 * 24 * 3600)

    def _ids_to_results(self, ids):
        """Return one url_result for the 'Youtube' extractor per id in ids."""
        return [
            self.url_result(vid_id, 'Youtube', video_id=vid_id)
            for vid_id in ids]

    def _login(self):
        """
        Attempt to log in to YouTube.
        True is returned if successful or skipped.
        False is returned if login failed.

        If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.
        """
        username, password = self._get_login_info()
        # No authentication to be performed
        if username is None:
            if self._LOGIN_REQUIRED and self._downloader.params.get('cookiefile') is None:
                raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
            return True

        login_page = self._download_webpage(
            self._LOGIN_URL, None,
            note='Downloading login page',
            errnote='unable to fetch login page', fatal=False)
        if login_page is False:
            # Always report failure as False, as the docstring promises
            return False

        login_form = self._hidden_inputs(login_page)

        def req(url, f_req, note, errnote):
            """POST f_req (JSON-encoded) together with the login form's
            hidden inputs to url and return the parsed JSON response,
            or False when the request fails (fatal=False)."""
            data = login_form.copy()
            data.update({
                'pstMsg': 1,
                'checkConnection': 'youtube',
                'checkedDomains': 'youtube',
                'hl': 'en',
                'deviceinfo': '[null,null,null,[],null,"US",null,null,[],"GlifWebSignIn",null,[null,null,[]]]',
                'f.req': json.dumps(f_req),
                'flowName': 'GlifWebSignIn',
                'flowEntry': 'ServiceLogin',
                # TODO: reverse actual botguard identifier generation algo
                'bgRequest': '["identifier",""]',
            })
            return self._download_json(
                url, None, note=note, errnote=errnote,
                # Drop everything that precedes the first '[' so that the
                # remainder can be parsed as a JSON array
                transform_source=lambda s: re.sub(r'^[^[]*', '', s),
                fatal=False,
                data=urlencode_postdata(data), headers={
                    'Content-Type': 'application/x-www-form-urlencoded;charset=utf-8',
                    'Google-Accounts-XSRF': 1,
                })

        def warn(message):
            """Report message as a warning through the downloader."""
            self._downloader.report_warning(message)

        # The same sign-in URL is embedded in both the lookup and the
        # challenge requests, so it is spelled out only once
        service_login_url = 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn'

        lookup_req = [
            username,
            None, [], None, 'US', None, None, 2, False, True,
            [
                None, None,
                [2, 1, None, 1,
                 service_login_url,
                 None, [], 4],
                1, [None, None, []], None, None, None, True
            ],
            username,
        ]

        lookup_results = req(
            self._LOOKUP_URL, lookup_req,
            'Looking up account info', 'Unable to look up account info')

        if lookup_results is False:
            return False

        user_hash = try_get(lookup_results, lambda x: x[0][2], compat_str)
        if not user_hash:
            warn('Unable to extract user hash')
            return False

        challenge_req = [
            user_hash,
            None, 1, None, [1, None, None, None, [password, None, True]],
            [
                None, None, [2, 1, None, 1, service_login_url, None, [], 4],
                1, [None, None, []], None, None, None, True
            ]]

        challenge_results = req(
            self._CHALLENGE_URL, challenge_req,
            'Logging in', 'Unable to log in')

        if challenge_results is False:
            # Always report failure as False, as the docstring promises
            return False

        login_res = try_get(challenge_results, lambda x: x[0][5], list)
        if login_res:
            login_msg = try_get(login_res, lambda x: x[5], compat_str)
            # Pick the message first and format afterwards: '%' binds tighter
            # than the conditional expression, so formatting inline would
            # drop the prefix for every message other than the known one.
            if login_msg == 'INCORRECT_ANSWER_ENTERED':
                login_msg = 'Invalid password'
            warn('Unable to login: %s' % login_msg)
            return False

        res = try_get(challenge_results, lambda x: x[0][-1], list)
        if not res:
            warn('Unable to extract result entry')
            return False

        login_challenge = try_get(res, lambda x: x[0][0], list)
        if login_challenge:
            challenge_str = try_get(login_challenge, lambda x: x[2], compat_str)
            if challenge_str == 'TWO_STEP_VERIFICATION':
                # SEND_SUCCESS - TFA code has been successfully sent to phone
                # QUOTA_EXCEEDED - reached the limit of TFA codes
                status = try_get(login_challenge, lambda x: x[5], compat_str)
                if status == 'QUOTA_EXCEEDED':
                    warn('Exceeded the limit of TFA codes, try later')
                    return False

                tl = try_get(challenge_results, lambda x: x[1][2], compat_str)
                if not tl:
                    warn('Unable to extract TL')
                    return False

                tfa_code = self._get_tfa_info('2-step verification code')

                if not tfa_code:
                    warn(
                        'Two-factor authentication required. Provide it either interactively or with --twofactor <code> '
                        '(Note that only TOTP (Google Authenticator App) codes work at this time.)')
                    return False

                # Codes may be entered with a leading 'G-'; only the rest is submitted
                tfa_code = remove_start(tfa_code, 'G-')

                tfa_req = [
                    user_hash, None, 2, None,
                    [
                        9, None, None, None, None, None, None, None,
                        [None, tfa_code, True, 2]
                    ]]

                tfa_results = req(
                    self._TFA_URL.format(tl), tfa_req,
                    'Submitting TFA code', 'Unable to submit TFA code')

                if tfa_results is False:
                    return False

                tfa_res = try_get(tfa_results, lambda x: x[0][5], list)
                if tfa_res:
                    tfa_msg = try_get(tfa_res, lambda x: x[5], compat_str)
                    # Same precedence consideration as for the login message
                    if tfa_msg == 'INCORRECT_ANSWER_ENTERED':
                        tfa_msg = 'Invalid TFA code'
                    warn('Unable to finish TFA: %s' % tfa_msg)
                    return False

                check_cookie_url = try_get(
                    tfa_results, lambda x: x[0][-1][2], compat_str)
            else:
                CHALLENGES = {
                    'LOGIN_CHALLENGE': "This device isn't recognized. For your security, Google wants to make sure it's really you.",
                    'USERNAME_RECOVERY': 'Please provide additional information to aid in the recovery process.',
                    'REAUTH': "There is something unusual about your activity. For your security, Google wants to make sure it's really you.",
                }
                challenge = CHALLENGES.get(
                    challenge_str,
                    '%s returned error %s.' % (self.IE_NAME, challenge_str))
                warn('%s\nGo to https://accounts.google.com/, login and solve a challenge.' % challenge)
                return False
        else:
            check_cookie_url = try_get(res, lambda x: x[2], compat_str)

        if not check_cookie_url:
            warn('Unable to extract CheckCookie URL')
            return False

        check_cookie_results = self._download_webpage(
            check_cookie_url, None, 'Checking cookie', fatal=False)

        if check_cookie_results is False:
            return False

        if 'https://myaccount.google.com/' not in check_cookie_results:
            warn('Unable to log in')
            return False

        return True

    def _download_webpage_handle(self, *args, **kwargs):
        """Delegate to the parent implementation with disable_polymer=true
        added to the request query."""
        # NOTE(review): presumably this requests the pre-Polymer page layout
        # that the regex-based extraction expects - confirm.
        # Work on a copy so the caller's query dict is never mutated
        query = kwargs.get('query', {}).copy()
        query['disable_polymer'] = 'true'
        kwargs['query'] = query
        return super(YoutubeBaseInfoExtractor, self)._download_webpage_handle(
            *args, **compat_kwargs(kwargs))

    def _real_initialize(self):
        """Set the language cookie and attempt to log in; does nothing when
        no downloader is attached."""
        if self._downloader is None:
            return
        self._set_language()
        # A failed login has already been reported by _login() and does not
        # abort initialization, so the result is only checked, not acted upon
        if not self._login():
            return
c5e8d7af 281
8377574c 282
class YoutubeEntryListBaseInfoExtractor(YoutubeBaseInfoExtractor):
    # Extract entries from page with "Load more" button
    def _entries(self, page, playlist_id):
        """Yield the entries of page and of every "Load more" continuation.

        Each chunk of HTML is handed to self._process_page(); the next chunk
        is located through the data-uix-load-more-href attribute of the
        current "Load more" widget. Iteration ends when no such attribute is
        found or when a continuation carries no content.
        """
        max_retries = 3
        widget_html = entries_html = page
        for page_num in itertools.count(1):
            for entry in self._process_page(entries_html):
                yield entry

            more_match = re.search(
                r'data-uix-load-more-href="/?(?P<more>[^"]+)"', widget_html)
            if more_match is None:
                return

            more_url = 'https://youtube.com/%s' % more_match.group('more')
            for attempt in range(max_retries + 1):
                retry_suffix = ' (retry #%d)' % attempt if attempt else ''
                try:
                    more = self._download_json(
                        more_url, playlist_id,
                        'Downloading page #%s%s' % (page_num, retry_suffix),
                        transform_source=uppercase_escape)
                except ExtractorError as e:
                    # Downloading page may result in intermittent 5xx HTTP error
                    # that is usually worked around with a retry
                    transient = (
                        isinstance(e.cause, compat_HTTPError)
                        and e.cause.code in (500, 503))
                    if transient and attempt < max_retries:
                        continue
                    raise
                else:
                    break

            entries_html = more['content_html']
            if not entries_html.strip():
                # Some webpages show a "Load more" button but they don't
                # have more videos
                return
            widget_html = more['load_more_widget_html']
320
061a75ed
S
321
class YoutubePlaylistBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
    """Entry-list extractor whose pages contain videos matched by self._VIDEO_RE."""

    def _process_page(self, content):
        """Yield a 'Youtube' url_result for every video found in content."""
        for video_id, video_title in self.extract_videos_from_page(content):
            yield self.url_result(video_id, 'Youtube', video_id, video_title)

    def extract_videos_from_page_impl(self, video_re, page, ids_in_page, titles_in_page):
        """Collect video ids and titles matched by video_re in page.

        ids_in_page and titles_in_page are parallel lists that are updated
        in place: unseen ids are appended together with their title, and an
        already known id only has an empty title filled in.
        """
        for match in re.finditer(video_re, page):
            named_groups = match.groupdict()
            video_id = match.group('id')
            # The link with index 0 is not the first video of the playlist (not sure if still actual)
            if 'index' in named_groups and video_id == '0':
                continue

            if 'title' in named_groups:
                video_title = unescapeHTML(match.group('title'))
            else:
                video_title = None
            if video_title:
                video_title = video_title.strip()
            if video_title == '► Play all':
                video_title = None

            if video_id not in ids_in_page:
                ids_in_page.append(video_id)
                titles_in_page.append(video_title)
                continue

            position = ids_in_page.index(video_id)
            if video_title and not titles_in_page[position]:
                titles_in_page[position] = video_title

    def extract_videos_from_page(self, page):
        """Return (video_id, title) pairs for the videos found in page."""
        found_ids, found_titles = [], []
        self.extract_videos_from_page_impl(
            self._VIDEO_RE, page, found_ids, found_titles)
        return zip(found_ids, found_titles)
353
354
061a75ed
S
class YoutubePlaylistsBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
    """Entry-list extractor whose pages link to playlists rather than videos."""

    def _process_page(self, content):
        """Yield a 'YoutubePlaylist' url_result for each distinct playlist
        id linked from a yt-lockup-title heading in content."""
        linked_ids = re.findall(
            r'<h3[^>]+class="[^"]*yt-lockup-title[^"]*"[^>]*><a[^>]+href="/?playlist\?list=([0-9A-Za-z-_]{10,})"',
            content)
        # orderedSet drops repeated ids while keeping first-seen order
        for playlist_id in orderedSet(linked_ids):
            playlist_url = 'https://www.youtube.com/playlist?list=%s' % playlist_id
            yield self.url_result(playlist_url, 'YoutubePlaylist')

    def _real_extract(self, url):
        """Download url and return a playlist result built from its entries,
        titled with the page's og:title when one is available."""
        list_id = self._match_id(url)
        page = self._download_webpage(url, list_id)
        page_title = self._og_search_title(page, fatal=False)
        entries = self._entries(page, list_id)
        return self.playlist_result(entries, list_id, page_title)
0c148415
S
368
369
360e1ca5 370class YoutubeIE(YoutubeBaseInfoExtractor):
78caa52a 371 IE_DESC = 'YouTube.com'
cb7dfeea 372 _VALID_URL = r"""(?x)^
c5e8d7af 373 (
edb53e2d 374 (?:https?://|//) # http(s):// or protocol-independent URL
66b48727 375 (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com/|
484aaeb2 376 (?:www\.)?deturl\.com/www\.youtube\.com/|
e70dc1d1 377 (?:www\.)?pwnyoutube\.com/|
8b561bfc 378 (?:www\.)?hooktube\.com/|
f7000f3a 379 (?:www\.)?yourepeat\.com/|
e69ae5b9 380 tube\.majestyc\.net/|
ba036333 381 # Invidious instances taken from https://github.com/omarroth/invidious/wiki/Invidious-Instances
77d95677 382 (?:(?:www|dev)\.)?invidio\.us/|
ba036333 383 (?:(?:www|no)\.)?invidiou\.sh/|
384 (?:(?:www|fi|de)\.)?invidious\.snopyta\.org/|
8ae113ca 385 (?:www\.)?invidious\.kabi\.tk/|
ba036333 386 (?:www\.)?invidious\.13ad\.de/|
791d2e81 387 (?:www\.)?invidious\.mastodon\.host/|
494d664e 388 (?:www\.)?invidious\.nixnet\.xyz/|
666d808e 389 (?:www\.)?invidious\.drycat\.fr/|
ba036333 390 (?:www\.)?tube\.poal\.co/|
8ae113ca 391 (?:www\.)?vid\.wxzm\.sx/|
494d664e 392 (?:www\.)?yt\.elukerio\.org/|
894b3826 393 (?:www\.)?yt\.lelux\.fi/|
bff90fc5 394 (?:www\.)?kgg2m7yk5aybusll\.onion/|
395 (?:www\.)?qklhadlycap4cnod\.onion/|
396 (?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion/|
397 (?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion/|
398 (?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion/|
399 (?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion/|
33c1c7d8 400 (?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p/|
e69ae5b9 401 youtube\.googleapis\.com/) # the various hostnames, with wildcard subdomains
c5e8d7af
PH
402 (?:.*?\#/)? # handle anchor (#/) redirect urls
403 (?: # the various things that can precede the ID:
ac7553d0 404 (?:(?:v|embed|e)/(?!videoseries)) # v/ or embed/ or e/
c5e8d7af 405 |(?: # or the v= param in all its forms
f7000f3a 406 (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
c5e8d7af 407 (?:\?|\#!?) # the params delimiter ? or # or #!
040ac686 408 (?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
c5e8d7af
PH
409 v=
410 )
f4b05232 411 ))
cbaed4bb
S
412 |(?:
413 youtu\.be| # just youtu.be/xxxx
6d4fc66b
S
414 vid\.plus| # or vid.plus/xxxx
415 zwearz\.com/watch| # or zwearz.com/watch/xxxx
cbaed4bb 416 )/
edb53e2d 417 |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
f4b05232 418 )
c5e8d7af 419 )? # all until now is optional -> you can pass the naked ID
8963d9c2 420 ([0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
d0ba5587
S
421 (?!.*?\blist=
422 (?:
423 %(playlist_id)s| # combined list/video URLs are handled by the playlist IE
424 WL # WL are handled by the watch later IE
425 )
426 )
c5e8d7af 427 (?(1).+)? # if we found the ID, everything can follow
d0ba5587 428 $""" % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
c5e8d7af 429 _NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
2c62dc26 430 _formats = {
c2d3cb4c 431 '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
432 '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
433 '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
434 '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
435 '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
436 '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
437 '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
438 '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
3834d3e3 439 # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
c2d3cb4c 440 '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
441 '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
442 '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
443 '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
444 '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
445 '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
e1a0bfdf 446 '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
c2d3cb4c 447 '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
448 '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
e1a0bfdf 449
450
451 # 3D videos
c2d3cb4c 452 '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
453 '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
454 '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
455 '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
e1a0bfdf 456 '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
457 '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
458 '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
836a086c 459
96fb5605 460 # Apple HTTP Live Streaming
11f12195 461 '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
c2d3cb4c 462 '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
463 '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
464 '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
465 '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
466 '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
e1a0bfdf 467 '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
468 '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
2c62dc26
PH
469
470 # DASH mp4 video
d23028a8
S
471 '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
472 '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
473 '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
474 '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
475 '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
067aa17e 476 '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
d23028a8
S
477 '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
478 '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
479 '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
480 '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
481 '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
482 '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},
836a086c 483
f6f1fc92 484 # Dash mp4 audio
d23028a8
S
485 '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
486 '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
487 '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
488 '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
489 '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
490 '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
491 '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},
836a086c
AZ
492
493 # Dash webm
d23028a8
S
494 '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
495 '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
496 '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
497 '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
498 '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
499 '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
500 '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
501 '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
502 '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
503 '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
504 '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
505 '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
506 '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
507 '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
508 '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
4c6b4764 509 # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
d23028a8
S
510 '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
511 '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
512 '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
513 '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
514 '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
515 '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
2c62dc26
PH
516
517 # Dash webm audio
d23028a8
S
518 '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
519 '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},
ce6b9a2d 520
0857baad 521 # Dash webm audio with opus inside
d23028a8
S
522 '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
523 '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
524 '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},
0857baad 525
ce6b9a2d
PH
526 # RTMP (unnamed)
527 '_rtmp': {'protocol': 'rtmp'},
b85eae0f
S
528
529 # av01 video only formats sometimes served with "unknown" codecs
530 '394': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
531 '395': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
532 '396': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
533 '397': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
c5e8d7af 534 }
19041a38 535 _SUBTITLE_FORMATS = ('srv1', 'srv2', 'srv3', 'ttml', 'vtt')
836a086c 536
fd5c4aab
S
537 _GEO_BYPASS = False
538
78caa52a 539 IE_NAME = 'youtube'
2eb88d95
PH
540 _TESTS = [
541 {
2d3d2997 542 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
4bc3a23e
PH
543 'info_dict': {
544 'id': 'BaW_jenozKc',
545 'ext': 'mp4',
546 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
547 'uploader': 'Philipp Hagemeister',
548 'uploader_id': 'phihag',
ec85ded8 549 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
dd4c4492
S
550 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
551 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
4bc3a23e
PH
552 'upload_date': '20121002',
553 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
554 'categories': ['Science & Technology'],
000b6b5a 555 'tags': ['youtube-dl'],
556dbe7f 556 'duration': 10,
dbdaaa23 557 'view_count': int,
3e7c1224
PH
558 'like_count': int,
559 'dislike_count': int,
7c80519c 560 'start_time': 1,
297a564b 561 'end_time': 9,
2eb88d95 562 }
0e853ca4 563 },
0e853ca4 564 {
2d3d2997 565 'url': 'https://www.youtube.com/watch?v=UxxajLWwzqY',
4bc3a23e
PH
566 'note': 'Test generic use_cipher_signature video (#897)',
567 'info_dict': {
568 'id': 'UxxajLWwzqY',
569 'ext': 'mp4',
570 'upload_date': '20120506',
571 'title': 'Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]',
0cb58b02 572 'alt_title': 'I Love It (feat. Charli XCX)',
7caf9830 573 'description': 'md5:f3ceb5ef83a08d95b9d146f973157cc8',
000b6b5a
S
574 'tags': ['Icona Pop i love it', 'sweden', 'pop music', 'big beat records', 'big beat', 'charli',
575 'xcx', 'charli xcx', 'girls', 'hbo', 'i love it', "i don't care", 'icona', 'pop',
576 'iconic ep', 'iconic', 'love', 'it'],
556dbe7f 577 'duration': 180,
4bc3a23e
PH
578 'uploader': 'Icona Pop',
579 'uploader_id': 'IconaPop',
ec85ded8 580 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IconaPop',
0cb58b02 581 'creator': 'Icona Pop',
936784b2
S
582 'track': 'I Love It (feat. Charli XCX)',
583 'artist': 'Icona Pop',
2eb88d95 584 }
c108eb73
JMF
585 },
586 {
4bc3a23e
PH
587 'url': 'https://www.youtube.com/watch?v=07FYdnEawAQ',
588 'note': 'Test VEVO video with age protection (#956)',
589 'info_dict': {
590 'id': '07FYdnEawAQ',
591 'ext': 'mp4',
592 'upload_date': '20130703',
4fe54c12 593 'title': 'Justin Timberlake - Tunnel Vision (Official Music Video) (Explicit)',
0cb58b02 594 'alt_title': 'Tunnel Vision',
4fe54c12 595 'description': 'md5:07dab3356cde4199048e4c7cd93471e1',
556dbe7f 596 'duration': 419,
4bc3a23e
PH
597 'uploader': 'justintimberlakeVEVO',
598 'uploader_id': 'justintimberlakeVEVO',
ec85ded8 599 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/justintimberlakeVEVO',
0cb58b02 600 'creator': 'Justin Timberlake',
7e72694b 601 'track': 'Tunnel Vision',
936784b2 602 'artist': 'Justin Timberlake',
34952f09 603 'age_limit': 18,
c108eb73
JMF
604 }
605 },
fccd3771 606 {
4bc3a23e
PH
607 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
608 'note': 'Embed-only video (#1746)',
609 'info_dict': {
610 'id': 'yZIXLfi8CZQ',
611 'ext': 'mp4',
612 'upload_date': '20120608',
613 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
614 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
615 'uploader': 'SET India',
94bfcd23 616 'uploader_id': 'setindia',
ec85ded8 617 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
94bfcd23 618 'age_limit': 18,
fccd3771
PH
619 }
620 },
11b56058 621 {
2d3d2997 622 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=UxxajLWwzqY',
11b56058
PM
623 'note': 'Use the first video ID in the URL',
624 'info_dict': {
625 'id': 'BaW_jenozKc',
626 'ext': 'mp4',
627 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
628 'uploader': 'Philipp Hagemeister',
629 'uploader_id': 'phihag',
ec85ded8 630 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
11b56058
PM
631 'upload_date': '20121002',
632 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
633 'categories': ['Science & Technology'],
634 'tags': ['youtube-dl'],
556dbe7f 635 'duration': 10,
dbdaaa23 636 'view_count': int,
11b56058
PM
637 'like_count': int,
638 'dislike_count': int,
34a7de29
S
639 },
640 'params': {
641 'skip_download': True,
642 },
11b56058 643 },
dd27fd17 644 {
2d3d2997 645 'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
4bc3a23e
PH
646 'note': '256k DASH audio (format 141) via DASH manifest',
647 'info_dict': {
648 'id': 'a9LDPn-MO4I',
649 'ext': 'm4a',
650 'upload_date': '20121002',
651 'uploader_id': '8KVIDEO',
ec85ded8 652 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
4bc3a23e
PH
653 'description': '',
654 'uploader': '8KVIDEO',
655 'title': 'UHDTV TEST 8K VIDEO.mp4'
4919603f 656 },
4bc3a23e
PH
657 'params': {
658 'youtube_include_dash_manifest': True,
659 'format': '141',
4919603f 660 },
de3c7fe0 661 'skip': 'format 141 not served anymore',
dd27fd17 662 },
3489b7d2
JMF
663 # DASH manifest with encrypted signature
664 {
78caa52a
PH
665 'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
666 'info_dict': {
667 'id': 'IB3lcPjvWLA',
668 'ext': 'm4a',
4fe54c12
S
669 'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
670 'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
556dbe7f 671 'duration': 244,
78caa52a
PH
672 'uploader': 'AfrojackVEVO',
673 'uploader_id': 'AfrojackVEVO',
674 'upload_date': '20131011',
3489b7d2 675 },
4bc3a23e 676 'params': {
78caa52a 677 'youtube_include_dash_manifest': True,
de3c7fe0 678 'format': '141/bestaudio[ext=m4a]',
3489b7d2
JMF
679 },
680 },
aaeb86f6
S
681 # JS player signature function name containing $
682 {
683 'url': 'https://www.youtube.com/watch?v=nfWlot6h_JM',
684 'info_dict': {
685 'id': 'nfWlot6h_JM',
686 'ext': 'm4a',
687 'title': 'Taylor Swift - Shake It Off',
4fe54c12 688 'description': 'md5:bec2185232c05479482cb5a9b82719bf',
556dbe7f 689 'duration': 242,
aaeb86f6
S
690 'uploader': 'TaylorSwiftVEVO',
691 'uploader_id': 'TaylorSwiftVEVO',
692 'upload_date': '20140818',
0cb58b02 693 'creator': 'Taylor Swift',
aaeb86f6
S
694 },
695 'params': {
696 'youtube_include_dash_manifest': True,
de3c7fe0 697 'format': '141/bestaudio[ext=m4a]',
aaeb86f6
S
698 },
699 },
aa79ac0c
PH
700 # Controversy video
701 {
702 'url': 'https://www.youtube.com/watch?v=T4XJQO3qol8',
703 'info_dict': {
704 'id': 'T4XJQO3qol8',
705 'ext': 'mp4',
556dbe7f 706 'duration': 219,
aa79ac0c 707 'upload_date': '20100909',
4fe54c12 708 'uploader': 'Amazing Atheist',
aa79ac0c 709 'uploader_id': 'TheAmazingAtheist',
ec85ded8 710 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheAmazingAtheist',
aa79ac0c
PH
711 'title': 'Burning Everyone\'s Koran',
712 'description': 'SUBSCRIBE: http://www.youtube.com/saturninefilms\n\nEven Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html',
713 }
c522adb1
JMF
714 },
715 # Normal age-gate video (No vevo, embed allowed)
716 {
2d3d2997 717 'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
c522adb1
JMF
718 'info_dict': {
719 'id': 'HtVdAasjOgU',
720 'ext': 'mp4',
721 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
ec85ded8 722 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
556dbe7f 723 'duration': 142,
c522adb1
JMF
724 'uploader': 'The Witcher',
725 'uploader_id': 'WitcherGame',
ec85ded8 726 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
c522adb1 727 'upload_date': '20140605',
34952f09 728 'age_limit': 18,
c522adb1
JMF
729 },
730 },
fccae2b9
S
731 # Age-gate video with encrypted signature
732 {
2d3d2997 733 'url': 'https://www.youtube.com/watch?v=6kLq3WMV1nU',
fccae2b9
S
734 'info_dict': {
735 'id': '6kLq3WMV1nU',
4fe54c12 736 'ext': 'mp4',
fccae2b9
S
737 'title': 'Dedication To My Ex (Miss That) (Lyric Video)',
738 'description': 'md5:33765bb339e1b47e7e72b5490139bb41',
eb6793ba 739 'duration': 246,
fccae2b9
S
740 'uploader': 'LloydVEVO',
741 'uploader_id': 'LloydVEVO',
ec85ded8 742 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/LloydVEVO',
fccae2b9 743 'upload_date': '20110629',
34952f09 744 'age_limit': 18,
fccae2b9
S
745 },
746 },
067aa17e 747 # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
7d02dcfa 748 # YouTube Red ad is not captured for creator
774e208f
PH
749 {
750 'url': '__2ABJjxzNo',
751 'info_dict': {
752 'id': '__2ABJjxzNo',
753 'ext': 'mp4',
556dbe7f 754 'duration': 266,
774e208f
PH
755 'upload_date': '20100430',
756 'uploader_id': 'deadmau5',
ec85ded8 757 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
0cb58b02 758 'creator': 'deadmau5',
774e208f
PH
759 'description': 'md5:12c56784b8032162bb936a5f76d55360',
760 'uploader': 'deadmau5',
761 'title': 'Deadmau5 - Some Chords (HD)',
0cb58b02 762 'alt_title': 'Some Chords',
774e208f
PH
763 },
764 'expected_warnings': [
765 'DASH manifest missing',
766 ]
e52a40ab 767 },
067aa17e 768 # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
e52a40ab
PH
769 {
770 'url': 'lqQg6PlCWgI',
771 'info_dict': {
772 'id': 'lqQg6PlCWgI',
773 'ext': 'mp4',
556dbe7f 774 'duration': 6085,
90227264 775 'upload_date': '20150827',
cbe2bd91 776 'uploader_id': 'olympic',
ec85ded8 777 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
cbe2bd91 778 'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
be49068d 779 'uploader': 'Olympic',
cbe2bd91
PH
780 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
781 },
782 'params': {
783 'skip_download': 'requires avconv',
e52a40ab 784 }
cbe2bd91 785 },
6271f1ca
PH
786 # Non-square pixels
787 {
788 'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
789 'info_dict': {
790 'id': '_b-2C3KPAM0',
791 'ext': 'mp4',
792 'stretched_ratio': 16 / 9.,
556dbe7f 793 'duration': 85,
6271f1ca
PH
794 'upload_date': '20110310',
795 'uploader_id': 'AllenMeow',
ec85ded8 796 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
6271f1ca 797 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
eb6793ba 798 'uploader': '孫ᄋᄅ',
6271f1ca
PH
799 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
800 },
06b491eb
S
801 },
802 # url_encoded_fmt_stream_map is empty string
803 {
804 'url': 'qEJwOuvDf7I',
805 'info_dict': {
806 'id': 'qEJwOuvDf7I',
f57b7835 807 'ext': 'webm',
06b491eb
S
808 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
809 'description': '',
810 'upload_date': '20150404',
811 'uploader_id': 'spbelect',
812 'uploader': 'Наблюдатели Петербурга',
813 },
814 'params': {
815 'skip_download': 'requires avconv',
e323cf3f
S
816 },
817 'skip': 'This live event has ended.',
06b491eb 818 },
067aa17e 819 # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
da77d856
S
820 {
821 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
822 'info_dict': {
823 'id': 'FIl7x6_3R5Y',
eb6793ba 824 'ext': 'webm',
da77d856
S
825 'title': 'md5:7b81415841e02ecd4313668cde88737a',
826 'description': 'md5:116377fd2963b81ec4ce64b542173306',
556dbe7f 827 'duration': 220,
da77d856
S
828 'upload_date': '20150625',
829 'uploader_id': 'dorappi2000',
ec85ded8 830 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
da77d856 831 'uploader': 'dorappi2000',
eb6793ba 832 'formats': 'mincount:31',
da77d856 833 },
eb6793ba 834 'skip': 'not actual anymore',
2ee8f5d8 835 },
8a1a26ce
YCH
836 # DASH manifest with segment_list
837 {
838 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
839 'md5': '8ce563a1d667b599d21064e982ab9e31',
840 'info_dict': {
841 'id': 'CsmdDsKjzN8',
842 'ext': 'mp4',
17ee98e1 843 'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510
8a1a26ce
YCH
844 'uploader': 'Airtek',
845 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
846 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
847 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
848 },
849 'params': {
850 'youtube_include_dash_manifest': True,
851 'format': '135', # bestvideo
be49068d
S
852 },
853 'skip': 'This live event has ended.',
2ee8f5d8 854 },
cf7e015f
S
855 {
856 # Multifeed videos (multiple cameras), URL is for Main Camera
857 'url': 'https://www.youtube.com/watch?v=jqWvoWXjCVs',
858 'info_dict': {
859 'id': 'jqWvoWXjCVs',
860 'title': 'teamPGP: Rocket League Noob Stream',
861 'description': 'md5:dc7872fb300e143831327f1bae3af010',
862 },
863 'playlist': [{
864 'info_dict': {
865 'id': 'jqWvoWXjCVs',
866 'ext': 'mp4',
867 'title': 'teamPGP: Rocket League Noob Stream (Main Camera)',
868 'description': 'md5:dc7872fb300e143831327f1bae3af010',
556dbe7f 869 'duration': 7335,
cf7e015f
S
870 'upload_date': '20150721',
871 'uploader': 'Beer Games Beer',
872 'uploader_id': 'beergamesbeer',
ec85ded8 873 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
7caf9830 874 'license': 'Standard YouTube License',
cf7e015f
S
875 },
876 }, {
877 'info_dict': {
878 'id': '6h8e8xoXJzg',
879 'ext': 'mp4',
880 'title': 'teamPGP: Rocket League Noob Stream (kreestuh)',
881 'description': 'md5:dc7872fb300e143831327f1bae3af010',
556dbe7f 882 'duration': 7337,
cf7e015f
S
883 'upload_date': '20150721',
884 'uploader': 'Beer Games Beer',
885 'uploader_id': 'beergamesbeer',
ec85ded8 886 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
7caf9830 887 'license': 'Standard YouTube License',
cf7e015f
S
888 },
889 }, {
890 'info_dict': {
891 'id': 'PUOgX5z9xZw',
892 'ext': 'mp4',
893 'title': 'teamPGP: Rocket League Noob Stream (grizzle)',
894 'description': 'md5:dc7872fb300e143831327f1bae3af010',
556dbe7f 895 'duration': 7337,
cf7e015f
S
896 'upload_date': '20150721',
897 'uploader': 'Beer Games Beer',
898 'uploader_id': 'beergamesbeer',
ec85ded8 899 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
7caf9830 900 'license': 'Standard YouTube License',
cf7e015f
S
901 },
902 }, {
903 'info_dict': {
904 'id': 'teuwxikvS5k',
905 'ext': 'mp4',
906 'title': 'teamPGP: Rocket League Noob Stream (zim)',
907 'description': 'md5:dc7872fb300e143831327f1bae3af010',
556dbe7f 908 'duration': 7334,
cf7e015f
S
909 'upload_date': '20150721',
910 'uploader': 'Beer Games Beer',
911 'uploader_id': 'beergamesbeer',
ec85ded8 912 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
7caf9830 913 'license': 'Standard YouTube License',
cf7e015f
S
914 },
915 }],
916 'params': {
917 'skip_download': True,
918 },
4fe54c12 919 'skip': 'This video is not available.',
cbaed4bb 920 },
f9f49d87 921 {
067aa17e 922 # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
f9f49d87
S
923 'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
924 'info_dict': {
925 'id': 'gVfLd0zydlo',
926 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
927 },
928 'playlist_count': 2,
be49068d 929 'skip': 'Not multifeed anymore',
f9f49d87 930 },
cbaed4bb 931 {
2d3d2997 932 'url': 'https://vid.plus/FlRa-iH7PGw',
cbaed4bb 933 'only_matching': True,
0e49d9a6 934 },
6d4fc66b 935 {
2d3d2997 936 'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
6d4fc66b
S
937 'only_matching': True,
938 },
0e49d9a6 939 {
067aa17e 940 # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
a8776b10 941 # Also tests cut-off URL expansion in video description (see
067aa17e
S
942 # https://github.com/ytdl-org/youtube-dl/issues/1892,
943 # https://github.com/ytdl-org/youtube-dl/issues/8164)
0e49d9a6
LL
944 'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
945 'info_dict': {
946 'id': 'lsguqyKfVQg',
947 'ext': 'mp4',
948 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
eb6793ba 949 'alt_title': 'Dark Walk - Position Music',
0e49d9a6 950 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
556dbe7f 951 'duration': 133,
0e49d9a6
LL
952 'upload_date': '20151119',
953 'uploader_id': 'IronSoulElf',
ec85ded8 954 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
0e49d9a6 955 'uploader': 'IronSoulElf',
eb6793ba
S
956 'creator': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
957 'track': 'Dark Walk - Position Music',
958 'artist': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
92bc97d3 959 'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
0e49d9a6
LL
960 },
961 'params': {
962 'skip_download': True,
963 },
964 },
61f92af1 965 {
067aa17e 966 # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
61f92af1
S
967 'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
968 'only_matching': True,
969 },
313dfc45
LL
970 {
971 # Video with yt:stretch=17:0
972 'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
973 'info_dict': {
974 'id': 'Q39EVAstoRM',
975 'ext': 'mp4',
976 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
977 'description': 'md5:ee18a25c350637c8faff806845bddee9',
978 'upload_date': '20151107',
979 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
980 'uploader': 'CH GAMER DROID',
981 },
982 'params': {
983 'skip_download': True,
984 },
be49068d 985 'skip': 'This video does not exist.',
313dfc45 986 },
7caf9830
S
987 {
988 # Video licensed under Creative Commons
989 'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
990 'info_dict': {
991 'id': 'M4gD1WSo5mA',
992 'ext': 'mp4',
993 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
994 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
556dbe7f 995 'duration': 721,
7caf9830
S
996 'upload_date': '20150127',
997 'uploader_id': 'BerkmanCenter',
ec85ded8 998 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
556dbe7f 999 'uploader': 'The Berkman Klein Center for Internet & Society',
7caf9830
S
1000 'license': 'Creative Commons Attribution license (reuse allowed)',
1001 },
1002 'params': {
1003 'skip_download': True,
1004 },
1005 },
fd050249
S
1006 {
1007 # Channel-like uploader_url
1008 'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
1009 'info_dict': {
1010 'id': 'eQcmzGIKrzg',
1011 'ext': 'mp4',
1012 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
1013 'description': 'md5:dda0d780d5a6e120758d1711d062a867',
556dbe7f 1014 'duration': 4060,
fd050249 1015 'upload_date': '20151119',
eb6793ba 1016 'uploader': 'Bernie Sanders',
fd050249 1017 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
ec85ded8 1018 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
fd050249
S
1019 'license': 'Creative Commons Attribution license (reuse allowed)',
1020 },
1021 'params': {
1022 'skip_download': True,
1023 },
1024 },
040ac686
S
1025 {
1026 'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
1027 'only_matching': True,
7f29cf54
S
1028 },
1029 {
067aa17e 1030 # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
7f29cf54
S
1031 'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
1032 'only_matching': True,
6496ccb4
S
1033 },
1034 {
1035 # Rental video preview
1036 'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
1037 'info_dict': {
1038 'id': 'uGpuVWrhIzE',
1039 'ext': 'mp4',
1040 'title': 'Piku - Trailer',
1041 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
1042 'upload_date': '20150811',
1043 'uploader': 'FlixMatrix',
1044 'uploader_id': 'FlixMatrixKaravan',
ec85ded8 1045 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
6496ccb4
S
1046 'license': 'Standard YouTube License',
1047 },
1048 'params': {
1049 'skip_download': True,
1050 },
eb6793ba 1051 'skip': 'This video is not available.',
022a5d66 1052 },
12afdc2a
S
1053 {
1054 # YouTube Red video with episode data
1055 'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
1056 'info_dict': {
1057 'id': 'iqKdEhx-dD4',
1058 'ext': 'mp4',
1059 'title': 'Isolation - Mind Field (Ep 1)',
4fe54c12 1060 'description': 'md5:46a29be4ceffa65b92d277b93f463c0f',
556dbe7f 1061 'duration': 2085,
12afdc2a
S
1062 'upload_date': '20170118',
1063 'uploader': 'Vsauce',
1064 'uploader_id': 'Vsauce',
1065 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
12afdc2a
S
1066 'series': 'Mind Field',
1067 'season_number': 1,
1068 'episode_number': 1,
1069 },
1070 'params': {
1071 'skip_download': True,
1072 },
1073 'expected_warnings': [
1074 'Skipping DASH manifest',
1075 ],
1076 },
c7121fa7
S
1077 {
1078 # The following content has been identified by the YouTube community
1079 # as inappropriate or offensive to some audiences.
1080 'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
1081 'info_dict': {
1082 'id': '6SJNVb0GnPI',
1083 'ext': 'mp4',
1084 'title': 'Race Differences in Intelligence',
1085 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
1086 'duration': 965,
1087 'upload_date': '20140124',
1088 'uploader': 'New Century Foundation',
1089 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
1090 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
c7121fa7
S
1091 },
1092 'params': {
1093 'skip_download': True,
1094 },
1095 },
022a5d66
S
1096 {
1097 # itag 212
1098 'url': '1t24XAntNCY',
1099 'only_matching': True,
fd5c4aab
S
1100 },
1101 {
1102 # geo restricted to JP
1103 'url': 'sJL6WA-aGkQ',
1104 'only_matching': True,
1105 },
d0ba5587
S
1106 {
1107 'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
1108 'only_matching': True,
1109 },
cd5a74a2
S
1110 {
1111 'url': 'https://invidio.us/watch?v=BaW_jenozKc',
1112 'only_matching': True,
1113 },
825cd268
RA
1114 {
1115 # DRM protected
1116 'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
1117 'only_matching': True,
4fe54c12
S
1118 },
1119 {
1120 # Video with unsupported adaptive stream type formats
1121 'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
1122 'info_dict': {
1123 'id': 'Z4Vy8R84T1U',
1124 'ext': 'mp4',
1125 'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
1126 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
1127 'duration': 433,
1128 'upload_date': '20130923',
1129 'uploader': 'Amelia Putri Harwita',
1130 'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',
1131 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',
1132 'formats': 'maxcount:10',
1133 },
1134 'params': {
1135 'skip_download': True,
1136 'youtube_include_dash_manifest': False,
1137 },
5caabd3c 1138 },
1139 {
822b9d9c 1140 # Youtube Music Auto-generated description
5caabd3c 1141 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1142 'info_dict': {
1143 'id': 'MgNrAu2pzNs',
1144 'ext': 'mp4',
1145 'title': 'Voyeur Girl',
1146 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
1147 'upload_date': '20190312',
1148 'uploader': 'Various Artists - Topic',
1149 'uploader_id': 'UCVWKBi1ELZn0QX2CBLSkiyw',
1150 'artist': 'Stephen',
1151 'track': 'Voyeur Girl',
1152 'album': 'it\'s too much love to know my dear',
1153 'release_date': '20190313',
1154 'release_year': 2019,
1155 },
1156 'params': {
1157 'skip_download': True,
1158 },
1159 },
1160 {
822b9d9c 1161 # Youtube Music Auto-generated description
5caabd3c 1162 # Retrieve 'artist' field from 'Artist:' in video description
1163 # when it is present on youtube music video
5caabd3c 1164 'url': 'https://www.youtube.com/watch?v=k0jLE7tTwjY',
1165 'info_dict': {
1166 'id': 'k0jLE7tTwjY',
1167 'ext': 'mp4',
1168 'title': 'Latch Feat. Sam Smith',
1169 'description': 'md5:3cb1e8101a7c85fcba9b4fb41b951335',
1170 'upload_date': '20150110',
1171 'uploader': 'Various Artists - Topic',
1172 'uploader_id': 'UCNkEcmYdjrH4RqtNgh7BZ9w',
1173 'artist': 'Disclosure',
1174 'track': 'Latch Feat. Sam Smith',
1175 'album': 'Latch Featuring Sam Smith',
1176 'release_date': '20121008',
1177 'release_year': 2012,
1178 },
1179 'params': {
1180 'skip_download': True,
1181 },
1182 },
1183 {
822b9d9c 1184 # Youtube Music Auto-generated description
5caabd3c 1185 # handle multiple artists on youtube music video
1186 'url': 'https://www.youtube.com/watch?v=74qn0eJSjpA',
1187 'info_dict': {
1188 'id': '74qn0eJSjpA',
1189 'ext': 'mp4',
1190 'title': 'Eastside',
1191 'description': 'md5:290516bb73dcbfab0dcc4efe6c3de5f2',
1192 'upload_date': '20180710',
1193 'uploader': 'Benny Blanco - Topic',
1194 'uploader_id': 'UCzqz_ksRu_WkIzmivMdIS7A',
1195 'artist': 'benny blanco, Halsey, Khalid',
1196 'track': 'Eastside',
1197 'album': 'Eastside',
1198 'release_date': '20180713',
1199 'release_year': 2018,
1200 },
1201 'params': {
1202 'skip_download': True,
1203 },
1204 },
1205 {
822b9d9c 1206 # Youtube Music Auto-generated description
5caabd3c 1207 # handle youtube music video with release_year and no release_date
1208 'url': 'https://www.youtube.com/watch?v=-hcAI0g-f5M',
1209 'info_dict': {
1210 'id': '-hcAI0g-f5M',
1211 'ext': 'mp4',
1212 'title': 'Put It On Me',
1213 'description': 'md5:93c55acc682ae7b0c668f2e34e1c069e',
1214 'upload_date': '20180426',
1215 'uploader': 'Matt Maeson - Topic',
1216 'uploader_id': 'UCnEkIGqtGcQMLk73Kp-Q5LQ',
1217 'artist': 'Matt Maeson',
1218 'track': 'Put It On Me',
1219 'album': 'The Hearse',
1220 'release_date': None,
1221 'release_year': 2018,
1222 },
1223 'params': {
1224 'skip_download': True,
1225 },
1226 },
66b48727
RA
1227 {
1228 'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',
1229 'only_matching': True,
1230 },
2eb88d95
PH
1231 ]
1232
e0df6211
PH
def __init__(self, *args, **kwargs):
    """Initialise the extractor with an empty signature-function cache.

    All positional and keyword arguments are forwarded unchanged to the
    parent class constructor.
    """
    super(YoutubeIE, self).__init__(*args, **kwargs)
    # Filled lazily by _decrypt_signature, keyed by
    # (player_url, signature cache id).
    self._player_cache = {}
e0df6211 1236
c5e8d7af
PH
def report_video_info_webpage_download(self, video_id):
    """Print a note that the video info webpage of video_id is being downloaded."""
    message = '%s: Downloading video info webpage' % video_id
    self.to_screen(message)
c5e8d7af 1240
c5e8d7af
PH
def report_information_extraction(self, video_id):
    """Print a note that information is being extracted for video_id."""
    message = '%s: Extracting video information' % video_id
    self.to_screen(message)
c5e8d7af
PH
1244
def report_unavailable_format(self, video_id, format):
    """Print a note that the requested format is not available for video_id."""
    details = (video_id, format)
    self.to_screen('%s: Format %s not available' % details)
c5e8d7af
PH
1248
def report_rtmp_download(self):
    """Print a note that the download will use the RTMP protocol."""
    message = 'RTMP download detected'
    self.to_screen(message)
c5e8d7af 1252
60064c53
PH
def _signature_cache_id(self, example_sig):
    """Return the lengths of the dot-separated parts of example_sig.

    The lengths are rendered as strings and joined with '.', e.g. a
    signature made of a 3-character and a 2-character part gives '3.2'.
    """
    part_lengths = [
        compat_str(len(chunk)) for chunk in example_sig.split('.')]
    return '.'.join(part_lengths)
60064c53
PH
1256
1257 def _extract_signature_function(self, video_id, player_url, example_sig):
cf010131 1258 id_m = re.match(
dc879c5a 1259 r'.*?[-.](?P<id>[a-zA-Z0-9_-]+)(?:/watch_as3|/html5player(?:-new)?|(?:/[a-z]{2,3}_[A-Z]{2})?/base)?\.(?P<ext>[a-z]+)$',
cf010131 1260 player_url)
c081b35c
PH
1261 if not id_m:
1262 raise ExtractorError('Cannot identify player %r' % player_url)
e0df6211
PH
1263 player_type = id_m.group('ext')
1264 player_id = id_m.group('id')
1265
c4417ddb 1266 # Read from filesystem cache
60064c53
PH
1267 func_id = '%s_%s_%s' % (
1268 player_type, player_id, self._signature_cache_id(example_sig))
c4417ddb 1269 assert os.path.basename(func_id) == func_id
a0e07d31 1270
69ea8ca4 1271 cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
a0e07d31 1272 if cache_spec is not None:
78caa52a 1273 return lambda s: ''.join(s[i] for i in cache_spec)
83799698 1274
6d1a55a5
PH
1275 download_note = (
1276 'Downloading player %s' % player_url
1277 if self._downloader.params.get('verbose') else
1278 'Downloading %s player %s' % (player_type, player_id)
1279 )
e0df6211
PH
1280 if player_type == 'js':
1281 code = self._download_webpage(
1282 player_url, video_id,
6d1a55a5 1283 note=download_note,
69ea8ca4 1284 errnote='Download of %s failed' % player_url)
83799698 1285 res = self._parse_sig_js(code)
c4417ddb 1286 elif player_type == 'swf':
e0df6211
PH
1287 urlh = self._request_webpage(
1288 player_url, video_id,
6d1a55a5 1289 note=download_note,
69ea8ca4 1290 errnote='Download of %s failed' % player_url)
e0df6211 1291 code = urlh.read()
83799698 1292 res = self._parse_sig_swf(code)
e0df6211
PH
1293 else:
1294 assert False, 'Invalid player type %r' % player_type
1295
785521bf
PH
1296 test_string = ''.join(map(compat_chr, range(len(example_sig))))
1297 cache_res = res(test_string)
1298 cache_spec = [ord(c) for c in cache_res]
83799698 1299
69ea8ca4 1300 self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec)
83799698
PH
1301 return res
1302
def _print_sig_code(self, func, example_sig):
    """Print Python code equivalent to the signature function func.

    func is applied to a probe string as long as example_sig whose i-th
    character has code point i, so the code points of the result give the
    index each output character is taken from. Those indices are rendered
    as a sum of 's[...]' index and slice expressions, guarded by a check
    on the lengths of the dot-separated signature parts, and printed via
    to_screen.
    """
    def gen_sig_code(idxs):
        def _slice_expr(first, last, stride):
            # Render a run of consecutive indices as one slice of s.
            first_txt = '' if first == 0 else str(first)
            stop = last + stride
            stop_txt = (':%d' % stop) if stop >= 0 else ':'
            stride_txt = '' if stride == 1 else (':%d' % stride)
            return 's[%s%s%s]' % (first_txt, stop_txt, stride_txt)

        run_step = None
        # Quelch pyflakes warnings - run_start will be set when run_step is set
        run_start = '(Never used)'
        for cur, prev in zip(idxs[1:], idxs[:-1]):
            delta = cur - prev
            if run_step is not None:
                # Inside a run: either it continues or it is flushed here.
                if delta != run_step:
                    yield _slice_expr(run_start, prev, run_step)
                    run_step = None
                continue
            if delta in (-1, 1):
                # A new ascending or descending run starts at prev.
                run_step = delta
                run_start = prev
            else:
                yield 's[%d]' % prev
        # Emit whatever is still pending for the final index.
        if run_step is None:
            yield 's[%d]' % cur
        else:
            yield _slice_expr(run_start, cur, run_step)

    probe = ''.join(map(compat_chr, range(len(example_sig))))
    index_spec = [ord(ch) for ch in func(probe)]
    expr_code = ' + '.join(gen_sig_code(index_spec))
    part_lengths = ', '.join(
        compat_str(len(p)) for p in example_sig.split('.'))
    signature_id_tuple = '(%s)' % part_lengths
    code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
            ' return %s\n') % (signature_id_tuple, expr_code)
    self.to_screen('Extracted signature function:\n' + code)
edf3e38e 1341
e0df6211
PH
def _parse_sig_js(self, jscode):
    """Return a callable that runs the JS player's signature function.

    The name of the signature function is located in jscode with the
    regular expressions below (tried via _search_regex), the function is
    extracted with JSInterpreter, and the returned callable passes its
    single string argument to it.
    """
    name_patterns = (
        r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
        r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
        # Obsolete patterns
        r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
        r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
    )
    funcname = self._search_regex(
        name_patterns, jscode,
        'Initial JS player signature function name', group='sig')

    interpreter = JSInterpreter(jscode)
    decipher = interpreter.extract_function(funcname)

    def run(s):
        # The extracted function takes its arguments as a list.
        return decipher([s])

    return run
1362
def _parse_sig_swf(self, file_contents):
    """Return a callable that runs the SWF player's 'decipher' function.

    The 'SignatureDecipher' class is extracted from file_contents with
    SWFInterpreter, and the returned callable passes its single string
    argument to that class's 'decipher' function.
    """
    TARGET_CLASSNAME = 'SignatureDecipher'
    interpreter = SWFInterpreter(file_contents)
    decipher_class = interpreter.extract_class(TARGET_CLASSNAME)
    decipher = interpreter.extract_function(decipher_class, 'decipher')

    def run(s):
        # The extracted function takes its arguments as a list.
        return decipher([s])

    return run
1369
83799698 1370 def _decrypt_signature(self, s, video_id, player_url, age_gate=False):
257a2501 1371 """Turn the encrypted s field into a working signature"""
6b37f0be 1372
c8bf86d5 1373 if player_url is None:
69ea8ca4 1374 raise ExtractorError('Cannot decrypt signature without player_url')
920de7a2 1375
69ea8ca4 1376 if player_url.startswith('//'):
78caa52a 1377 player_url = 'https:' + player_url
3c90cc8b
S
1378 elif not re.match(r'https?://', player_url):
1379 player_url = compat_urlparse.urljoin(
1380 'https://www.youtube.com', player_url)
c8bf86d5 1381 try:
62af3a0e 1382 player_id = (player_url, self._signature_cache_id(s))
c8bf86d5
PH
1383 if player_id not in self._player_cache:
1384 func = self._extract_signature_function(
60064c53 1385 video_id, player_url, s
c8bf86d5
PH
1386 )
1387 self._player_cache[player_id] = func
1388 func = self._player_cache[player_id]
1389 if self._downloader.params.get('youtube_print_sig_code'):
60064c53 1390 self._print_sig_code(func, s)
c8bf86d5
PH
1391 return func(s)
1392 except Exception as e:
1393 tb = traceback.format_exc()
1394 raise ExtractorError(
78caa52a 1395 'Signature extraction failed: ' + tb, cause=e)
e0df6211 1396
360e1ca5 1397 def _get_subtitles(self, video_id, webpage):
de7f3446 1398 try:
60e47a26 1399 subs_doc = self._download_xml(
38c2e5b8 1400 'https://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id,
7fad1c63
JMF
1401 video_id, note=False)
1402 except ExtractorError as err:
9b9c5355 1403 self._downloader.report_warning('unable to download video subtitles: %s' % error_to_compat_str(err))
de7f3446 1404 return {}
de7f3446
JMF
1405
1406 sub_lang_list = {}
60e47a26
JMF
1407 for track in subs_doc.findall('track'):
1408 lang = track.attrib['lang_code']
7e660ac1
LD
1409 if lang in sub_lang_list:
1410 continue
360e1ca5 1411 sub_formats = []
23d17e4b 1412 for ext in self._SUBTITLE_FORMATS:
15707c7e 1413 params = compat_urllib_parse_urlencode({
360e1ca5
JMF
1414 'lang': lang,
1415 'v': video_id,
1416 'fmt': ext,
1417 'name': track.attrib['name'].encode('utf-8'),
1418 })
1419 sub_formats.append({
1420 'url': 'https://www.youtube.com/api/timedtext?' + params,
1421 'ext': ext,
1422 })
1423 sub_lang_list[lang] = sub_formats
de7f3446 1424 if not sub_lang_list:
69ea8ca4 1425 self._downloader.report_warning('video doesn\'t have subtitles')
de7f3446
JMF
1426 return {}
1427 return sub_lang_list
1428
a72778d3
S
1429 def _get_ytplayer_config(self, video_id, webpage):
1430 patterns = (
526b3b07
S
1431 # User data may contain arbitrary character sequences that may affect
1432 # JSON extraction with regex, e.g. when '};' is contained the second
1433 # regex won't capture the whole JSON. Yet working around by trying more
1434 # concrete regex first keeping in mind proper quoted string handling
1435 # to be implemented in future that will replace this workaround (see
067aa17e
S
1436 # https://github.com/ytdl-org/youtube-dl/issues/7468,
1437 # https://github.com/ytdl-org/youtube-dl/pull/7599)
a72778d3
S
1438 r';ytplayer\.config\s*=\s*({.+?});ytplayer',
1439 r';ytplayer\.config\s*=\s*({.+?});',
1440 )
1441 config = self._search_regex(
1442 patterns, webpage, 'ytplayer.config', default=None)
1443 if config:
1444 return self._parse_json(
1445 uppercase_escape(config), video_id, fatal=False)
0e49d9a6 1446
360e1ca5 1447 def _get_automatic_captions(self, video_id, webpage):
de7f3446
JMF
1448 """We need the webpage for getting the captions url, pass it as an
1449 argument to speed up the process."""
69ea8ca4 1450 self.to_screen('%s: Looking for automatic captions' % video_id)
a72778d3 1451 player_config = self._get_ytplayer_config(video_id, webpage)
78caa52a 1452 err_msg = 'Couldn\'t find automatic captions for %s' % video_id
a72778d3 1453 if not player_config:
de7f3446
JMF
1454 self._downloader.report_warning(err_msg)
1455 return {}
de7f3446 1456 try:
0792d563 1457 args = player_config['args']
b78b292f
S
1458 caption_url = args.get('ttsurl')
1459 if caption_url:
1460 timestamp = args['timestamp']
1461 # We get the available subtitles
15707c7e 1462 list_params = compat_urllib_parse_urlencode({
b78b292f
S
1463 'type': 'list',
1464 'tlangs': 1,
1465 'asrs': 1,
1466 })
1467 list_url = caption_url + '&' + list_params
1468 caption_list = self._download_xml(list_url, video_id)
1469 original_lang_node = caption_list.find('track')
1470 if original_lang_node is None:
1471 self._downloader.report_warning('Video doesn\'t have automatic captions')
1472 return {}
1473 original_lang = original_lang_node.attrib['lang_code']
1474 caption_kind = original_lang_node.attrib.get('kind', '')
1475
1476 sub_lang_list = {}
1477 for lang_node in caption_list.findall('target'):
1478 sub_lang = lang_node.attrib['lang_code']
1479 sub_formats = []
1480 for ext in self._SUBTITLE_FORMATS:
15707c7e 1481 params = compat_urllib_parse_urlencode({
b78b292f
S
1482 'lang': original_lang,
1483 'tlang': sub_lang,
1484 'fmt': ext,
1485 'ts': timestamp,
1486 'kind': caption_kind,
1487 })
1488 sub_formats.append({
1489 'url': caption_url + '&' + params,
1490 'ext': ext,
1491 })
1492 sub_lang_list[sub_lang] = sub_formats
1493 return sub_lang_list
1494
ddbb4c5c
S
1495 def make_captions(sub_url, sub_langs):
1496 parsed_sub_url = compat_urllib_parse_urlparse(sub_url)
1497 caption_qs = compat_parse_qs(parsed_sub_url.query)
1498 captions = {}
1499 for sub_lang in sub_langs:
1500 sub_formats = []
1501 for ext in self._SUBTITLE_FORMATS:
1502 caption_qs.update({
1503 'tlang': [sub_lang],
1504 'fmt': [ext],
1505 })
1506 sub_url = compat_urlparse.urlunparse(parsed_sub_url._replace(
1507 query=compat_urllib_parse_urlencode(caption_qs, True)))
1508 sub_formats.append({
1509 'url': sub_url,
1510 'ext': ext,
1511 })
1512 captions[sub_lang] = sub_formats
1513 return captions
1514
1515 # New captions format as of 22.06.2017
1516 player_response = args.get('player_response')
1517 if player_response and isinstance(player_response, compat_str):
1518 player_response = self._parse_json(
1519 player_response, video_id, fatal=False)
1520 if player_response:
1521 renderer = player_response['captions']['playerCaptionsTracklistRenderer']
1522 base_url = renderer['captionTracks'][0]['baseUrl']
1523 sub_lang_list = []
1524 for lang in renderer['translationLanguages']:
1525 lang_code = lang.get('languageCode')
1526 if lang_code:
1527 sub_lang_list.append(lang_code)
1528 return make_captions(base_url, sub_lang_list)
1529
b78b292f
S
1530 # Some videos don't provide ttsurl but rather caption_tracks and
1531 # caption_translation_languages (e.g. 20LmZk1hakA)
ddbb4c5c 1532 # Not used anymore as of 22.06.2017
b78b292f
S
1533 caption_tracks = args['caption_tracks']
1534 caption_translation_languages = args['caption_translation_languages']
1535 caption_url = compat_parse_qs(caption_tracks.split(',')[0])['u'][0]
ddbb4c5c 1536 sub_lang_list = []
b78b292f
S
1537 for lang in caption_translation_languages.split(','):
1538 lang_qs = compat_parse_qs(compat_urllib_parse_unquote_plus(lang))
1539 sub_lang = lang_qs.get('lc', [None])[0]
ddbb4c5c
S
1540 if sub_lang:
1541 sub_lang_list.append(sub_lang)
1542 return make_captions(caption_url, sub_lang_list)
de7f3446
JMF
1543 # An extractor error can be raised by the download process if there are
1544 # no automatic captions but there are subtitles
ddbb4c5c 1545 except (KeyError, IndexError, ExtractorError):
de7f3446
JMF
1546 self._downloader.report_warning(err_msg)
1547 return {}
1548
21c340b8
S
def _mark_watched(self, video_id, video_info, player_response):
    """Request the playback-stats URL of a video ('Marking watched').

    The base URL is taken from ``player_response`` first and from
    ``video_info`` as a fallback.  When neither yields a valid URL the
    method returns without doing anything.  The request itself is
    non-fatal: a failure is reported but does not abort extraction.
    """
    tracking_url = try_get(
        player_response,
        lambda x: x['playbackTracking']['videostatsPlaybackUrl']['baseUrl'])
    if not tracking_url:
        tracking_url = try_get(
            video_info, lambda x: x['videostats_playback_base_url'][0])
    playback_url = url_or_none(tracking_url)
    if not playback_url:
        return

    url_parts = compat_urlparse.urlparse(playback_url)
    query = compat_urlparse.parse_qs(url_parts.query)

    # cpn generation algorithm is reverse engineered from base.js.
    # In fact it works even with dummy cpn.
    CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
    cpn_chars = []
    for _ in range(0, 16):
        # Masking with 63 keeps the index inside the 64-character alphabet.
        cpn_chars.append(CPN_ALPHABET[random.randint(0, 256) & 63])

    query['ver'] = ['2']
    query['cpn'] = [''.join(cpn_chars)]

    encoded_query = compat_urllib_parse_urlencode(query, True)
    playback_url = compat_urlparse.urlunparse(
        url_parts._replace(query=encoded_query))

    self._download_webpage(
        playback_url, video_id, 'Marking watched',
        'Unable to mark watched', fatal=False)
1574
66c9fa36
S
@staticmethod
def _extract_urls(webpage):
    """Collect YouTube embeds referenced by an arbitrary web page.

    Three sources are scanned, in this order:

    * embedded player markup (``<iframe>``, ``<embed>``, ``<object>``,
      ``data-video-url``, ``embedSWF(...)`` and ``new SWFObject(...)``),
      yielding HTML-unescaped player URLs;
    * lazyYT containers, yielding the ``data-youtube-id`` value;
    * the Wordpress "YouTube Video Importer" plugin, yielding the
      ``data-video_id`` value.

    Returns a list of strings (possibly empty); entries from the last two
    sources are whatever the attribute holds, not full URLs.
    """
    # Embedded YouTube player.
    # NOTE: the embedSWF branch used to read ``embedSWF\(?:\s*`` which
    # demanded a literal ':' and therefore never matched an actual
    # ``embedSWF("...")`` call; it now matches the opening parenthesis
    # followed by optional whitespace.
    entries = [
        unescapeHTML(mobj.group('url'))
        for mobj in re.finditer(r'''(?x)
        (?:
            <iframe[^>]+?src=|
            data-video-url=|
            <embed[^>]+?src=|
            embedSWF\(\s*|
            <object[^>]+data=|
            new\s+SWFObject\(
        )
        (["\'])
        (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
            (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
        \1''', webpage)]

    # lazyYT YouTube embed
    entries.extend(
        unescapeHTML(youtube_id)
        for youtube_id in re.findall(
            r'class="lazyYT" data-youtube-id="([^"]+)"', webpage))

    # Wordpress "YouTube Video Importer" plugin
    matches = re.findall(r'''(?x)<div[^>]+
        class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
        data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
    # findall() returns one tuple per match (q1, q2, id); the id is last.
    entries.extend(m[-1] for m in matches)

    return entries
1606
@staticmethod
def _extract_url(webpage):
    """Return the first embed reported by ``_extract_urls``, or None."""
    found = YoutubeIE._extract_urls(webpage)
    if not found:
        return None
    return found[0]
1611
97665381
PH
@classmethod
def extract_id(cls, url):
    """Return the video id captured by group 2 of ``cls._VALID_URL``.

    The pattern is applied in verbose mode.  Raises ExtractorError when
    *url* does not match it.
    """
    match = re.match(cls._VALID_URL, url, re.VERBOSE)
    if match is not None:
        return match.group(2)
    raise ExtractorError('Invalid URL: %s' % url)
1619
9cafc3fd
S
1620 @staticmethod
1621 def _extract_chapters(description, duration):
1622 if not description:
1623 return None
1624 chapter_lines = re.findall(
1625 r'(?:^|<br\s*/>)([^<]*<a[^>]+onclick=["\']yt\.www\.watch\.player\.seekTo[^>]+>(\d{1,2}:\d{1,2}(?::\d{1,2})?)</a>[^>]*)(?=$|<br\s*/>)',
1626 description)
1627 if not chapter_lines:
1628 return None
1629 chapters = []
1630 for next_num, (chapter_line, time_point) in enumerate(
1631 chapter_lines, start=1):
1632 start_time = parse_duration(time_point)
1633 if start_time is None:
1634 continue
39d4c1be
S
1635 if start_time > duration:
1636 break
9cafc3fd
S
1637 end_time = (duration if next_num == len(chapter_lines)
1638 else parse_duration(chapter_lines[next_num][1]))
1639 if end_time is None:
1640 continue
39d4c1be
S
1641 if end_time > duration:
1642 end_time = duration
1643 if start_time > end_time:
1644 break
9cafc3fd
S
1645 chapter_title = re.sub(
1646 r'<a[^>]+>[^<]+</a>', '', chapter_line).strip(' \t-')
1647 chapter_title = re.sub(r'\s+', ' ', chapter_title)
1648 chapters.append({
1649 'start_time': start_time,
1650 'end_time': end_time,
1651 'title': chapter_title,
1652 })
1653 return chapters
1654
c5e8d7af 1655 def _real_extract(self, url):
cf7e015f
S
1656 url, smuggled_data = unsmuggle_url(url, {})
1657
7e8c0af0 1658 proto = (
78caa52a
PH
1659 'http' if self._downloader.params.get('prefer_insecure', False)
1660 else 'https')
7e8c0af0 1661
7c80519c 1662 start_time = None
297a564b 1663 end_time = None
7c80519c
JMF
1664 parsed_url = compat_urllib_parse_urlparse(url)
1665 for component in [parsed_url.fragment, parsed_url.query]:
1666 query = compat_parse_qs(component)
297a564b 1667 if start_time is None and 't' in query:
7c80519c 1668 start_time = parse_duration(query['t'][0])
2929fa0e
JMF
1669 if start_time is None and 'start' in query:
1670 start_time = parse_duration(query['start'][0])
297a564b
JMF
1671 if end_time is None and 'end' in query:
1672 end_time = parse_duration(query['end'][0])
7c80519c 1673
c5e8d7af
PH
1674 # Extract original video URL from URL with redirection, like age verification, using next_url parameter
1675 mobj = re.search(self._NEXT_URL_RE, url)
1676 if mobj:
7fd002c0 1677 url = proto + '://www.youtube.com/' + compat_urllib_parse_unquote(mobj.group(1)).lstrip('/')
97665381 1678 video_id = self.extract_id(url)
c5e8d7af
PH
1679
1680 # Get video webpage
aa79ac0c 1681 url = proto + '://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1&bpctr=9999999999' % video_id
a1f934b1 1682 video_webpage = self._download_webpage(url, video_id)
c5e8d7af
PH
1683
1684 # Attempt to extract SWF player URL
e0df6211 1685 mobj = re.search(r'swfConfig.*?"(https?:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage)
c5e8d7af
PH
1686 if mobj is not None:
1687 player_url = re.sub(r'\\(.)', r'\1', mobj.group(1))
1688 else:
1689 player_url = None
1690
d8d24a92
S
1691 dash_mpds = []
1692
1693 def add_dash_mpd(video_info):
1694 dash_mpd = video_info.get('dashmpd')
1695 if dash_mpd and dash_mpd[0] not in dash_mpds:
1696 dash_mpds.append(dash_mpd[0])
1697
561b456e
S
1698 def add_dash_mpd_pr(pl_response):
1699 dash_mpd = url_or_none(try_get(
1700 pl_response, lambda x: x['streamingData']['dashManifestUrl'],
1701 compat_str))
1702 if dash_mpd and dash_mpd not in dash_mpds:
1703 dash_mpds.append(dash_mpd)
1704
c7121fa7
S
1705 is_live = None
1706 view_count = None
1707
1708 def extract_view_count(v_info):
1709 return int_or_none(try_get(v_info, lambda x: x['view_count'][0]))
1710
026fbedc
S
1711 def extract_token(v_info):
1712 return dict_get(v_info, ('account_playback_token', 'accountPlaybackToken', 'token'))
1713
c2d125d9
S
1714 def extract_player_response(player_response, video_id):
1715 pl_response = str_or_none(player_response)
1716 if not pl_response:
1717 return
1718 pl_response = self._parse_json(pl_response, video_id, fatal=False)
1719 if isinstance(pl_response, dict):
1720 add_dash_mpd_pr(pl_response)
1721 return pl_response
1722
dbdaaa23
S
1723 player_response = {}
1724
c5e8d7af 1725 # Get video info
6449cd80 1726 embed_webpage = None
c108eb73 1727 if re.search(r'player-age-gate-content">', video_webpage) is not None:
c108eb73
JMF
1728 age_gate = True
1729 # We simulate the access to the video from www.youtube.com/v/{video_id}
1730 # this can be viewed without login into Youtube
beb95e77
CL
1731 url = proto + '://www.youtube.com/embed/%s' % video_id
1732 embed_webpage = self._download_webpage(url, video_id, 'Downloading embed webpage')
15707c7e 1733 data = compat_urllib_parse_urlencode({
2c57c7fa
JMF
1734 'video_id': video_id,
1735 'eurl': 'https://youtube.googleapis.com/v/' + video_id,
c084c934 1736 'sts': self._search_regex(
beb95e77 1737 r'"sts"\s*:\s*(\d+)', embed_webpage, 'sts', default=''),
2c57c7fa 1738 })
7e8c0af0 1739 video_info_url = proto + '://www.youtube.com/get_video_info?' + data
94bd3613
PH
1740 video_info_webpage = self._download_webpage(
1741 video_info_url, video_id,
20436c30 1742 note='Refetching age-gated info webpage',
94bd3613 1743 errnote='unable to download video info webpage')
c5e8d7af 1744 video_info = compat_parse_qs(video_info_webpage)
c2d125d9
S
1745 pl_response = video_info.get('player_response', [None])[0]
1746 player_response = extract_player_response(pl_response, video_id)
d8d24a92 1747 add_dash_mpd(video_info)
c2d125d9 1748 view_count = extract_view_count(video_info)
c108eb73
JMF
1749 else:
1750 age_gate = False
bc93bdb5 1751 video_info = None
dc4e4f90 1752 sts = None
d8d24a92 1753 # Try looking directly into the video webpage
a72778d3
S
1754 ytplayer_config = self._get_ytplayer_config(video_id, video_webpage)
1755 if ytplayer_config:
4e62ebe2 1756 args = ytplayer_config['args']
4c76aa06 1757 if args.get('url_encoded_fmt_stream_map') or args.get('hlsvp'):
d8d24a92
S
1758 # Convert to the same format returned by compat_parse_qs
1759 video_info = dict((k, [v]) for k, v in args.items())
1760 add_dash_mpd(video_info)
6496ccb4
S
1761 # Rental video is not rented but preview is available (e.g.
1762 # https://www.youtube.com/watch?v=yYr8q0y5Jfg,
067aa17e 1763 # https://github.com/ytdl-org/youtube-dl/issues/10532)
6496ccb4
S
1764 if not video_info and args.get('ypc_vid'):
1765 return self.url_result(
1766 args['ypc_vid'], YoutubeIE.ie_key(), video_id=args['ypc_vid'])
2fe1ff85
JMF
1767 if args.get('livestream') == '1' or args.get('live_playback') == 1:
1768 is_live = True
dc4e4f90 1769 sts = ytplayer_config.get('sts')
dbdaaa23 1770 if not player_response:
c2d125d9 1771 player_response = extract_player_response(args.get('player_response'), video_id)
0a3cf9ad 1772 if not video_info or self._downloader.params.get('youtube_include_dash_manifest', True):
561b456e 1773 add_dash_mpd_pr(player_response)
0a3cf9ad
S
1774 # We also try looking in get_video_info since it may contain different dashmpd
1775 # URL that points to a DASH manifest with possibly different itag set (some itags
1776 # are missing from DASH manifest pointed by webpage's dashmpd, some - from DASH
1777 # manifest pointed by get_video_info's dashmpd).
1778 # The general idea is to take a union of itags of both DASH manifests (for example
067aa17e 1779 # video with such 'manifest behavior' see https://github.com/ytdl-org/youtube-dl/issues/6093)
4e62ebe2 1780 self.report_video_info_webpage_download(video_id)
a61ce714 1781 for el in ('embedded', 'detailpage', 'vevo', ''):
dc4e4f90
S
1782 query = {
1783 'video_id': video_id,
1784 'ps': 'default',
1785 'eurl': '',
1786 'gl': 'US',
1787 'hl': 'en',
1788 }
1789 if el:
1790 query['el'] = el
1791 if sts:
1792 query['sts'] = sts
810fb84d 1793 video_info_webpage = self._download_webpage(
dc4e4f90 1794 '%s://www.youtube.com/get_video_info' % proto,
4e62ebe2 1795 video_id, note=False,
dc4e4f90
S
1796 errnote='unable to download video info webpage',
1797 fatal=False, query=query)
1798 if not video_info_webpage:
1799 continue
0a3cf9ad 1800 get_video_info = compat_parse_qs(video_info_webpage)
dbdaaa23
S
1801 if not player_response:
1802 pl_response = get_video_info.get('player_response', [None])[0]
c2d125d9 1803 player_response = extract_player_response(pl_response, video_id)
fd545fc6 1804 add_dash_mpd(get_video_info)
c7121fa7
S
1805 if view_count is None:
1806 view_count = extract_view_count(get_video_info)
0a3cf9ad
S
1807 if not video_info:
1808 video_info = get_video_info
026fbedc 1809 get_token = extract_token(get_video_info)
56667d62 1810 if get_token:
89ea063e
S
1811 # Different get_video_info requests may report different results, e.g.
1812 # some may report video unavailability, but some may serve it without
067aa17e 1813 # any complaint (see https://github.com/ytdl-org/youtube-dl/issues/7362,
89ea063e
S
1814 # the original webpage as well as el=info and el=embedded get_video_info
1815 # requests report video unavailability due to geo restriction while
1816 # el=detailpage succeeds and returns valid data). This is probably
1817 # due to YouTube measures against IP ranges of hosting providers.
1818 # Working around by preferring the first succeeded video_info containing
1819 # the token if no such video_info yet was found.
026fbedc 1820 token = extract_token(video_info)
56667d62 1821 if not token:
44b2264f 1822 video_info = get_video_info
4e62ebe2 1823 break
bbb7c3f7
YCH
1824
1825 def extract_unavailable_message():
0add33ab
S
1826 messages = []
1827 for tag, kind in (('h1', 'message'), ('div', 'submessage')):
1828 msg = self._html_search_regex(
1829 r'(?s)<{tag}[^>]+id=["\']unavailable-{kind}["\'][^>]*>(.+?)</{tag}>'.format(tag=tag, kind=kind),
1830 video_webpage, 'unavailable %s' % kind, default=None)
1831 if msg:
1832 messages.append(msg)
1833 if messages:
1834 return '\n'.join(messages)
bbb7c3f7 1835
15be3eb5
RA
1836 if not video_info:
1837 unavailable_message = extract_unavailable_message()
1838 if not unavailable_message:
1839 unavailable_message = 'Unable to extract video data'
1840 raise ExtractorError(
1841 'YouTube said: %s' % unavailable_message, expected=True, video_id=video_id)
1842
dbdaaa23
S
1843 video_details = try_get(
1844 player_response, lambda x: x['videoDetails'], dict) or {}
1845
8dbf751a
RA
1846 video_title = video_info.get('title', [None])[0] or video_details.get('title')
1847 if not video_title:
cf7e015f
S
1848 self._downloader.report_warning('Unable to extract video title')
1849 video_title = '_'
1850
9cafc3fd 1851 description_original = video_description = get_element_by_id("eow-description", video_webpage)
cf7e015f 1852 if video_description:
fa4bc6e7
RA
1853
1854 def replace_url(m):
1855 redir_url = compat_urlparse.urljoin(url, m.group(1))
1856 parsed_redir_url = compat_urllib_parse_urlparse(redir_url)
1857 if re.search(r'^(?:www\.)?(?:youtube(?:-nocookie)?\.com|youtu\.be)$', parsed_redir_url.netloc) and parsed_redir_url.path == '/redirect':
1858 qs = compat_parse_qs(parsed_redir_url.query)
1859 q = qs.get('q')
1860 if q and q[0]:
1861 return q[0]
1862 return redir_url
1863
9cafc3fd 1864 description_original = video_description = re.sub(r'''(?x)
cf7e015f 1865 <a\s+
25cb7a0e 1866 (?:[a-zA-Z-]+="[^"]*"\s+)*?
23f13e97 1867 (?:title|href)="([^"]+)"\s+
25cb7a0e 1868 (?:[a-zA-Z-]+="[^"]*"\s+)*?
525cedb9 1869 class="[^"]*"[^>]*>
23f13e97 1870 [^<]+\.{3}\s*
cf7e015f 1871 </a>
fa4bc6e7 1872 ''', replace_url, video_description)
cf7e015f
S
1873 video_description = clean_html(video_description)
1874 else:
8dbf751a 1875 video_description = self._html_search_meta('description', video_webpage) or video_details.get('shortDescription')
cf7e015f 1876
8fe10494 1877 if not smuggled_data.get('force_singlefeed', False):
5e1eddb9 1878 if not self._downloader.params.get('noplaylist'):
8fe10494
S
1879 multifeed_metadata_list = try_get(
1880 player_response,
1881 lambda x: x['multicamera']['playerLegacyMulticameraRenderer']['metadataList'],
1882 compat_str) or try_get(
1883 video_info, lambda x: x['multifeed_metadata_list'][0], compat_str)
1884 if multifeed_metadata_list:
1885 entries = []
1886 feed_ids = []
1887 for feed in multifeed_metadata_list.split(','):
1888 # Unquote should take place before split on comma (,) since textual
1889 # fields may contain comma as well (see
067aa17e 1890 # https://github.com/ytdl-org/youtube-dl/issues/8536)
8fe10494
S
1891 feed_data = compat_parse_qs(compat_urllib_parse_unquote_plus(feed))
1892 entries.append({
1893 '_type': 'url_transparent',
1894 'ie_key': 'Youtube',
1895 'url': smuggle_url(
1896 '%s://www.youtube.com/watch?v=%s' % (proto, feed_data['id'][0]),
1897 {'force_singlefeed': True}),
1898 'title': '%s (%s)' % (video_title, feed_data['title'][0]),
1899 })
1900 feed_ids.append(feed_data['id'][0])
1901 self.to_screen(
1902 'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
1903 % (', '.join(feed_ids), video_id))
1904 return self.playlist_result(entries, video_id, video_title, video_description)
1905 else:
1906 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
cf7e015f 1907
c7121fa7 1908 if view_count is None:
1c9c8de2 1909 view_count = extract_view_count(video_info)
dbdaaa23
S
1910 if view_count is None and video_details:
1911 view_count = int_or_none(video_details.get('viewCount'))
1d699755 1912
27019dbb 1913 if is_live is None:
898238e9 1914 is_live = bool_or_none(video_details.get('isLive'))
27019dbb 1915
c5e8d7af
PH
1916 # Check for "rental" videos
1917 if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info:
067aa17e 1918 raise ExtractorError('"rental" videos not supported. See https://github.com/ytdl-org/youtube-dl/issues/359 for more information.', expected=True)
c5e8d7af 1919
c63ca0ee
S
1920 def _extract_filesize(media_url):
1921 return int_or_none(self._search_regex(
1922 r'\bclen[=/](\d+)', media_url, 'filesize', default=None))
1923
bf1317d2
S
1924 streaming_formats = try_get(player_response, lambda x: x['streamingData']['formats'], list) or []
1925 streaming_formats.extend(try_get(player_response, lambda x: x['streamingData']['adaptiveFormats'], list) or [])
1926
c5e8d7af
PH
1927 if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
1928 self.report_rtmp_download()
dd27fd17
PH
1929 formats = [{
1930 'format_id': '_rtmp',
1931 'protocol': 'rtmp',
1932 'url': video_info['conn'][0],
1933 'player_url': player_url,
1934 }]
bf1317d2 1935 elif not is_live and (streaming_formats or len(video_info.get('url_encoded_fmt_stream_map', [''])[0]) >= 1 or len(video_info.get('adaptive_fmts', [''])[0]) >= 1):
5f6a1245 1936 encoded_url_map = video_info.get('url_encoded_fmt_stream_map', [''])[0] + ',' + video_info.get('adaptive_fmts', [''])[0]
00fe14fc 1937 if 'rtmpe%3Dyes' in encoded_url_map:
067aa17e 1938 raise ExtractorError('rtmpe downloads are not supported, see https://github.com/ytdl-org/youtube-dl/issues/343 for more information.', expected=True)
bf1317d2 1939 formats = []
3318832e 1940 formats_spec = {}
82156fdb 1941 fmt_list = video_info.get('fmt_list', [''])[0]
1942 if fmt_list:
1943 for fmt in fmt_list.split(','):
1944 spec = fmt.split('/')
3318832e 1945 if len(spec) > 1:
1946 width_height = spec[1].split('x')
1947 if len(width_height) == 2:
1948 formats_spec[spec[0]] = {
1949 'resolution': spec[1],
1950 'width': int_or_none(width_height[0]),
1951 'height': int_or_none(width_height[1]),
1952 }
bf1317d2
S
1953 for fmt in streaming_formats:
1954 itag = str_or_none(fmt.get('itag'))
1955 if not itag:
201e9eaa 1956 continue
bf1317d2
S
1957 quality = fmt.get('quality')
1958 quality_label = fmt.get('qualityLabel') or quality
1959 formats_spec[itag] = {
1960 'asr': int_or_none(fmt.get('audioSampleRate')),
1961 'filesize': int_or_none(fmt.get('contentLength')),
1962 'format_note': quality_label,
1963 'fps': int_or_none(fmt.get('fps')),
1964 'height': int_or_none(fmt.get('height')),
bf1317d2
S
1965 # bitrate for itag 43 is always 2147483647
1966 'tbr': float_or_none(fmt.get('averageBitrate') or fmt.get('bitrate'), 1000) if itag != '43' else None,
1967 'width': int_or_none(fmt.get('width')),
1968 }
1969
1970 for fmt in streaming_formats:
1971 if fmt.get('drm_families'):
1972 continue
1973 url = url_or_none(fmt.get('url'))
1974
1975 if not url:
1976 cipher = fmt.get('cipher')
1977 if not cipher:
1978 continue
1979 url_data = compat_parse_qs(cipher)
1980 url = url_or_none(try_get(url_data, lambda x: x['url'][0], compat_str))
1981 if not url:
1982 continue
1983 else:
1984 cipher = None
1985 url_data = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
1986
2f483bc1
S
1987 stream_type = int_or_none(try_get(url_data, lambda x: x['stream_type'][0]))
1988 # Unsupported FORMAT_STREAM_TYPE_OTF
1989 if stream_type == 3:
1990 continue
6449cd80 1991
bf1317d2
S
1992 format_id = fmt.get('itag') or url_data['itag'][0]
1993 if not format_id:
1994 continue
1995 format_id = compat_str(format_id)
a49eccdf 1996
bf1317d2
S
1997 if cipher:
1998 if 's' in url_data or self._downloader.params.get('youtube_include_dash_manifest', True):
1999 ASSETS_RE = r'"assets":.+?"js":\s*("[^"]+")'
2000 jsplayer_url_json = self._search_regex(
2001 ASSETS_RE,
2002 embed_webpage if age_gate else video_webpage,
2003 'JS player URL (1)', default=None)
2004 if not jsplayer_url_json and not age_gate:
2005 # We need the embed website after all
2006 if embed_webpage is None:
2007 embed_url = proto + '://www.youtube.com/embed/%s' % video_id
2008 embed_webpage = self._download_webpage(
2009 embed_url, video_id, 'Downloading embed webpage')
2010 jsplayer_url_json = self._search_regex(
2011 ASSETS_RE, embed_webpage, 'JS player URL')
2012
2013 player_url = json.loads(jsplayer_url_json)
cf010131 2014 if player_url is None:
bf1317d2
S
2015 player_url_json = self._search_regex(
2016 r'ytplayer\.config.*?"url"\s*:\s*("[^"]+")',
2017 video_webpage, 'age gate player URL')
2018 player_url = json.loads(player_url_json)
2019
2020 if 'sig' in url_data:
2021 url += '&signature=' + url_data['sig'][0]
2022 elif 's' in url_data:
2023 encrypted_sig = url_data['s'][0]
2024
2025 if self._downloader.params.get('verbose'):
2026 if player_url is None:
2027 player_version = 'unknown'
2028 player_desc = 'unknown'
cf010131 2029 else:
bf1317d2
S
2030 if player_url.endswith('swf'):
2031 player_version = self._search_regex(
2032 r'-(.+?)(?:/watch_as3)?\.swf$', player_url,
2033 'flash player', fatal=False)
2034 player_desc = 'flash player %s' % player_version
2035 else:
2036 player_version = self._search_regex(
2037 [r'html5player-([^/]+?)(?:/html5player(?:-new)?)?\.js',
dc879c5a 2038 r'(?:www|player(?:_ias)?)[-.]([^/]+)(?:/[a-z]{2,3}_[A-Z]{2})?/base\.js'],
bf1317d2
S
2039 player_url,
2040 'html5 player', fatal=False)
2041 player_desc = 'html5 player %s' % player_version
2042
2043 parts_sizes = self._signature_cache_id(encrypted_sig)
2044 self.to_screen('{%s} signature length %s, %s' %
2045 (format_id, parts_sizes, player_desc))
2046
2047 signature = self._decrypt_signature(
2048 encrypted_sig, video_id, player_url, age_gate)
2049 sp = try_get(url_data, lambda x: x['sp'][0], compat_str) or 'signature'
2050 url += '&%s=%s' % (sp, signature)
201e9eaa
PH
2051 if 'ratebypass' not in url:
2052 url += '&ratebypass=yes'
c9afb51c 2053
94278f72
YCH
2054 dct = {
2055 'format_id': format_id,
2056 'url': url,
2057 'player_url': player_url,
2058 }
2059 if format_id in self._formats:
2060 dct.update(self._formats[format_id])
3318832e 2061 if format_id in formats_spec:
2062 dct.update(formats_spec[format_id])
94278f72 2063
aabc2be6 2064 # Some itags are not included in DASH manifest thus corresponding formats will
067aa17e 2065 # lack metadata (see https://github.com/ytdl-org/youtube-dl/pull/5993).
aabc2be6
S
2066 # Trying to extract metadata from url_encoded_fmt_stream_map entry.
2067 mobj = re.search(r'^(?P<width>\d+)[xX](?P<height>\d+)$', url_data.get('size', [''])[0])
2068 width, height = (int(mobj.group('width')), int(mobj.group('height'))) if mobj else (None, None)
94278f72 2069
bf1317d2
S
2070 if width is None:
2071 width = int_or_none(fmt.get('width'))
2072 if height is None:
2073 height = int_or_none(fmt.get('height'))
2074
c63ca0ee
S
2075 filesize = int_or_none(url_data.get(
2076 'clen', [None])[0]) or _extract_filesize(url)
2077
bf1317d2
S
2078 quality = url_data.get('quality', [None])[0] or fmt.get('quality')
2079 quality_label = url_data.get('quality_label', [None])[0] or fmt.get('qualityLabel')
2080
4878759f
S
2081 tbr = (float_or_none(url_data.get('bitrate', [None])[0], 1000)
2082 or float_or_none(fmt.get('bitrate'), 1000)) if format_id != '43' else None
bf1317d2 2083 fps = int_or_none(url_data.get('fps', [None])[0]) or int_or_none(fmt.get('fps'))
54fc90aa 2084
94278f72 2085 more_fields = {
c63ca0ee 2086 'filesize': filesize,
bf1317d2 2087 'tbr': tbr,
c9afb51c
AH
2088 'width': width,
2089 'height': height,
bf1317d2
S
2090 'fps': fps,
2091 'format_note': quality_label or quality,
c9afb51c 2092 }
94278f72
YCH
2093 for key, value in more_fields.items():
2094 if value:
2095 dct[key] = value
bf1317d2 2096 type_ = url_data.get('type', [None])[0] or fmt.get('mimeType')
aabc2be6
S
2097 if type_:
2098 type_split = type_.split(';')
2099 kind_ext = type_split[0].split('/')
2100 if len(kind_ext) == 2:
94278f72
YCH
2101 kind, _ = kind_ext
2102 dct['ext'] = mimetype2ext(type_split[0])
aabc2be6
S
2103 if kind in ('audio', 'video'):
2104 codecs = None
2105 for mobj in re.finditer(
2106 r'(?P<key>[a-zA-Z_-]+)=(?P<quote>["\']?)(?P<val>.+?)(?P=quote)(?:;|$)', type_):
2107 if mobj.group('key') == 'codecs':
2108 codecs = mobj.group('val')
2109 break
2110 if codecs:
6310acf5 2111 dct.update(parse_codecs(codecs))
e4a60912
S
2112 if dct.get('acodec') == 'none' or dct.get('vcodec') == 'none':
2113 dct['downloader_options'] = {
2114 # Youtube throttles chunks >~10M
2115 'http_chunk_size': 10485760,
2116 }
aabc2be6 2117 formats.append(dct)
c5e8d7af 2118 else:
c3e54389
S
2119 manifest_url = (
2120 url_or_none(try_get(
2121 player_response,
2122 lambda x: x['streamingData']['hlsManifestUrl'],
3089bc74
S
2123 compat_str))
2124 or url_or_none(try_get(
c3e54389
S
2125 video_info, lambda x: x['hlsvp'][0], compat_str)))
2126 if manifest_url:
2127 formats = []
2128 m3u8_formats = self._extract_m3u8_formats(
2129 manifest_url, video_id, 'mp4', fatal=False)
2130 for a_format in m3u8_formats:
2131 itag = self._search_regex(
2132 r'/itag/(\d+)/', a_format['url'], 'itag', default=None)
2133 if itag:
2134 a_format['format_id'] = itag
2135 if itag in self._formats:
2136 dct = self._formats[itag].copy()
2137 dct.update(a_format)
2138 a_format = dct
2139 a_format['player_url'] = player_url
2140 # Accept-Encoding header causes failures in live streams on Youtube and Youtube Gaming
2141 a_format.setdefault('http_headers', {})['Youtubedl-no-compression'] = 'True'
2142 formats.append(a_format)
2143 else:
13577349 2144 error_message = extract_unavailable_message()
c3e54389 2145 if not error_message:
13577349
S
2146 error_message = clean_html(try_get(
2147 player_response, lambda x: x['playabilityStatus']['reason'],
2148 compat_str))
2149 if not error_message:
2150 error_message = clean_html(
2151 try_get(video_info, lambda x: x['reason'][0], compat_str))
c3e54389
S
2152 if error_message:
2153 raise ExtractorError(error_message, expected=True)
2154 raise ExtractorError('no conn, hlsvp, hlsManifestUrl or url_encoded_fmt_stream_map information found in video info')
c5e8d7af 2155
7e72694b 2156 # uploader
dbdaaa23
S
2157 video_uploader = try_get(
2158 video_info, lambda x: x['author'][0],
2159 compat_str) or str_or_none(video_details.get('author'))
7e72694b
S
2160 if video_uploader:
2161 video_uploader = compat_urllib_parse_unquote_plus(video_uploader)
2162 else:
2163 self._downloader.report_warning('unable to extract uploader name')
2164
2165 # uploader_id
2166 video_uploader_id = None
2167 video_uploader_url = None
2168 mobj = re.search(
2169 r'<link itemprop="url" href="(?P<uploader_url>https?://www\.youtube\.com/(?:user|channel)/(?P<uploader_id>[^"]+))">',
2170 video_webpage)
2171 if mobj is not None:
2172 video_uploader_id = mobj.group('uploader_id')
2173 video_uploader_url = mobj.group('uploader_url')
2174 else:
2175 self._downloader.report_warning('unable to extract uploader nickname')
2176
b45a9e69 2177 channel_id = (
3089bc74
S
2178 str_or_none(video_details.get('channelId'))
2179 or self._html_search_meta(
2180 'channelId', video_webpage, 'channel id', default=None)
2181 or self._search_regex(
b45a9e69 2182 r'data-channel-external-id=(["\'])(?P<id>(?:(?!\1).)+)\1',
2183 video_webpage, 'channel id', default=None, group='id'))
dd4c4492
S
2184 channel_url = 'http://www.youtube.com/channel/%s' % channel_id if channel_id else None
2185
7e72694b
S
2186 # thumbnail image
2187 # We try first to get a high quality image:
2188 m_thumb = re.search(r'<span itemprop="thumbnail".*?href="(.*?)">',
2189 video_webpage, re.DOTALL)
2190 if m_thumb is not None:
2191 video_thumbnail = m_thumb.group(1)
2192 elif 'thumbnail_url' not in video_info:
2193 self._downloader.report_warning('unable to extract video thumbnail')
2194 video_thumbnail = None
2195 else: # don't panic if we can't find it
2196 video_thumbnail = compat_urllib_parse_unquote_plus(video_info['thumbnail_url'][0])
2197
2198 # upload date
2199 upload_date = self._html_search_meta(
2200 'datePublished', video_webpage, 'upload date', default=None)
2201 if not upload_date:
2202 upload_date = self._search_regex(
2203 [r'(?s)id="eow-date.*?>(.*?)</span>',
2204 r'(?:id="watch-uploader-info".*?>.*?|["\']simpleText["\']\s*:\s*["\'])(?:Published|Uploaded|Streamed live|Started) on (.+?)[<"\']'],
2205 video_webpage, 'upload date', default=None)
2206 upload_date = unified_strdate(upload_date)
2207
2208 video_license = self._html_search_regex(
2209 r'<h4[^>]+class="title"[^>]*>\s*License\s*</h4>\s*<ul[^>]*>\s*<li>(.+?)</li',
2210 video_webpage, 'license', default=None)
2211
2212 m_music = re.search(
2213 r'''(?x)
2214 <h4[^>]+class="title"[^>]*>\s*Music\s*</h4>\s*
2215 <ul[^>]*>\s*
2216 <li>(?P<title>.+?)
2217 by (?P<creator>.+?)
2218 (?:
2219 \(.+?\)|
2220 <a[^>]*
2221 (?:
2222 \bhref=["\']/red[^>]*>| # drop possible
2223 >\s*Listen ad-free with YouTube Red # YouTube Red ad
2224 )
2225 .*?
2226 )?</li
2227 ''',
2228 video_webpage)
2229 if m_music:
2230 video_alt_title = remove_quotes(unescapeHTML(m_music.group('title')))
2231 video_creator = clean_html(m_music.group('creator'))
2232 else:
2233 video_alt_title = video_creator = None
2234
2235 def extract_meta(field):
2236 return self._html_search_regex(
2237 r'<h4[^>]+class="title"[^>]*>\s*%s\s*</h4>\s*<ul[^>]*>\s*<li>(.+?)</li>\s*' % field,
2238 video_webpage, field, default=None)
2239
2240 track = extract_meta('Song')
2241 artist = extract_meta('Artist')
92bc97d3 2242 album = extract_meta('Album')
822b9d9c
RA
2243
2244 # Youtube Music Auto-generated description
92bc97d3 2245 release_date = release_year = None
822b9d9c
RA
2246 if video_description:
2247 mobj = re.search(r'(?s)Provided to YouTube by [^\n]+\n+(?P<track>[^·]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?', video_description)
2248 if mobj:
2249 if not track:
2250 track = mobj.group('track').strip()
2251 if not artist:
2252 artist = mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·'))
92bc97d3
RA
2253 if not album:
2254 album = mobj.group('album'.strip())
822b9d9c
RA
2255 release_year = mobj.group('release_year')
2256 release_date = mobj.group('release_date')
2257 if release_date:
2258 release_date = release_date.replace('-', '')
2259 if not release_year:
2260 release_year = int(release_date[:4])
2261 if release_year:
2262 release_year = int(release_year)
7e72694b
S
2263
2264 m_episode = re.search(
2265 r'<div[^>]+id="watch7-headline"[^>]*>\s*<span[^>]*>.*?>(?P<series>[^<]+)</a></b>\s*S(?P<season>\d+)\s*•\s*E(?P<episode>\d+)</span>',
2266 video_webpage)
2267 if m_episode:
c2dd2dc0 2268 series = unescapeHTML(m_episode.group('series'))
7e72694b
S
2269 season_number = int(m_episode.group('season'))
2270 episode_number = int(m_episode.group('episode'))
2271 else:
2272 series = season_number = episode_number = None
2273
2274 m_cat_container = self._search_regex(
2275 r'(?s)<h4[^>]*>\s*Category\s*</h4>\s*<ul[^>]*>(.*?)</ul>',
2276 video_webpage, 'categories', default=None)
2277 if m_cat_container:
2278 category = self._html_search_regex(
2279 r'(?s)<a[^<]+>(.*?)</a>', m_cat_container, 'category',
2280 default=None)
2281 video_categories = None if category is None else [category]
2282 else:
2283 video_categories = None
2284
2285 video_tags = [
2286 unescapeHTML(m.group('content'))
2287 for m in re.finditer(self._meta_regex('og:video:tag'), video_webpage)]
2288
2289 def _extract_count(count_name):
2290 return str_to_int(self._search_regex(
2291 r'-%s-button[^>]+><span[^>]+class="yt-uix-button-content"[^>]*>([\d,]+)</span>'
2292 % re.escape(count_name),
2293 video_webpage, count_name, default=None))
2294
2295 like_count = _extract_count('like')
2296 dislike_count = _extract_count('dislike')
2297
dbdaaa23
S
2298 if view_count is None:
2299 view_count = str_to_int(self._search_regex(
2300 r'<[^>]+class=["\']watch-view-count[^>]+>\s*([\d,\s]+)', video_webpage,
2301 'view count', default=None))
2302
bf3c9326
S
2303 average_rating = (
2304 float_or_none(video_details.get('averageRating'))
2305 or try_get(video_info, lambda x: float_or_none(x['avg_rating'][0])))
2306
7e72694b
S
2307 # subtitles
2308 video_subtitles = self.extract_subtitles(video_id, video_webpage)
2309 automatic_captions = self.extract_automatic_captions(video_id, video_webpage)
2310
2311 video_duration = try_get(
2312 video_info, lambda x: int_or_none(x['length_seconds'][0]))
dbdaaa23
S
2313 if not video_duration:
2314 video_duration = int_or_none(video_details.get('lengthSeconds'))
7e72694b
S
2315 if not video_duration:
2316 video_duration = parse_duration(self._html_search_meta(
2317 'duration', video_webpage, 'video duration'))
2318
2319 # annotations
2320 video_annotations = None
2321 if self._downloader.params.get('writeannotations', False):
64b6a4e9
RA
2322 xsrf_token = self._search_regex(
2323 r'([\'"])XSRF_TOKEN\1\s*:\s*([\'"])(?P<xsrf_token>[A-Za-z0-9+/=]+)\2',
2324 video_webpage, 'xsrf token', group='xsrf_token', fatal=False)
2325 invideo_url = try_get(
2326 player_response, lambda x: x['annotations'][0]['playerAnnotationsUrlsRenderer']['invideoUrl'], compat_str)
2327 if xsrf_token and invideo_url:
2328 xsrf_field_name = self._search_regex(
2329 r'([\'"])XSRF_FIELD_NAME\1\s*:\s*([\'"])(?P<xsrf_field_name>\w+)\2',
2330 video_webpage, 'xsrf field name',
2331 group='xsrf_field_name', default='session_token')
2332 video_annotations = self._download_webpage(
2333 self._proto_relative_url(invideo_url),
2334 video_id, note='Downloading annotations',
2335 errnote='Unable to download video annotations', fatal=False,
2336 data=urlencode_postdata({xsrf_field_name: xsrf_token}))
7e72694b
S
2337
2338 chapters = self._extract_chapters(description_original, video_duration)
2339
dd27fd17 2340 # Look for the DASH manifest
203fb43f 2341 if self._downloader.params.get('youtube_include_dash_manifest', True):
77c6fb5b 2342 dash_mpd_fatal = True
8ff648e4 2343 for mpd_url in dash_mpds:
d8d24a92 2344 dash_formats = {}
774e208f 2345 try:
05d0d131
YCH
2346 def decrypt_sig(mobj):
2347 s = mobj.group(1)
2348 dec_s = self._decrypt_signature(s, video_id, player_url, age_gate)
2349 return '/signature/%s' % dec_s
2350
8ff648e4 2351 mpd_url = re.sub(r'/s/([a-fA-F0-9\.]+)', decrypt_sig, mpd_url)
2d2fa82d 2352
8ff648e4 2353 for df in self._extract_mpd_formats(
2354 mpd_url, video_id, fatal=dash_mpd_fatal,
2355 formats_dict=self._formats):
c63ca0ee
S
2356 if not df.get('filesize'):
2357 df['filesize'] = _extract_filesize(df['url'])
d8d24a92
S
2358 # Do not overwrite DASH format found in some previous DASH manifest
2359 if df['format_id'] not in dash_formats:
2360 dash_formats[df['format_id']] = df
77c6fb5b
S
2361 # Additional DASH manifests may end up in HTTP Error 403 therefore
2362 # allow them to fail without bug report message if we already have
2363 # some DASH manifest succeeded. This is temporary workaround to reduce
2364 # burst of bug reports until we figure out the reason and whether it
2365 # can be fixed at all.
2366 dash_mpd_fatal = False
774e208f
PH
2367 except (ExtractorError, KeyError) as e:
2368 self.report_warning(
2369 'Skipping DASH manifest: %r' % e, video_id)
d8d24a92 2370 if dash_formats:
04b3b3df
JMF
2371 # Remove the formats we found through non-DASH, they
2372 # contain less info and it can be wrong, because we use
2373 # fixed values (for example the resolution). See
067aa17e 2374 # https://github.com/ytdl-org/youtube-dl/issues/5774 for an
04b3b3df 2375 # example.
d80265cc 2376 formats = [f for f in formats if f['format_id'] not in dash_formats.keys()]
d8d24a92 2377 formats.extend(dash_formats.values())
d80044c2 2378
6271f1ca
PH
2379 # Check for malformed aspect ratio
2380 stretched_m = re.search(
2381 r'<meta\s+property="og:video:tag".*?content="yt:stretch=(?P<w>[0-9]+):(?P<h>[0-9]+)">',
2382 video_webpage)
2383 if stretched_m:
313dfc45
LL
2384 w = float(stretched_m.group('w'))
2385 h = float(stretched_m.group('h'))
5faf9fed
S
2386 # yt:stretch may hold invalid ratio data (e.g. for Q39EVAstoRM ratio is 17:0).
2387 # We will only process correct ratios.
313dfc45 2388 if w > 0 and h > 0:
41f24c32 2389 ratio = w / h
313dfc45
LL
2390 for f in formats:
2391 if f.get('vcodec') != 'none':
2392 f['stretched_ratio'] = ratio
6271f1ca 2393
026fbedc
S
2394 if not formats:
2395 token = extract_token(video_info)
2396 if not token:
2397 if 'reason' in video_info:
2398 if 'The uploader has not made this video available in your country.' in video_info['reason']:
2399 regions_allowed = self._html_search_meta(
2400 'regionsAllowed', video_webpage, default=None)
2401 countries = regions_allowed.split(',') if regions_allowed else None
2402 self.raise_geo_restricted(
2403 msg=video_info['reason'][0], countries=countries)
2404 reason = video_info['reason'][0]
2405 if 'Invalid parameters' in reason:
2406 unavailable_message = extract_unavailable_message()
2407 if unavailable_message:
2408 reason = unavailable_message
2409 raise ExtractorError(
2410 'YouTube said: %s' % reason,
2411 expected=True, video_id=video_id)
2412 else:
2413 raise ExtractorError(
2414 '"token" parameter not in video info for unknown reason',
2415 video_id=video_id)
2416
0d297518
RA
2417 if not formats and (video_info.get('license_info') or try_get(player_response, lambda x: x['streamingData']['licenseInfos'])):
2418 raise ExtractorError('This video is DRM protected.', expected=True)
2419
4bcc7bd1 2420 self._sort_formats(formats)
4ea3be0a 2421
21c340b8 2422 self.mark_watched(video_id, video_info, player_response)
d77ab8e2 2423
4ea3be0a 2424 return {
8bcc8756
JW
2425 'id': video_id,
2426 'uploader': video_uploader,
2427 'uploader_id': video_uploader_id,
fd050249 2428 'uploader_url': video_uploader_url,
dd4c4492
S
2429 'channel_id': channel_id,
2430 'channel_url': channel_url,
8bcc8756 2431 'upload_date': upload_date,
7caf9830 2432 'license': video_license,
936784b2 2433 'creator': video_creator or artist,
8bcc8756 2434 'title': video_title,
936784b2 2435 'alt_title': video_alt_title or track,
8bcc8756
JW
2436 'thumbnail': video_thumbnail,
2437 'description': video_description,
2438 'categories': video_categories,
000b6b5a 2439 'tags': video_tags,
8bcc8756 2440 'subtitles': video_subtitles,
360e1ca5 2441 'automatic_captions': automatic_captions,
8bcc8756
JW
2442 'duration': video_duration,
2443 'age_limit': 18 if age_gate else 0,
2444 'annotations': video_annotations,
9cafc3fd 2445 'chapters': chapters,
7e8c0af0 2446 'webpage_url': proto + '://www.youtube.com/watch?v=%s' % video_id,
8bcc8756 2447 'view_count': view_count,
4ea3be0a 2448 'like_count': like_count,
2449 'dislike_count': dislike_count,
bf3c9326 2450 'average_rating': average_rating,
8bcc8756 2451 'formats': formats,
2fe1ff85 2452 'is_live': is_live,
7c80519c 2453 'start_time': start_time,
297a564b 2454 'end_time': end_time,
12afdc2a
S
2455 'series': series,
2456 'season_number': season_number,
2457 'episode_number': episode_number,
936784b2
S
2458 'track': track,
2459 'artist': artist,
5caabd3c 2460 'album': album,
2461 'release_date': release_date,
2462 'release_year': release_year,
4ea3be0a 2463 }
c5e8d7af 2464
5f6a1245 2465
class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
    """Extractor for YouTube playlist URLs, bare playlist ids and mixes."""

    IE_DESC = 'YouTube.com playlists'
    _VALID_URL = r"""(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        (?:
                            (?:
                                youtube(?:kids)?\.com|
                                invidio\.us
                            )
                            /
                            (?:
                               (?:course|view_play_list|my_playlists|artist|playlist|watch|embed/(?:videoseries|[0-9A-Za-z_-]{11}))
                               \? (?:.*?[&;])*? (?:p|a|list)=
                            |  p/
                            )|
                            youtu\.be/[0-9A-Za-z_-]{11}\?.*?\blist=
                        )
                        (
                            (?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)?[0-9A-Za-z-_]{10,}
                            # Top tracks, they can also include dots
                            |(?:MC)[\w\.]*
                        )
                        .*
                     |
                        (%(playlist_id)s)
                     )""" % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s'
    _VIDEO_RE_TPL = r'href="\s*/watch\?v=%s(?:&amp;(?:[^"]*?index=(?P<index>\d+))?(?:[^>]+>(?P<title>[^<]+))?)?'
    _VIDEO_RE = _VIDEO_RE_TPL % r'(?P<id>[0-9A-Za-z_-]{11})'
    IE_NAME = 'youtube:playlist'
    _TESTS = [{
        'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
        'info_dict': {
            'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
            'uploader': 'Sergey M.',
            'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
            'title': 'youtube-dl public playlist',
        },
        'playlist_count': 1,
    }, {
        'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
        'info_dict': {
            'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
            'uploader': 'Sergey M.',
            'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
            'title': 'youtube-dl empty playlist',
        },
        'playlist_count': 0,
    }, {
        'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
        'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
        'info_dict': {
            'title': '29C3: Not my department',
            'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
            'uploader': 'Christiaan008',
            'uploader_id': 'ChRiStIaAn008',
        },
        'playlist_count': 96,
    }, {
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
            'uploader': 'Wickydoo',
            'uploader_id': 'Wickydoo',
        },
        'playlist_mincount': 26,
    }, {
        'note': 'Large playlist',
        'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
        'info_dict': {
            'title': 'Uploads from Cauchemar',
            'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
            'uploader': 'Cauchemar',
            'uploader_id': 'Cauchemar89',
        },
        'playlist_mincount': 799,
    }, {
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        },
        'playlist_count': 2,
        'skip': 'This playlist is private',
    }, {
        'note': 'embedded',
        'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA15',
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
            'uploader': 'milan',
            'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
        }
    }, {
        'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
        'playlist_mincount': 485,
        'info_dict': {
            'title': '2018 Chinese New Singles (11/6 updated)',
            'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
            'uploader': 'LBK',
            'uploader_id': 'sdragonfang',
        }
    }, {
        'note': 'Embedded SWF player',
        'url': 'https://www.youtube.com/p/YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ?hl=en_US&fs=1&rel=0',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA7',
            'id': 'YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ',
        },
        'skip': 'This playlist does not exist',
    }, {
        'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
        'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
        'info_dict': {
            'title': 'Uploads from Interstellar Movie',
            'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
            'uploader': 'Interstellar Movie',
            'uploader_id': 'InterstellarMovie1',
        },
        'playlist_mincount': 21,
    }, {
        # Playlist URL that does not actually serve a playlist
        'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
        'info_dict': {
            'id': 'FqZTN594JQw',
            'ext': 'webm',
            'title': "Smiley's People 01 detective, Adventure Series, Action",
            'uploader': 'STREEM',
            'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
            'upload_date': '20150526',
            'license': 'Standard YouTube License',
            'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
            'categories': ['People & Blogs'],
            'tags': list,
            'view_count': int,
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'skip_download': True,
        },
        'skip': 'This video is not available.',
        'add_ie': [YoutubeIE.ie_key()],
    }, {
        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
        'info_dict': {
            'id': 'yeWKywCrFtk',
            'ext': 'mp4',
            'title': 'Small Scale Baler and Braiding Rugs',
            'uploader': 'Backus-Page House Museum',
            'uploader_id': 'backuspagemuseum',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
            'upload_date': '20161008',
            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
            'categories': ['Nonprofits & Activism'],
            'tags': list,
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
    }, {
        # https://github.com/ytdl-org/youtube-dl/issues/21844
        'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
        'info_dict': {
            'title': 'Data Analysis with Dr Mike Pound',
            'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
            'uploader_id': 'Computerphile',
            'uploader': 'Computerphile',
        },
        'playlist_mincount': 11,
    }, {
        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
        'only_matching': True,
    }, {
        'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
        'only_matching': True,
    }, {
        # music album playlist
        'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
        'only_matching': True,
    }, {
        'url': 'https://invidio.us/playlist?list=PLDIoUOhQQPlXr63I_vwF9GD8sAKh77dWU',
        'only_matching': True,
    }, {
        'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',
        'only_matching': True,
    }]

    def _real_initialize(self):
        """Run the login step (``self._login()``)."""
        self._login()

    def extract_videos_from_page(self, page):
        """Collect (video id, title) pairs found in ``page``.

        Tags carrying a ``data-video-id`` attribute are read first. The
        regex fallbacks are then applied in a fixed order, each one being
        handed the id/title lists accumulated so far.

        Returns ``zip(ids, titles)``.
        """
        video_ids = []
        video_titles = []

        tagged_items = re.findall(
            r'(<[^>]*\bdata-video-id\s*=\s*["\'][0-9A-Za-z_-]{11}[^>]+>)', page)
        for tag in tagged_items:
            attributes = extract_attributes(tag)
            found_id = attributes['data-video-id']
            found_title = unescapeHTML(attributes.get('data-title'))
            video_ids.append(found_id)
            # Only strip real titles; a missing/empty title is stored as is.
            video_titles.append(found_title.strip() if found_title else found_title)

        fallback_patterns = (
            # Fallback with old _VIDEO_RE
            self._VIDEO_RE,
            # Relaxed fallbacks
            r'href="\s*/watch\?v\s*=\s*(?P<id>[0-9A-Za-z_-]{11})',
            r'data-video-ids\s*=\s*["\'](?P<id>[0-9A-Za-z_-]{11})',
        )
        for pattern in fallback_patterns:
            self.extract_videos_from_page_impl(
                pattern, page, video_ids, video_titles)

        return zip(video_ids, video_titles)

    def _extract_mix(self, playlist_id):
        """Extract a mix by walking watch pages until no new video shows up.

        The mixes are generated from a single video; the id of the playlist
        is just 'RD' + video_id, so the last 11 characters of
        ``playlist_id`` seed the first request.
        """
        # The pattern only depends on the playlist id, so build it once.
        mix_entry_re = r'''(?xs)data-video-username=".*?".*?
                   href="/watch\?v=([0-9A-Za-z_-]{11})&amp;[^"]*?list=%s''' % re.escape(playlist_id)

        seen_ids = []
        seed_id = playlist_id[-11:]
        for page_num in itertools.count(1):
            watch_url = 'https://youtube.com/watch?v=%s&list=%s' % (seed_id, playlist_id)
            webpage = self._download_webpage(
                watch_url, playlist_id,
                'Downloading page {0} of Youtube mix'.format(page_num))
            # Fetch new pages until all the videos are repeated, it seems that
            # there are always 51 unique videos.
            fresh_ids = [
                candidate
                for candidate in orderedSet(re.findall(mix_entry_re, webpage))
                if candidate not in seen_ids]
            if not fresh_ids:
                break
            seen_ids.extend(fresh_ids)
            seed_id = seen_ids[-1]

        url_results = self._ids_to_results(seen_ids)

        # Probe the candidate title containers of the last downloaded page in
        # order and keep the first non-empty one.
        title_span = None
        for class_name in ('playlist-title', 'title long-title', 'title'):
            title_span = get_element_by_attribute('class', class_name, webpage)
            if title_span:
                break
        title = clean_html(title_span)

        return self.playlist_result(url_results, playlist_id, title)

    def _extract_playlist(self, playlist_id):
        """Download the playlist page and build the playlist result.

        Returns a ``(has_videos, playlist)`` pair. ``has_videos`` is False
        only when the page has no title and ``self._entries`` yields nothing.

        Raises ExtractorError (expected) when an alert on the page says the
        playlist does not exist, is private, or the parameters are invalid.
        """
        url = self._TEMPLATE_URL % playlist_id
        page = self._download_webpage(url, playlist_id)

        # the yt-alert-message now has tabindex attribute (see https://github.com/ytdl-org/youtube-dl/issues/11604)
        for raw_alert in re.findall(r'<div class="yt-alert-message"[^>]*>([^<]+)</div>', page):
            alert = raw_alert.strip()
            # Check if the playlist exists or is private
            unavailable = re.match(
                r'[^<]*(?:The|This) playlist (?P<reason>does not exist|is private)[^<]*', alert)
            if unavailable:
                reason = unavailable.group('reason')
                message = 'This playlist %s' % reason
                if 'private' in reason:
                    message += ', use --username or --netrc to access it'
                raise ExtractorError(message + '.', expected=True)
            if re.match(r'[^<]*Invalid parameters[^<]*', alert):
                raise ExtractorError(
                    'Invalid parameters. Maybe URL is incorrect.',
                    expected=True)
            if re.match(r'[^<]*Choose your language[^<]*', alert):
                continue
            self.report_warning('Youtube gives an alert message: ' + alert)

        playlist_title = self._html_search_regex(
            r'(?s)<h1 class="pl-header-title[^"]*"[^>]*>\s*(.*?)\s*</h1>',
            page, 'title', default=None)

        uploader_base = r'class=["\']pl-header-details[^>]+>\s*<li>\s*<a[^>]+\bhref='
        uploader = self._html_search_regex(
            r'%s["\']/(?:user|channel)/[^>]+>([^<]+)' % uploader_base,
            page, 'uploader', default=None)

        uploader_id = uploader_url = None
        uploader_link = re.search(
            r'%s(["\'])(?P<path>/(?:user|channel)/(?P<uploader_id>.+?))\1' % uploader_base,
            page)
        if uploader_link:
            uploader_id = uploader_link.group('uploader_id')
            uploader_url = compat_urlparse.urljoin(url, uploader_link.group('path'))

        has_videos = True
        if not playlist_title:
            # Some playlist URLs don't actually serve a playlist (e.g.
            # https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4)
            try:
                next(self._entries(page, playlist_id))
            except StopIteration:
                has_videos = False

        playlist = self.playlist_result(
            self._entries(page, playlist_id), playlist_id, playlist_title)
        playlist['uploader'] = uploader
        playlist['uploader_id'] = uploader_id
        playlist['uploader_url'] = uploader_url

        return has_videos, playlist

    def _check_download_just_video(self, url, playlist_id):
        """Check whether ``url`` also names a single video.

        Returns a ``(video_id, result)`` pair:
        ``(None, None)`` when no video id is found in the URL,
        ``(video_id, url_result)`` when the ``noplaylist`` param is set,
        ``(video_id, None)`` otherwise.
        """
        # Check if it's a video-specific URL
        query = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
        video_id = query.get('v', [None])[0]
        if not video_id:
            video_id = self._search_regex(
                r'(?:(?:^|//)youtu\.be/|youtube\.com/embed/(?!videoseries))([0-9A-Za-z_-]{11})', url,
                'video id', default=None)
        if not video_id:
            return None, None

        if not self._downloader.params.get('noplaylist'):
            self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))
            return video_id, None

        self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
        return video_id, self.url_result(video_id, 'Youtube', video_id=video_id)

    def _real_extract(self, url):
        """Dispatch ``url`` to single-video, mix or regular playlist extraction.

        Raises ExtractorError when ``url`` does not match ``_VALID_URL``.
        """
        # Extract playlist id
        url_match = re.match(self._VALID_URL, url)
        if url_match is None:
            raise ExtractorError('Invalid URL: %s' % url)
        playlist_id = url_match.group(1) or url_match.group(2)

        video_id, video = self._check_download_just_video(url, playlist_id)
        if video:
            return video

        if playlist_id.startswith(('RD', 'UL', 'PU')):
            # Mixes require a custom extraction process
            return self._extract_mix(playlist_id)

        has_videos, playlist = self._extract_playlist(playlist_id)
        if not has_videos and video_id:
            # Some playlist URLs don't actually serve a playlist (see
            # https://github.com/ytdl-org/youtube-dl/issues/10537).
            # Fallback to plain video extraction if there is a video id
            # along with playlist id.
            return self.url_result(video_id, 'Youtube', video_id=video_id)

        return playlist
448830ce 2827
c5e8d7af 2828
class YoutubeChannelIE(YoutubePlaylistBaseInfoExtractor):
    """Extractor for ``/channel/<id>`` URLs."""

    IE_DESC = 'YouTube.com channels'
    _VALID_URL = r'https?://(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie|kids)?\.com|(?:www\.)?invidio\.us)/channel/(?P<id>[0-9A-Za-z_-]+)'
    _TEMPLATE_URL = 'https://www.youtube.com/channel/%s/videos'
    _VIDEO_RE = r'(?:title="(?P<title>[^"]+)"[^>]+)?href="/watch\?v=(?P<id>[0-9A-Za-z_-]+)&?'
    IE_NAME = 'youtube:channel'
    _TESTS = [{
        'note': 'paginated channel',
        'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
        'playlist_mincount': 91,
        'info_dict': {
            'id': 'UUKfVa3S1e4PHvxWcwyMMg8w',
            'title': 'Uploads from lex will',
            'uploader': 'lex will',
            'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
        }
    }, {
        'note': 'Age restricted channel',
        # from https://www.youtube.com/user/DeusExOfficial
        'url': 'https://www.youtube.com/channel/UCs0ifCMCm1icqRbqhUINa0w',
        'playlist_mincount': 64,
        'info_dict': {
            'id': 'UUs0ifCMCm1icqRbqhUINa0w',
            'title': 'Uploads from Deus Ex',
            'uploader': 'Deus Ex',
            'uploader_id': 'DeusExOfficial',
        },
    }, {
        'url': 'https://invidio.us/channel/UC23qupoDRn9YOAVzeoxjOQA',
        'only_matching': True,
    }, {
        'url': 'https://www.youtubekids.com/channel/UCyu8StPfZWapR6rfW_JgqcA',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Reject URLs claimed by the playlists or live extractors.

        Anything else is decided by the parent class' ``suitable``.
        """
        if YoutubePlaylistsIE.suitable(url) or YoutubeLiveIE.suitable(url):
            return False
        return super(YoutubeChannelIE, cls).suitable(url)

    def _build_template_url(self, url, channel_id):
        """Return ``_TEMPLATE_URL`` filled in with ``channel_id`` (``url`` is unused here)."""
        return self._TEMPLATE_URL % channel_id

    def _real_extract(self, url):
        """Extract a channel, preferring its uploads playlist when one can be derived.

        Raises ExtractorError (expected) when the channel page yields no
        entries and shows an alert message.
        """
        channel_id = self._match_id(url)
        videos_url = self._build_template_url(url, channel_id)

        # Channel by page listing is restricted to 35 pages of 30 items, i.e. 1050 videos total (see #5778)
        # Workaround by extracting as a playlist if managed to obtain channel playlist URL
        # otherwise fallback on channel by page extraction
        listing_page = self._download_webpage(
            videos_url + '?view=57', channel_id,
            'Downloading channel page', fatal=False)

        channel_playlist_id = False
        if listing_page is not False:
            channel_playlist_id = self._html_search_meta(
                'channelId', listing_page, 'channel id', default=None)
            if not channel_playlist_id:
                app_url = self._html_search_meta(
                    ('al:ios:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad'),
                    listing_page, 'channel url', default=None)
                if app_url:
                    channel_playlist_id = self._search_regex(
                        r'vnd\.youtube://user/([0-9A-Za-z_-]+)',
                        app_url, 'channel id', default=None)

        if channel_playlist_id and channel_playlist_id.startswith('UC'):
            # Swap the 'UC' prefix for 'UU' and hand over to the playlist extractor.
            uploads_playlist_id = 'UU' + channel_playlist_id[2:]
            playlist_url = compat_urlparse.urljoin(
                videos_url, '/playlist?list=%s' % uploads_playlist_id)
            return self.url_result(playlist_url, 'YoutubePlaylist')

        channel_page = self._download_webpage(videos_url, channel_id, 'Downloading page #1')
        autogenerated_marker = re.search(r'''(?x)
                class="[^"]*?(?:
                    channel-header-autogenerated-label|
                    yt-channel-title-autogenerated
                )[^"]*"''', channel_page)

        if autogenerated_marker is not None:
            # The videos are contained in a single page
            # the ajax pages can't be used, they are empty
            entries = []
            for video_id, video_title in self.extract_videos_from_page(channel_page):
                entries.append(self.url_result(
                    video_id, 'Youtube', video_id=video_id,
                    video_title=video_title))
            return self.playlist_result(entries, channel_id)

        try:
            next(self._entries(channel_page, channel_id))
        except StopIteration:
            # No entries at all: surface the page's alert, if it has one.
            alert_message = self._html_search_regex(
                r'(?s)<div[^>]+class=(["\']).*?\byt-alert-message\b.*?\1[^>]*>(?P<alert>[^<]+)</div>',
                channel_page, 'alert', default=None, group='alert')
            if alert_message:
                raise ExtractorError('Youtube said: %s' % alert_message, expected=True)

        return self.playlist_result(self._entries(channel_page, channel_id), channel_id)
c5e8d7af
PH
2928
2929
class YoutubeUserIE(YoutubeChannelIE):
    """Extractor for ``/user/<name>``, ``/c/<name>``, bare ``/<name>`` and ``ytuser:<name>``."""

    IE_DESC = 'YouTube.com user videos (URL or "ytuser" keyword)'
    _VALID_URL = r'(?:(?:https?://(?:\w+\.)?youtube\.com/(?:(?P<user>user|c)/)?(?!(?:attribution_link|watch|results|shared)(?:$|[^a-z_A-Z0-9-])))|ytuser:)(?!feed/)(?P<id>[A-Za-z0-9_-]+)'
    _TEMPLATE_URL = 'https://www.youtube.com/%s/%s/videos'
    IE_NAME = 'youtube:user'

    _TESTS = [{
        'url': 'https://www.youtube.com/user/TheLinuxFoundation',
        'playlist_mincount': 320,
        'info_dict': {
            'id': 'UUfX55Sx5hEFjoC3cNs6mCUQ',
            'title': 'Uploads from The Linux Foundation',
            'uploader': 'The Linux Foundation',
            'uploader_id': 'TheLinuxFoundation',
        }
    }, {
        # Only available via https://www.youtube.com/c/12minuteathlete/videos
        # but not https://www.youtube.com/user/12minuteathlete/videos
        'url': 'https://www.youtube.com/c/12minuteathlete/videos',
        'playlist_mincount': 249,
        'info_dict': {
            'id': 'UUVjM-zV6_opMDx7WYxnjZiQ',
            'title': 'Uploads from 12 Minute Athlete',
            'uploader': '12 Minute Athlete',
            'uploader_id': 'the12minuteathlete',
        }
    }, {
        'url': 'ytuser:phihag',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/c/gametrailers',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/gametrailers',
        'only_matching': True,
    }, {
        # This channel is not available, geo restricted to JP
        'url': 'https://www.youtube.com/user/kananishinoSMEJ/videos',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Accept ``url`` only if no other ``Youtube*IE`` in this module does.

        Don't return True if the url can be extracted with another youtube
        extractor: this regex is too permissive and would match as well.
        """
        for name, klass in globals().items():
            if not (name.startswith('Youtube') and name.endswith('IE')):
                continue
            if klass is cls:
                continue
            if klass.suitable(url):
                return False
        return super(YoutubeUserIE, cls).suitable(url)

    def _build_template_url(self, url, channel_id):
        """Fill ``_TEMPLATE_URL`` from ``url``; the path kind defaults to 'user'.

        ``channel_id`` is not used: the id is re-read from the URL match.
        """
        url_match = re.match(self._VALID_URL, url)
        path_kind = url_match.group('user') or 'user'
        return self._TEMPLATE_URL % (path_kind, url_match.group('id'))
2984
b05654f0 2985
f07e276a
S
class YoutubeLiveIE(YoutubeBaseInfoExtractor):
    """Extractor for ``.../live`` URLs of users and channels."""

    IE_DESC = 'YouTube.com live streams'
    _VALID_URL = r'(?P<base_url>https?://(?:\w+\.)?youtube\.com/(?:(?:user|channel|c)/)?(?P<id>[^/]+))/live'
    IE_NAME = 'youtube:live'

    _TESTS = [{
        'url': 'https://www.youtube.com/user/TheYoungTurks/live',
        'info_dict': {
            'id': 'a48o2S1cPoo',
            'ext': 'mp4',
            'title': 'The Young Turks - Live Main Show',
            'uploader': 'The Young Turks',
            'uploader_id': 'TheYoungTurks',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
            'upload_date': '20150715',
            'license': 'Standard YouTube License',
            'description': 'md5:438179573adcdff3c97ebb1ee632b891',
            'categories': ['News & Politics'],
            'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/TheYoungTurks/live',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Resolve a ``/live`` URL to a video, else to the URL without ``/live``.

        The video result is returned only when the page downloads, its og
        ``type`` starts with 'video' and its ``videoId`` meta is an 11
        character id. In every other case the base URL is handed back.
        """
        url_match = re.match(self._VALID_URL, url)
        channel_id = url_match.group('id')
        base_url = url_match.group('base_url')

        webpage = self._download_webpage(url, channel_id, fatal=False)
        if not webpage:
            return self.url_result(base_url)

        page_type = self._og_search_property(
            'type', webpage, 'page type', default='')
        video_id = self._html_search_meta(
            'videoId', webpage, 'video id', default=None)

        is_video_page = page_type.startswith('video')
        if is_video_page and video_id and re.match(r'^[0-9A-Za-z_-]{11}$', video_id):
            return self.url_result(video_id, YoutubeIE.ie_key())

        return self.url_result(base_url)
3036
3037
e462474e
S
class YoutubePlaylistsIE(YoutubePlaylistsBaseInfoExtractor):
    """Extractor for the ``/playlists`` tab of a user or channel page.

    Only class-level configuration is declared here; the extraction logic is
    inherited from YoutubePlaylistsBaseInfoExtractor.
    """

    IE_NAME = 'youtube:playlists'
    IE_DESC = 'YouTube.com user/channel playlists'
    _VALID_URL = r'https?://(?:\w+\.)?youtube\.com/(?:user|channel)/(?P<id>[^/]+)/playlists'

    _TESTS = [{
        'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
        'playlist_mincount': 4,
        'info_dict': {
            'id': 'ThirstForScience',
            'title': 'ThirstForScience',
        },
    }, {
        # with "Load more" button
        'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
        'playlist_mincount': 70,
        'info_dict': {
            'id': 'igorkle1',
            'title': 'Игорь Клейнер',
        },
    }, {
        'url': 'https://www.youtube.com/channel/UCiU1dHvZObB2iP6xkJ__Icw/playlists',
        'playlist_mincount': 17,
        'info_dict': {
            'id': 'UCiU1dHvZObB2iP6xkJ__Icw',
            'title': 'Chem Player',
        },
        'skip': 'Blocked',
    }]
0c148415
S
3067
3068
870f3bfc
S
class YoutubeSearchBaseInfoExtractor(YoutubePlaylistBaseInfoExtractor):
    """Shared base for the search extractors; overrides only ``_VIDEO_RE``."""

    # Matches a /watch link (11-character video id) and, when the same tag
    # carries one, its title="..." attribute.
    _VIDEO_RE = r'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})(?:[^"]*"[^>]+\btitle="(?P<title>[^"]+))?'
3071
3072
class YoutubeSearchIE(SearchInfoExtractor, YoutubeSearchBaseInfoExtractor):
    """Keyword search extractor (``ytsearch`` search key)."""

    IE_DESC = 'YouTube.com searches'
    # there doesn't appear to be a real limit, for example if you search for
    # 'python' you get more than 8.000.000 results
    _MAX_RESULTS = float('inf')
    IE_NAME = 'youtube:search'
    _SEARCH_KEY = 'ytsearch'
    # Extra query-string parameters appended to every search request;
    # subclasses override this (e.g. to change the sort order).
    _EXTRA_QUERY_ARGS = {}
    _TESTS = []

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query.

        Result pages are downloaded one after another, following the "Next"
        link of each page, until ``n`` videos have been collected, a page
        yields no videos, or no "Next" link is present.

        Returns a playlist result titled after ``query`` holding at most
        ``n`` entries.  Raises ExtractorError (expected) when a result page
        contains a ``search-message`` element.
        """
        videos = []

        url_query = {
            'search_query': query.encode('utf-8'),
        }
        url_query.update(self._EXTRA_QUERY_ARGS)
        result_url = 'https://www.youtube.com/results?' + compat_urllib_parse_urlencode(url_query)

        for pagenum in itertools.count(1):
            data = self._download_json(
                result_url, video_id='query "%s"' % query,
                note='Downloading page %s' % pagenum,
                errnote='Unable to download API page',
                query={'spf': 'navigate'})
            html_content = data[1]['body']['content']

            if 'class="search-message' in html_content:
                raise ExtractorError(
                    '[youtube] No video results', expected=True)

            new_videos = list(self._process_page(html_content))
            videos += new_videos
            # Stop as soon as enough results are available.  Using ">=" (not
            # ">") avoids downloading one more page whose results would be
            # thrown away when exactly ``n`` videos have been collected.
            if not new_videos or len(videos) >= n:
                break
            next_link = self._html_search_regex(
                r'href="(/results\?[^"]*\bsp=[^"]+)"[^>]*>\s*<span[^>]+class="[^"]*\byt-uix-button-content\b[^"]*"[^>]*>Next',
                html_content, 'next link', default=None)
            if next_link is None:
                break
            result_url = compat_urlparse.urljoin('https://www.youtube.com/', next_link)

        # ``n`` may be float('inf') (see _MAX_RESULTS), which cannot be used
        # as a slice bound, so only slice when there is an actual surplus.
        if len(videos) > n:
            videos = videos[:n]
        return self.playlist_result(videos, query)
75dff0ee 3121
c9ae7b95 3122
class YoutubeSearchDateIE(YoutubeSearchIE):
    """Variant of YoutubeSearchIE that sends ``search_sort=video_date_uploaded``."""

    IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
    IE_DESC = 'YouTube.com searches, newest videos first'
    _SEARCH_KEY = 'ytsearchdate'
    # Merged into the search query string by the inherited _get_n_results.
    _EXTRA_QUERY_ARGS = {'search_sort': 'video_date_uploaded'}
75dff0ee 3128
c9ae7b95 3129
class YoutubeSearchURLIE(YoutubeSearchBaseInfoExtractor):
    """Extractor for plain ``/results?search_query=...`` (or ``q=``) URLs."""

    IE_NAME = 'youtube:search_url'
    IE_DESC = 'YouTube.com search URLs'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?P<query>[^&]+)(?:[&]|$)'
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'title': 'youtube-dl test video',
        }
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Download the results page and return its videos as a playlist.

        The playlist title is the URL-decoded search query.
        """
        raw_query = re.match(self._VALID_URL, url).group('query')
        query = compat_urllib_parse_unquote_plus(raw_query)
        webpage = self._download_webpage(url, query)
        entries = self._process_page(webpage)
        return self.playlist_result(entries, playlist_title=query)
c9ae7b95
PH
3150
3151
class YoutubeShowIE(YoutubePlaylistsBaseInfoExtractor):
    """Extractor for ``/show/<id>`` pages."""

    IE_NAME = 'youtube:show'
    IE_DESC = 'YouTube.com (multi-season) shows'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/show/(?P<id>[^?#]*)'
    _TESTS = [{
        'url': 'https://www.youtube.com/show/airdisasters',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'airdisasters',
            'title': 'Air Disasters',
        }
    }]

    def _real_extract(self, url):
        """Delegate to the parent extractor using the show's playlists URL."""
        show_id = self._match_id(url)
        playlists_url = 'https://www.youtube.com/show/%s/playlists' % show_id
        return super(YoutubeShowIE, self)._real_extract(playlists_url)
04cc9617
JMF
3169
3170
class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
    """
    Common implementation for the ``/feed/<name>`` extractors.

    Concrete subclasses have to provide ``_FEED_NAME`` (the last path
    component of the feed URL) and ``_PLAYLIST_TITLE``.
    """
    _LOGIN_REQUIRED = True

    @property
    def IE_NAME(self):
        """Extractor name derived from the subclass' ``_FEED_NAME``."""
        return 'youtube:%s' % self._FEED_NAME

    def _real_initialize(self):
        """Log in before any extraction takes place."""
        self._login()

    def _entries(self, page):
        """Yield one result per previously unseen video id found in the feed.

        ``page`` is the HTML of the first feed page.  Further portions are
        fetched through the ``data-uix-load-more-href`` link for as long as
        one is present and the latest portion contributed new video ids.
        """
        # The extraction process is the same as for playlists, but the regex
        # for the video ids doesn't contain an index
        seen_ids = []
        content_html = page
        more_widget_html = page
        for page_num in itertools.count(1):
            found = re.findall(
                r'href="\s*/watch\?v=([0-9A-Za-z_-]{11})', content_html)

            # 'recommended' feed has infinite 'load more' and each new portion spins
            # the same videos in (sometimes) slightly different order, so we'll check
            # for unicity and break when portion has no new videos
            fresh_ids = [
                candidate for candidate in orderedSet(found)
                if candidate not in seen_ids]
            if not fresh_ids:
                break

            seen_ids.extend(fresh_ids)

            for entry in self._ids_to_results(fresh_ids):
                yield entry

            load_more = re.search(
                r'data-uix-load-more-href="/?(?P<more>[^"]+)"',
                more_widget_html)
            if not load_more:
                break

            more_url = 'https://youtube.com/%s' % load_more.group('more')
            more = self._download_json(
                more_url, self._PLAYLIST_TITLE,
                'Downloading page #%s' % page_num,
                transform_source=uppercase_escape)
            content_html = more['content_html']
            more_widget_html = more['load_more_widget_html']

    def _real_extract(self, url):
        """Download the feed page and wrap its entries in a playlist result."""
        feed_url = 'https://www.youtube.com/feed/%s' % self._FEED_NAME
        page = self._download_webpage(feed_url, self._PLAYLIST_TITLE)
        return self.playlist_result(
            self._entries(page), playlist_title=self._PLAYLIST_TITLE)
25f14e9f
S
3222
3223
class YoutubeWatchLaterIE(YoutubePlaylistIE):
    """Extractor for the ``WL`` (watch later) playlist."""

    IE_NAME = 'youtube:watchlater'
    IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/(?:feed/watch_later|(?:playlist|watch)\?(?:.+&)?list=WL)|:ytwatchlater'

    _TESTS = [{
        'url': 'https://www.youtube.com/playlist?list=WL',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?v=bCNU9TrbiRk&index=1&list=WL',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return the single video if one was selected, else the WL playlist."""
        _, video = self._check_download_just_video(url, 'WL')
        if not video:
            _, playlist = self._extract_playlist('WL')
            return playlist
        return video
f459d170 3243
5f6a1245 3244
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    """Extractor that redirects ``/my_favorites`` to the favourites playlist."""

    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube.com favourite videos, ":ytfav" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/my_favorites|:ytfav(?:ou?rites)?'
    _LOGIN_REQUIRED = True

    def _real_extract(self, url):
        """Find the playlist id on the favourites page and delegate to it."""
        favourites_page = self._download_webpage(
            'https://www.youtube.com/my_favorites',
            'Youtube Favourites videos')
        # The first "list=<id>" occurrence terminated by '"' or '&' is used.
        playlist_id = self._search_regex(
            r'list=(.+?)["&]', favourites_page, 'favourites playlist id')
        return self.url_result(playlist_id, 'YoutubePlaylist')
15870e90
PH
3255
3256
25f14e9f
S
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    """Feed extractor configured for ``/feed/recommended``."""

    # Values consumed by the YoutubeFeedsInfoExtractor implementation.
    _FEED_NAME = 'recommended'
    _PLAYLIST_TITLE = 'Youtube Recommended videos'

    IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/recommended|:ytrec(?:ommended)?'
1ed5b5c9 3262
1ed5b5c9 3263
25f14e9f
S
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    """Feed extractor configured for ``/feed/subscriptions``."""

    # The shortcut accepted by _VALID_URL is ":ytsubs" (with a leading colon),
    # so the description must advertise it in that form.
    IE_DESC = 'YouTube.com subscriptions feed, ":ytsubs" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/subscriptions|:ytsubs(?:criptions)?'
    # Values consumed by the YoutubeFeedsInfoExtractor implementation.
    _FEED_NAME = 'subscriptions'
    _PLAYLIST_TITLE = 'Youtube Subscriptions'
1ed5b5c9 3269
1ed5b5c9 3270
25f14e9f
S
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    """Feed extractor configured for ``/feed/history``."""

    # Values consumed by the YoutubeFeedsInfoExtractor implementation.
    _FEED_NAME = 'history'
    _PLAYLIST_TITLE = 'Youtube History'

    IE_DESC = 'Youtube watch history, ":ythistory" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/history|:ythistory'
1ed5b5c9
JMF
3276
3277
15870e90
PH
class YoutubeTruncatedURLIE(InfoExtractor):
    """Catch watch/attribution URLs that carry no video id and explain why."""

    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    # Matches only when the URL ends right after the (optional) single
    # parameter, i.e. when no "v=" parameter follows.
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError with a quoting hint."""
        message = (
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like  youtube-dl '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply  youtube-dl BaW_jenozKc  .')
        raise ExtractorError(message, expected=True)
772fd5cc
PH
3325
3326
class YoutubeTruncatedIDIE(InfoExtractor):
    """Catch watch URLs whose ``v=`` value is shorter than 11 characters."""

    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError naming the short id."""
        truncated_id = self._match_id(url)
        message = 'Incomplete YouTube ID %s. URL %s looks truncated.' % (
            truncated_id, url)
        raise ExtractorError(message, expected=True)