]> jfr.im git - yt-dlp.git/blame - youtube_dl/extractor/youtube.py
[tv2] detect DRM protection
[yt-dlp.git] / youtube_dl / extractor / youtube.py
CommitLineData
c5e8d7af 1# coding: utf-8
c5e8d7af 2
78caa52a
PH
3from __future__ import unicode_literals
4
5
0ca96d48 6import itertools
c5e8d7af 7import json
c4417ddb 8import os.path
d77ab8e2 9import random
c5e8d7af 10import re
42939b61 11import time
e0df6211 12import traceback
c5e8d7af 13
b05654f0 14from .common import InfoExtractor, SearchInfoExtractor
2b25cb5d 15from ..jsinterp import JSInterpreter
54256267 16from ..swfinterp import SWFInterpreter
4bb4a188 17from ..compat import (
edf3e38e 18 compat_chr,
f8c55c66 19 compat_HTTPError,
8d81f3e3 20 compat_kwargs,
c5e8d7af 21 compat_parse_qs,
7fd002c0
S
22 compat_urllib_parse_unquote,
23 compat_urllib_parse_unquote_plus,
15707c7e 24 compat_urllib_parse_urlencode,
7c80519c 25 compat_urllib_parse_urlparse,
7c61bd36 26 compat_urlparse,
c5e8d7af 27 compat_str,
4bb4a188
PH
28)
29from ..utils import (
27019dbb 30 bool_or_none,
c5e8d7af 31 clean_html,
026fbedc 32 dict_get,
9b9c5355 33 error_to_compat_str,
351f37c0 34 extract_attributes,
c5e8d7af 35 ExtractorError,
2d30521a 36 float_or_none,
4bb4a188
PH
37 get_element_by_attribute,
38 get_element_by_id,
dd27fd17 39 int_or_none,
94278f72 40 mimetype2ext,
4bb4a188 41 orderedSet,
6310acf5 42 parse_codecs,
7c80519c 43 parse_duration,
0cb58b02 44 remove_quotes,
3995d37d 45 remove_start,
cf7e015f 46 smuggle_url,
dbdaaa23 47 str_or_none,
c93d53f5 48 str_to_int,
556dbe7f 49 try_get,
c5e8d7af
PH
50 unescapeHTML,
51 unified_strdate,
cf7e015f 52 unsmuggle_url,
81c2f20b 53 uppercase_escape,
21c340b8 54 url_or_none,
6e6bc8da 55 urlencode_postdata,
c5e8d7af
PH
56)
57
5f6a1245 58
class YoutubeBaseInfoExtractor(InfoExtractor):
    """Provide base functions for Youtube extractors"""
    _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
    _TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge'

    _LOOKUP_URL = 'https://accounts.google.com/_/signin/sl/lookup'
    _CHALLENGE_URL = 'https://accounts.google.com/_/signin/sl/challenge'
    _TFA_URL = 'https://accounts.google.com/_/signin/challenge?hl=en&TL={0}'

    _NETRC_MACHINE = 'youtube'
    # If True it will raise an error if no login info is provided
    _LOGIN_REQUIRED = False

    _PLAYLIST_ID_RE = r'(?:PL|LL|EC|UU|FL|RD|UL|TL|OLAK5uy_)[0-9A-Za-z-_]{10,}'

    def _set_language(self):
        """Set the PREF cookie on .youtube.com with hl=en."""
        self._set_cookie(
            '.youtube.com', 'PREF', 'f1=50000000&hl=en',
            # YouTube sets the expire time to about two months
            expire_time=time.time() + 2 * 30 * 24 * 3600)

    def _ids_to_results(self, ids):
        """Return a list with one 'Youtube' url_result per video id in `ids`."""
        return [
            self.url_result(vid_id, 'Youtube', video_id=vid_id)
            for vid_id in ids]

    def _login(self):
        """
        Attempt to log in to YouTube.
        True is returned if successful or skipped.
        False is returned if login failed.

        If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.

        The flow is: fetch the login page for its hidden inputs, look up the
        account to obtain a user hash, submit the password challenge,
        optionally submit a two-factor code, and finally request the
        CheckCookie URL to confirm the session.
        """
        username, password = self._get_login_info()
        # No authentication to be performed
        if username is None:
            if self._LOGIN_REQUIRED and self._downloader.params.get('cookiefile') is None:
                raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
            return True

        login_page = self._download_webpage(
            self._LOGIN_URL, None,
            note='Downloading login page',
            errnote='unable to fetch login page', fatal=False)
        if login_page is False:
            # Explicit False (not a bare return) to match the documented contract
            return False

        login_form = self._hidden_inputs(login_page)

        def req(url, f_req, note, errnote):
            """POST `f_req` (JSON-encoded) together with the hidden login form fields.

            Returns the parsed JSON response, or False on failure since the
            download is non-fatal.
            """
            data = login_form.copy()
            data.update({
                'pstMsg': 1,
                'checkConnection': 'youtube',
                'checkedDomains': 'youtube',
                'hl': 'en',
                'deviceinfo': '[null,null,null,[],null,"US",null,null,[],"GlifWebSignIn",null,[null,null,[]]]',
                'f.req': json.dumps(f_req),
                'flowName': 'GlifWebSignIn',
                'flowEntry': 'ServiceLogin',
                # TODO: reverse actual botguard identifier generation algo
                'bgRequest': '["identifier",""]',
            })
            return self._download_json(
                url, None, note=note, errnote=errnote,
                # Drop everything before the first '[' so the payload parses as JSON
                transform_source=lambda s: re.sub(r'^[^[]*', '', s),
                fatal=False,
                data=urlencode_postdata(data), headers={
                    'Content-Type': 'application/x-www-form-urlencoded;charset=utf-8',
                    'Google-Accounts-XSRF': 1,
                })

        def warn(message):
            self._downloader.report_warning(message)

        # Shared by the lookup and the challenge requests
        continue_url = 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn'

        lookup_req = [
            username,
            None, [], None, 'US', None, None, 2, False, True,
            [
                None, None,
                [2, 1, None, 1,
                 continue_url,
                 None, [], 4],
                1, [None, None, []], None, None, None, True
            ],
            username,
        ]

        lookup_results = req(
            self._LOOKUP_URL, lookup_req,
            'Looking up account info', 'Unable to look up account info')

        if lookup_results is False:
            return False

        user_hash = try_get(lookup_results, lambda x: x[0][2], compat_str)
        if not user_hash:
            warn('Unable to extract user hash')
            return False

        challenge_req = [
            user_hash,
            None, 1, None, [1, None, None, None, [password, None, True]],
            [
                None, None, [2, 1, None, 1, continue_url, None, [], 4],
                1, [None, None, []], None, None, None, True
            ]]

        challenge_results = req(
            self._CHALLENGE_URL, challenge_req,
            'Logging in', 'Unable to log in')

        if challenge_results is False:
            # Explicit False (not a bare return) to match the documented contract
            return False

        login_res = try_get(challenge_results, lambda x: x[0][5], list)
        if login_res:
            login_msg = try_get(login_res, lambda x: x[5], compat_str)
            # The conditional is parenthesised so the prefix is always kept;
            # '%' binds tighter than 'if ... else'.
            warn(
                'Unable to login: %s' % (
                    'Invalid password'
                    if login_msg == 'INCORRECT_ANSWER_ENTERED' else login_msg))
            return False

        res = try_get(challenge_results, lambda x: x[0][-1], list)
        if not res:
            warn('Unable to extract result entry')
            return False

        login_challenge = try_get(res, lambda x: x[0][0], list)
        if login_challenge:
            challenge_str = try_get(login_challenge, lambda x: x[2], compat_str)
            if challenge_str == 'TWO_STEP_VERIFICATION':
                # SEND_SUCCESS - TFA code has been successfully sent to phone
                # QUOTA_EXCEEDED - reached the limit of TFA codes
                status = try_get(login_challenge, lambda x: x[5], compat_str)
                if status == 'QUOTA_EXCEEDED':
                    warn('Exceeded the limit of TFA codes, try later')
                    return False

                tl = try_get(challenge_results, lambda x: x[1][2], compat_str)
                if not tl:
                    warn('Unable to extract TL')
                    return False

                tfa_code = self._get_tfa_info('2-step verification code')

                if not tfa_code:
                    warn(
                        'Two-factor authentication required. Provide it either interactively or with --twofactor <code>'
                        ' (Note that only TOTP (Google Authenticator App) codes work at this time.)')
                    return False

                tfa_code = remove_start(tfa_code, 'G-')

                tfa_req = [
                    user_hash, None, 2, None,
                    [
                        9, None, None, None, None, None, None, None,
                        [None, tfa_code, True, 2]
                    ]]

                tfa_results = req(
                    self._TFA_URL.format(tl), tfa_req,
                    'Submitting TFA code', 'Unable to submit TFA code')

                if tfa_results is False:
                    return False

                tfa_res = try_get(tfa_results, lambda x: x[0][5], list)
                if tfa_res:
                    tfa_msg = try_get(tfa_res, lambda x: x[5], compat_str)
                    # Parenthesised for the same precedence reason as above
                    warn(
                        'Unable to finish TFA: %s' % (
                            'Invalid TFA code'
                            if tfa_msg == 'INCORRECT_ANSWER_ENTERED' else tfa_msg))
                    return False

                check_cookie_url = try_get(
                    tfa_results, lambda x: x[0][-1][2], compat_str)
            else:
                CHALLENGES = {
                    'LOGIN_CHALLENGE': "This device isn't recognized. For your security, Google wants to make sure it's really you.",
                    'USERNAME_RECOVERY': 'Please provide additional information to aid in the recovery process.',
                    'REAUTH': "There is something unusual about your activity. For your security, Google wants to make sure it's really you.",
                }
                challenge = CHALLENGES.get(
                    challenge_str,
                    '%s returned error %s.' % (self.IE_NAME, challenge_str))
                warn('%s\nGo to https://accounts.google.com/, login and solve a challenge.' % challenge)
                return False
        else:
            check_cookie_url = try_get(res, lambda x: x[2], compat_str)

        if not check_cookie_url:
            warn('Unable to extract CheckCookie URL')
            return False

        check_cookie_results = self._download_webpage(
            check_cookie_url, None, 'Checking cookie', fatal=False)

        if check_cookie_results is False:
            return False

        if 'https://myaccount.google.com/' not in check_cookie_results:
            warn('Unable to log in')
            return False

        return True

    def _download_webpage_handle(self, *args, **kwargs):
        """Delegate to the parent implementation with disable_polymer=true added to the query.

        The caller's query dict is copied, never mutated. An explicit
        query=None is treated like a missing query.
        """
        query = (kwargs.get('query') or {}).copy()
        query['disable_polymer'] = 'true'
        kwargs['query'] = query
        return super(YoutubeBaseInfoExtractor, self)._download_webpage_handle(
            *args, **compat_kwargs(kwargs))

    def _real_initialize(self):
        """Set the language cookie and attempt to log in; a no-op without a downloader."""
        if self._downloader is None:
            return
        self._set_language()
        if not self._login():
            return
c5e8d7af 281
8377574c 282
class YoutubeEntryListBaseInfoExtractor(YoutubeBaseInfoExtractor):
    # Extract entries from page with "Load more" button
    def _entries(self, page, playlist_id):
        """Yield the entries of `page` and of every "Load more" continuation.

        Each chunk of HTML is handed to self._process_page(). The next chunk
        is located through the data-uix-load-more-href attribute of the most
        recent load-more widget; iteration stops when no such attribute is
        found or when a continuation carries only whitespace content.
        """
        max_retries = 3
        widget_html = page
        current_html = page
        for page_num in itertools.count(1):
            for entry in self._process_page(current_html):
                yield entry

            load_more = re.search(
                r'data-uix-load-more-href="/?(?P<more>[^"]+)"', widget_html)
            if load_more is None:
                return

            attempt = 0
            while True:
                try:
                    # Downloading page may result in intermittent 5xx HTTP error
                    # that is usually worked around with a retry
                    more = self._download_json(
                        'https://youtube.com/%s' % load_more.group('more'), playlist_id,
                        'Downloading page #%s%s'
                        % (page_num, ' (retry #%d)' % attempt if attempt else ''),
                        transform_source=uppercase_escape)
                except ExtractorError as e:
                    is_transient = (
                        isinstance(e.cause, compat_HTTPError)
                        and e.cause.code in (500, 503))
                    if not is_transient:
                        raise
                    attempt += 1
                    if attempt > max_retries:
                        raise
                else:
                    break

            current_html = more['content_html']
            if not current_html.strip():
                # Some webpages show a "Load more" button but they don't
                # have more videos
                return
            widget_html = more['load_more_widget_html']
320
061a75ed
S
321
class YoutubePlaylistBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
    def _process_page(self, content):
        """Yield a 'Youtube' url_result for each (id, title) pair found in `content`."""
        for vid, title in self.extract_videos_from_page(content):
            yield self.url_result(vid, 'Youtube', vid, title)

    def extract_videos_from_page_impl(self, video_re, page, ids_in_page, titles_in_page):
        """Collect video ids and titles matched by `video_re` in `page`.

        `ids_in_page` and `titles_in_page` are parallel lists that are
        updated in place. An id seen again is not duplicated; its title slot
        is only filled if it was still empty.
        """
        for match in re.finditer(video_re, page):
            named_groups = match.groupdict()
            # The link with index 0 is not the first video of the playlist (not sure if still actual)
            # NOTE(review): the test below compares the 'id' group, not the
            # 'index' group, with '0' - confirm that this is intended.
            if 'index' in named_groups and match.group('id') == '0':
                continue
            video_id = match.group('id')
            if 'title' in named_groups:
                video_title = unescapeHTML(match.group('title'))
            else:
                video_title = None
            if video_title:
                video_title = video_title.strip()
            if video_title == '► Play all':
                video_title = None
            try:
                known_at = ids_in_page.index(video_id)
            except ValueError:
                # First occurrence of this id
                ids_in_page.append(video_id)
                titles_in_page.append(video_title)
                continue
            if video_title and not titles_in_page[known_at]:
                titles_in_page[known_at] = video_title

    def extract_videos_from_page(self, page):
        """Return zipped (video_id, title) pairs extracted from `page` using self._VIDEO_RE."""
        found_ids, found_titles = [], []
        self.extract_videos_from_page_impl(
            self._VIDEO_RE, page, found_ids, found_titles)
        return zip(found_ids, found_titles)
353
354
061a75ed
S
class YoutubePlaylistsBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
    def _process_page(self, content):
        """Yield a 'YoutubePlaylist' url_result for each distinct playlist id linked in `content`."""
        linked_ids = re.findall(
            r'<h3[^>]+class="[^"]*yt-lockup-title[^"]*"[^>]*><a[^>]+href="/?playlist\?list=([0-9A-Za-z-_]{10,})"',
            content)
        for list_id in orderedSet(linked_ids):
            playlist_url = 'https://www.youtube.com/playlist?list=%s' % list_id
            yield self.url_result(playlist_url, 'YoutubePlaylist')

    def _real_extract(self, url):
        """Download `url` and return a playlist result built from its entries."""
        list_id = self._match_id(url)
        page_html = self._download_webpage(url, list_id)
        page_title = self._og_search_title(page_html, fatal=False)
        entries = self._entries(page_html, list_id)
        return self.playlist_result(entries, list_id, page_title)
0c148415
S
368
369
360e1ca5 370class YoutubeIE(YoutubeBaseInfoExtractor):
78caa52a 371 IE_DESC = 'YouTube.com'
cb7dfeea 372 _VALID_URL = r"""(?x)^
c5e8d7af 373 (
edb53e2d 374 (?:https?://|//) # http(s):// or protocol-independent URL
cb7dfeea 375 (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/|
484aaeb2 376 (?:www\.)?deturl\.com/www\.youtube\.com/|
e70dc1d1 377 (?:www\.)?pwnyoutube\.com/|
8b561bfc 378 (?:www\.)?hooktube\.com/|
f7000f3a 379 (?:www\.)?yourepeat\.com/|
e69ae5b9 380 tube\.majestyc\.net/|
ba036333 381 # Invidious instances taken from https://github.com/omarroth/invidious/wiki/Invidious-Instances
77d95677 382 (?:(?:www|dev)\.)?invidio\.us/|
ba036333 383 (?:(?:www|no)\.)?invidiou\.sh/|
384 (?:(?:www|fi|de)\.)?invidious\.snopyta\.org/|
8ae113ca 385 (?:www\.)?invidious\.kabi\.tk/|
ba036333 386 (?:www\.)?invidious\.13ad\.de/|
791d2e81 387 (?:www\.)?invidious\.mastodon\.host/|
494d664e 388 (?:www\.)?invidious\.nixnet\.xyz/|
666d808e 389 (?:www\.)?invidious\.drycat\.fr/|
ba036333 390 (?:www\.)?tube\.poal\.co/|
8ae113ca 391 (?:www\.)?vid\.wxzm\.sx/|
494d664e 392 (?:www\.)?yt\.elukerio\.org/|
894b3826 393 (?:www\.)?yt\.lelux\.fi/|
bff90fc5 394 (?:www\.)?kgg2m7yk5aybusll\.onion/|
395 (?:www\.)?qklhadlycap4cnod\.onion/|
396 (?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion/|
397 (?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion/|
398 (?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion/|
399 (?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion/|
33c1c7d8 400 (?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p/|
e69ae5b9 401 youtube\.googleapis\.com/) # the various hostnames, with wildcard subdomains
c5e8d7af
PH
402 (?:.*?\#/)? # handle anchor (#/) redirect urls
403 (?: # the various things that can precede the ID:
ac7553d0 404 (?:(?:v|embed|e)/(?!videoseries)) # v/ or embed/ or e/
c5e8d7af 405 |(?: # or the v= param in all its forms
f7000f3a 406 (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
c5e8d7af 407 (?:\?|\#!?) # the params delimiter ? or # or #!
040ac686 408 (?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
c5e8d7af
PH
409 v=
410 )
f4b05232 411 ))
cbaed4bb
S
412 |(?:
413 youtu\.be| # just youtu.be/xxxx
6d4fc66b
S
414 vid\.plus| # or vid.plus/xxxx
415 zwearz\.com/watch| # or zwearz.com/watch/xxxx
cbaed4bb 416 )/
edb53e2d 417 |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
f4b05232 418 )
c5e8d7af 419 )? # all until now is optional -> you can pass the naked ID
8963d9c2 420 ([0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
d0ba5587
S
421 (?!.*?\blist=
422 (?:
423 %(playlist_id)s| # combined list/video URLs are handled by the playlist IE
424 WL # WL are handled by the watch later IE
425 )
426 )
c5e8d7af 427 (?(1).+)? # if we found the ID, everything can follow
d0ba5587 428 $""" % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
c5e8d7af 429 _NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
2c62dc26 430 _formats = {
c2d3cb4c 431 '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
432 '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
433 '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
434 '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
435 '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
436 '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
437 '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
438 '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
3834d3e3 439 # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
c2d3cb4c 440 '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
441 '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
442 '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
443 '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
444 '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
445 '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
e1a0bfdf 446 '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
c2d3cb4c 447 '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
448 '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
e1a0bfdf 449
450
451 # 3D videos
c2d3cb4c 452 '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
453 '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
454 '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
455 '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
e1a0bfdf 456 '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
457 '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
458 '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
836a086c 459
96fb5605 460 # Apple HTTP Live Streaming
11f12195 461 '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
c2d3cb4c 462 '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
463 '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
464 '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
465 '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
466 '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
e1a0bfdf 467 '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
468 '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
2c62dc26
PH
469
470 # DASH mp4 video
d23028a8
S
471 '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
472 '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
473 '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
474 '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
475 '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
067aa17e 476 '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
d23028a8
S
477 '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
478 '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
479 '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
480 '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
481 '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
482 '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},
836a086c 483
f6f1fc92 484 # Dash mp4 audio
d23028a8
S
485 '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
486 '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
487 '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
488 '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
489 '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
490 '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
491 '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},
836a086c
AZ
492
493 # Dash webm
d23028a8
S
494 '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
495 '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
496 '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
497 '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
498 '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
499 '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
500 '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
501 '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
502 '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
503 '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
504 '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
505 '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
506 '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
507 '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
508 '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
4c6b4764 509 # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
d23028a8
S
510 '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
511 '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
512 '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
513 '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
514 '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
515 '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
2c62dc26
PH
516
517 # Dash webm audio
d23028a8
S
518 '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
519 '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},
ce6b9a2d 520
0857baad 521 # Dash webm audio with opus inside
d23028a8
S
522 '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
523 '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
524 '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},
0857baad 525
ce6b9a2d
PH
526 # RTMP (unnamed)
527 '_rtmp': {'protocol': 'rtmp'},
b85eae0f
S
528
529 # av01 video only formats sometimes served with "unknown" codecs
530 '394': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
531 '395': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
532 '396': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
533 '397': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
c5e8d7af 534 }
19041a38 535 _SUBTITLE_FORMATS = ('srv1', 'srv2', 'srv3', 'ttml', 'vtt')
836a086c 536
fd5c4aab
S
537 _GEO_BYPASS = False
538
78caa52a 539 IE_NAME = 'youtube'
2eb88d95
PH
540 _TESTS = [
541 {
2d3d2997 542 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
4bc3a23e
PH
543 'info_dict': {
544 'id': 'BaW_jenozKc',
545 'ext': 'mp4',
546 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
547 'uploader': 'Philipp Hagemeister',
548 'uploader_id': 'phihag',
ec85ded8 549 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
dd4c4492
S
550 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
551 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
4bc3a23e
PH
552 'upload_date': '20121002',
553 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
554 'categories': ['Science & Technology'],
000b6b5a 555 'tags': ['youtube-dl'],
556dbe7f 556 'duration': 10,
dbdaaa23 557 'view_count': int,
3e7c1224
PH
558 'like_count': int,
559 'dislike_count': int,
7c80519c 560 'start_time': 1,
297a564b 561 'end_time': 9,
2eb88d95 562 }
0e853ca4 563 },
0e853ca4 564 {
2d3d2997 565 'url': 'https://www.youtube.com/watch?v=UxxajLWwzqY',
4bc3a23e
PH
566 'note': 'Test generic use_cipher_signature video (#897)',
567 'info_dict': {
568 'id': 'UxxajLWwzqY',
569 'ext': 'mp4',
570 'upload_date': '20120506',
571 'title': 'Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]',
0cb58b02 572 'alt_title': 'I Love It (feat. Charli XCX)',
7caf9830 573 'description': 'md5:f3ceb5ef83a08d95b9d146f973157cc8',
000b6b5a
S
574 'tags': ['Icona Pop i love it', 'sweden', 'pop music', 'big beat records', 'big beat', 'charli',
575 'xcx', 'charli xcx', 'girls', 'hbo', 'i love it', "i don't care", 'icona', 'pop',
576 'iconic ep', 'iconic', 'love', 'it'],
556dbe7f 577 'duration': 180,
4bc3a23e
PH
578 'uploader': 'Icona Pop',
579 'uploader_id': 'IconaPop',
ec85ded8 580 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IconaPop',
0cb58b02 581 'creator': 'Icona Pop',
936784b2
S
582 'track': 'I Love It (feat. Charli XCX)',
583 'artist': 'Icona Pop',
2eb88d95 584 }
c108eb73
JMF
585 },
586 {
4bc3a23e
PH
587 'url': 'https://www.youtube.com/watch?v=07FYdnEawAQ',
588 'note': 'Test VEVO video with age protection (#956)',
589 'info_dict': {
590 'id': '07FYdnEawAQ',
591 'ext': 'mp4',
592 'upload_date': '20130703',
4fe54c12 593 'title': 'Justin Timberlake - Tunnel Vision (Official Music Video) (Explicit)',
0cb58b02 594 'alt_title': 'Tunnel Vision',
4fe54c12 595 'description': 'md5:07dab3356cde4199048e4c7cd93471e1',
556dbe7f 596 'duration': 419,
4bc3a23e
PH
597 'uploader': 'justintimberlakeVEVO',
598 'uploader_id': 'justintimberlakeVEVO',
ec85ded8 599 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/justintimberlakeVEVO',
0cb58b02 600 'creator': 'Justin Timberlake',
7e72694b 601 'track': 'Tunnel Vision',
936784b2 602 'artist': 'Justin Timberlake',
34952f09 603 'age_limit': 18,
c108eb73
JMF
604 }
605 },
fccd3771 606 {
4bc3a23e
PH
607 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
608 'note': 'Embed-only video (#1746)',
609 'info_dict': {
610 'id': 'yZIXLfi8CZQ',
611 'ext': 'mp4',
612 'upload_date': '20120608',
613 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
614 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
615 'uploader': 'SET India',
94bfcd23 616 'uploader_id': 'setindia',
ec85ded8 617 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
94bfcd23 618 'age_limit': 18,
fccd3771
PH
619 }
620 },
11b56058 621 {
2d3d2997 622 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=UxxajLWwzqY',
11b56058
PM
623 'note': 'Use the first video ID in the URL',
624 'info_dict': {
625 'id': 'BaW_jenozKc',
626 'ext': 'mp4',
627 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
628 'uploader': 'Philipp Hagemeister',
629 'uploader_id': 'phihag',
ec85ded8 630 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
11b56058
PM
631 'upload_date': '20121002',
632 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
633 'categories': ['Science & Technology'],
634 'tags': ['youtube-dl'],
556dbe7f 635 'duration': 10,
dbdaaa23 636 'view_count': int,
11b56058
PM
637 'like_count': int,
638 'dislike_count': int,
34a7de29
S
639 },
640 'params': {
641 'skip_download': True,
642 },
11b56058 643 },
dd27fd17 644 {
2d3d2997 645 'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
4bc3a23e
PH
646 'note': '256k DASH audio (format 141) via DASH manifest',
647 'info_dict': {
648 'id': 'a9LDPn-MO4I',
649 'ext': 'm4a',
650 'upload_date': '20121002',
651 'uploader_id': '8KVIDEO',
ec85ded8 652 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
4bc3a23e
PH
653 'description': '',
654 'uploader': '8KVIDEO',
655 'title': 'UHDTV TEST 8K VIDEO.mp4'
4919603f 656 },
4bc3a23e
PH
657 'params': {
658 'youtube_include_dash_manifest': True,
659 'format': '141',
4919603f 660 },
de3c7fe0 661 'skip': 'format 141 not served anymore',
dd27fd17 662 },
3489b7d2
JMF
663 # DASH manifest with encrypted signature
664 {
78caa52a
PH
665 'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
666 'info_dict': {
667 'id': 'IB3lcPjvWLA',
668 'ext': 'm4a',
4fe54c12
S
669 'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
670 'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
556dbe7f 671 'duration': 244,
78caa52a
PH
672 'uploader': 'AfrojackVEVO',
673 'uploader_id': 'AfrojackVEVO',
674 'upload_date': '20131011',
3489b7d2 675 },
4bc3a23e 676 'params': {
78caa52a 677 'youtube_include_dash_manifest': True,
de3c7fe0 678 'format': '141/bestaudio[ext=m4a]',
3489b7d2
JMF
679 },
680 },
aaeb86f6
S
681 # JS player signature function name containing $
682 {
683 'url': 'https://www.youtube.com/watch?v=nfWlot6h_JM',
684 'info_dict': {
685 'id': 'nfWlot6h_JM',
686 'ext': 'm4a',
687 'title': 'Taylor Swift - Shake It Off',
4fe54c12 688 'description': 'md5:bec2185232c05479482cb5a9b82719bf',
556dbe7f 689 'duration': 242,
aaeb86f6
S
690 'uploader': 'TaylorSwiftVEVO',
691 'uploader_id': 'TaylorSwiftVEVO',
692 'upload_date': '20140818',
0cb58b02 693 'creator': 'Taylor Swift',
aaeb86f6
S
694 },
695 'params': {
696 'youtube_include_dash_manifest': True,
de3c7fe0 697 'format': '141/bestaudio[ext=m4a]',
aaeb86f6
S
698 },
699 },
aa79ac0c
PH
700 # Controversy video
701 {
702 'url': 'https://www.youtube.com/watch?v=T4XJQO3qol8',
703 'info_dict': {
704 'id': 'T4XJQO3qol8',
705 'ext': 'mp4',
556dbe7f 706 'duration': 219,
aa79ac0c 707 'upload_date': '20100909',
4fe54c12 708 'uploader': 'Amazing Atheist',
aa79ac0c 709 'uploader_id': 'TheAmazingAtheist',
ec85ded8 710 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheAmazingAtheist',
aa79ac0c
PH
711 'title': 'Burning Everyone\'s Koran',
712 'description': 'SUBSCRIBE: http://www.youtube.com/saturninefilms\n\nEven Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html',
713 }
c522adb1
JMF
714 },
715 # Normal age-gate video (No vevo, embed allowed)
716 {
2d3d2997 717 'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
c522adb1
JMF
718 'info_dict': {
719 'id': 'HtVdAasjOgU',
720 'ext': 'mp4',
721 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
ec85ded8 722 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
556dbe7f 723 'duration': 142,
c522adb1
JMF
724 'uploader': 'The Witcher',
725 'uploader_id': 'WitcherGame',
ec85ded8 726 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
c522adb1 727 'upload_date': '20140605',
34952f09 728 'age_limit': 18,
c522adb1
JMF
729 },
730 },
fccae2b9
S
731 # Age-gate video with encrypted signature
732 {
2d3d2997 733 'url': 'https://www.youtube.com/watch?v=6kLq3WMV1nU',
fccae2b9
S
734 'info_dict': {
735 'id': '6kLq3WMV1nU',
4fe54c12 736 'ext': 'mp4',
fccae2b9
S
737 'title': 'Dedication To My Ex (Miss That) (Lyric Video)',
738 'description': 'md5:33765bb339e1b47e7e72b5490139bb41',
eb6793ba 739 'duration': 246,
fccae2b9
S
740 'uploader': 'LloydVEVO',
741 'uploader_id': 'LloydVEVO',
ec85ded8 742 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/LloydVEVO',
fccae2b9 743 'upload_date': '20110629',
34952f09 744 'age_limit': 18,
fccae2b9
S
745 },
746 },
067aa17e 747 # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
7d02dcfa 748 # YouTube Red ad is not captured for creator
774e208f
PH
749 {
750 'url': '__2ABJjxzNo',
751 'info_dict': {
752 'id': '__2ABJjxzNo',
753 'ext': 'mp4',
556dbe7f 754 'duration': 266,
774e208f
PH
755 'upload_date': '20100430',
756 'uploader_id': 'deadmau5',
ec85ded8 757 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
0cb58b02 758 'creator': 'deadmau5',
774e208f
PH
759 'description': 'md5:12c56784b8032162bb936a5f76d55360',
760 'uploader': 'deadmau5',
761 'title': 'Deadmau5 - Some Chords (HD)',
0cb58b02 762 'alt_title': 'Some Chords',
774e208f
PH
763 },
764 'expected_warnings': [
765 'DASH manifest missing',
766 ]
e52a40ab 767 },
067aa17e 768 # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
e52a40ab
PH
769 {
770 'url': 'lqQg6PlCWgI',
771 'info_dict': {
772 'id': 'lqQg6PlCWgI',
773 'ext': 'mp4',
556dbe7f 774 'duration': 6085,
90227264 775 'upload_date': '20150827',
cbe2bd91 776 'uploader_id': 'olympic',
ec85ded8 777 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
cbe2bd91 778 'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
be49068d 779 'uploader': 'Olympic',
cbe2bd91
PH
780 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
781 },
782 'params': {
783 'skip_download': 'requires avconv',
e52a40ab 784 }
cbe2bd91 785 },
6271f1ca
PH
786 # Non-square pixels
787 {
788 'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
789 'info_dict': {
790 'id': '_b-2C3KPAM0',
791 'ext': 'mp4',
792 'stretched_ratio': 16 / 9.,
556dbe7f 793 'duration': 85,
6271f1ca
PH
794 'upload_date': '20110310',
795 'uploader_id': 'AllenMeow',
ec85ded8 796 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
6271f1ca 797 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
eb6793ba 798 'uploader': '孫ᄋᄅ',
6271f1ca
PH
799 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
800 },
06b491eb
S
801 },
802 # url_encoded_fmt_stream_map is empty string
803 {
804 'url': 'qEJwOuvDf7I',
805 'info_dict': {
806 'id': 'qEJwOuvDf7I',
f57b7835 807 'ext': 'webm',
06b491eb
S
808 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
809 'description': '',
810 'upload_date': '20150404',
811 'uploader_id': 'spbelect',
812 'uploader': 'Наблюдатели Петербурга',
813 },
814 'params': {
815 'skip_download': 'requires avconv',
e323cf3f
S
816 },
817 'skip': 'This live event has ended.',
06b491eb 818 },
067aa17e 819 # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
da77d856
S
820 {
821 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
822 'info_dict': {
823 'id': 'FIl7x6_3R5Y',
eb6793ba 824 'ext': 'webm',
da77d856
S
825 'title': 'md5:7b81415841e02ecd4313668cde88737a',
826 'description': 'md5:116377fd2963b81ec4ce64b542173306',
556dbe7f 827 'duration': 220,
da77d856
S
828 'upload_date': '20150625',
829 'uploader_id': 'dorappi2000',
ec85ded8 830 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
da77d856 831 'uploader': 'dorappi2000',
eb6793ba 832 'formats': 'mincount:31',
da77d856 833 },
eb6793ba 834 'skip': 'not actual anymore',
2ee8f5d8 835 },
8a1a26ce
YCH
836 # DASH manifest with segment_list
837 {
838 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
839 'md5': '8ce563a1d667b599d21064e982ab9e31',
840 'info_dict': {
841 'id': 'CsmdDsKjzN8',
842 'ext': 'mp4',
17ee98e1 843 'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510
8a1a26ce
YCH
844 'uploader': 'Airtek',
845 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
846 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
847 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
848 },
849 'params': {
850 'youtube_include_dash_manifest': True,
851 'format': '135', # bestvideo
be49068d
S
852 },
853 'skip': 'This live event has ended.',
2ee8f5d8 854 },
cf7e015f
S
855 {
856 # Multifeed videos (multiple cameras), URL is for Main Camera
857 'url': 'https://www.youtube.com/watch?v=jqWvoWXjCVs',
858 'info_dict': {
859 'id': 'jqWvoWXjCVs',
860 'title': 'teamPGP: Rocket League Noob Stream',
861 'description': 'md5:dc7872fb300e143831327f1bae3af010',
862 },
863 'playlist': [{
864 'info_dict': {
865 'id': 'jqWvoWXjCVs',
866 'ext': 'mp4',
867 'title': 'teamPGP: Rocket League Noob Stream (Main Camera)',
868 'description': 'md5:dc7872fb300e143831327f1bae3af010',
556dbe7f 869 'duration': 7335,
cf7e015f
S
870 'upload_date': '20150721',
871 'uploader': 'Beer Games Beer',
872 'uploader_id': 'beergamesbeer',
ec85ded8 873 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
7caf9830 874 'license': 'Standard YouTube License',
cf7e015f
S
875 },
876 }, {
877 'info_dict': {
878 'id': '6h8e8xoXJzg',
879 'ext': 'mp4',
880 'title': 'teamPGP: Rocket League Noob Stream (kreestuh)',
881 'description': 'md5:dc7872fb300e143831327f1bae3af010',
556dbe7f 882 'duration': 7337,
cf7e015f
S
883 'upload_date': '20150721',
884 'uploader': 'Beer Games Beer',
885 'uploader_id': 'beergamesbeer',
ec85ded8 886 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
7caf9830 887 'license': 'Standard YouTube License',
cf7e015f
S
888 },
889 }, {
890 'info_dict': {
891 'id': 'PUOgX5z9xZw',
892 'ext': 'mp4',
893 'title': 'teamPGP: Rocket League Noob Stream (grizzle)',
894 'description': 'md5:dc7872fb300e143831327f1bae3af010',
556dbe7f 895 'duration': 7337,
cf7e015f
S
896 'upload_date': '20150721',
897 'uploader': 'Beer Games Beer',
898 'uploader_id': 'beergamesbeer',
ec85ded8 899 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
7caf9830 900 'license': 'Standard YouTube License',
cf7e015f
S
901 },
902 }, {
903 'info_dict': {
904 'id': 'teuwxikvS5k',
905 'ext': 'mp4',
906 'title': 'teamPGP: Rocket League Noob Stream (zim)',
907 'description': 'md5:dc7872fb300e143831327f1bae3af010',
556dbe7f 908 'duration': 7334,
cf7e015f
S
909 'upload_date': '20150721',
910 'uploader': 'Beer Games Beer',
911 'uploader_id': 'beergamesbeer',
ec85ded8 912 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
7caf9830 913 'license': 'Standard YouTube License',
cf7e015f
S
914 },
915 }],
916 'params': {
917 'skip_download': True,
918 },
4fe54c12 919 'skip': 'This video is not available.',
cbaed4bb 920 },
f9f49d87 921 {
067aa17e 922 # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
f9f49d87
S
923 'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
924 'info_dict': {
925 'id': 'gVfLd0zydlo',
926 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
927 },
928 'playlist_count': 2,
be49068d 929 'skip': 'Not multifeed anymore',
f9f49d87 930 },
cbaed4bb 931 {
2d3d2997 932 'url': 'https://vid.plus/FlRa-iH7PGw',
cbaed4bb 933 'only_matching': True,
0e49d9a6 934 },
6d4fc66b 935 {
2d3d2997 936 'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
6d4fc66b
S
937 'only_matching': True,
938 },
0e49d9a6 939 {
067aa17e 940 # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
a8776b10 941 # Also tests cut-off URL expansion in video description (see
067aa17e
S
942 # https://github.com/ytdl-org/youtube-dl/issues/1892,
943 # https://github.com/ytdl-org/youtube-dl/issues/8164)
0e49d9a6
LL
944 'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
945 'info_dict': {
946 'id': 'lsguqyKfVQg',
947 'ext': 'mp4',
948 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
eb6793ba 949 'alt_title': 'Dark Walk - Position Music',
0e49d9a6 950 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
556dbe7f 951 'duration': 133,
0e49d9a6
LL
952 'upload_date': '20151119',
953 'uploader_id': 'IronSoulElf',
ec85ded8 954 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
0e49d9a6 955 'uploader': 'IronSoulElf',
eb6793ba
S
956 'creator': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
957 'track': 'Dark Walk - Position Music',
958 'artist': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
92bc97d3 959 'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
0e49d9a6
LL
960 },
961 'params': {
962 'skip_download': True,
963 },
964 },
61f92af1 965 {
067aa17e 966 # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
61f92af1
S
967 'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
968 'only_matching': True,
969 },
313dfc45
LL
970 {
971 # Video with yt:stretch=17:0
972 'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
973 'info_dict': {
974 'id': 'Q39EVAstoRM',
975 'ext': 'mp4',
976 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
977 'description': 'md5:ee18a25c350637c8faff806845bddee9',
978 'upload_date': '20151107',
979 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
980 'uploader': 'CH GAMER DROID',
981 },
982 'params': {
983 'skip_download': True,
984 },
be49068d 985 'skip': 'This video does not exist.',
313dfc45 986 },
7caf9830
S
987 {
988 # Video licensed under Creative Commons
989 'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
990 'info_dict': {
991 'id': 'M4gD1WSo5mA',
992 'ext': 'mp4',
993 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
994 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
556dbe7f 995 'duration': 721,
7caf9830
S
996 'upload_date': '20150127',
997 'uploader_id': 'BerkmanCenter',
ec85ded8 998 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
556dbe7f 999 'uploader': 'The Berkman Klein Center for Internet & Society',
7caf9830
S
1000 'license': 'Creative Commons Attribution license (reuse allowed)',
1001 },
1002 'params': {
1003 'skip_download': True,
1004 },
1005 },
fd050249
S
1006 {
1007 # Channel-like uploader_url
1008 'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
1009 'info_dict': {
1010 'id': 'eQcmzGIKrzg',
1011 'ext': 'mp4',
1012 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
1013 'description': 'md5:dda0d780d5a6e120758d1711d062a867',
556dbe7f 1014 'duration': 4060,
fd050249 1015 'upload_date': '20151119',
eb6793ba 1016 'uploader': 'Bernie Sanders',
fd050249 1017 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
ec85ded8 1018 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
fd050249
S
1019 'license': 'Creative Commons Attribution license (reuse allowed)',
1020 },
1021 'params': {
1022 'skip_download': True,
1023 },
1024 },
040ac686
S
1025 {
1026 'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
1027 'only_matching': True,
7f29cf54
S
1028 },
1029 {
067aa17e 1030 # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
7f29cf54
S
1031 'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
1032 'only_matching': True,
6496ccb4
S
1033 },
1034 {
1035 # Rental video preview
1036 'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
1037 'info_dict': {
1038 'id': 'uGpuVWrhIzE',
1039 'ext': 'mp4',
1040 'title': 'Piku - Trailer',
1041 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
1042 'upload_date': '20150811',
1043 'uploader': 'FlixMatrix',
1044 'uploader_id': 'FlixMatrixKaravan',
ec85ded8 1045 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
6496ccb4
S
1046 'license': 'Standard YouTube License',
1047 },
1048 'params': {
1049 'skip_download': True,
1050 },
eb6793ba 1051 'skip': 'This video is not available.',
022a5d66 1052 },
12afdc2a
S
1053 {
1054 # YouTube Red video with episode data
1055 'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
1056 'info_dict': {
1057 'id': 'iqKdEhx-dD4',
1058 'ext': 'mp4',
1059 'title': 'Isolation - Mind Field (Ep 1)',
4fe54c12 1060 'description': 'md5:46a29be4ceffa65b92d277b93f463c0f',
556dbe7f 1061 'duration': 2085,
12afdc2a
S
1062 'upload_date': '20170118',
1063 'uploader': 'Vsauce',
1064 'uploader_id': 'Vsauce',
1065 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
12afdc2a
S
1066 'series': 'Mind Field',
1067 'season_number': 1,
1068 'episode_number': 1,
1069 },
1070 'params': {
1071 'skip_download': True,
1072 },
1073 'expected_warnings': [
1074 'Skipping DASH manifest',
1075 ],
1076 },
c7121fa7
S
1077 {
1078 # The following content has been identified by the YouTube community
1079 # as inappropriate or offensive to some audiences.
1080 'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
1081 'info_dict': {
1082 'id': '6SJNVb0GnPI',
1083 'ext': 'mp4',
1084 'title': 'Race Differences in Intelligence',
1085 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
1086 'duration': 965,
1087 'upload_date': '20140124',
1088 'uploader': 'New Century Foundation',
1089 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
1090 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
c7121fa7
S
1091 },
1092 'params': {
1093 'skip_download': True,
1094 },
1095 },
022a5d66
S
1096 {
1097 # itag 212
1098 'url': '1t24XAntNCY',
1099 'only_matching': True,
fd5c4aab
S
1100 },
1101 {
1102 # geo restricted to JP
1103 'url': 'sJL6WA-aGkQ',
1104 'only_matching': True,
1105 },
d0ba5587
S
1106 {
1107 'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
1108 'only_matching': True,
1109 },
cd5a74a2
S
1110 {
1111 'url': 'https://invidio.us/watch?v=BaW_jenozKc',
1112 'only_matching': True,
1113 },
825cd268
RA
1114 {
1115 # DRM protected
1116 'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
1117 'only_matching': True,
4fe54c12
S
1118 },
1119 {
1120 # Video with unsupported adaptive stream type formats
1121 'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
1122 'info_dict': {
1123 'id': 'Z4Vy8R84T1U',
1124 'ext': 'mp4',
1125 'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
1126 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
1127 'duration': 433,
1128 'upload_date': '20130923',
1129 'uploader': 'Amelia Putri Harwita',
1130 'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',
1131 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',
1132 'formats': 'maxcount:10',
1133 },
1134 'params': {
1135 'skip_download': True,
1136 'youtube_include_dash_manifest': False,
1137 },
5caabd3c 1138 },
1139 {
822b9d9c 1140 # Youtube Music Auto-generated description
5caabd3c 1141 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1142 'info_dict': {
1143 'id': 'MgNrAu2pzNs',
1144 'ext': 'mp4',
1145 'title': 'Voyeur Girl',
1146 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
1147 'upload_date': '20190312',
1148 'uploader': 'Various Artists - Topic',
1149 'uploader_id': 'UCVWKBi1ELZn0QX2CBLSkiyw',
1150 'artist': 'Stephen',
1151 'track': 'Voyeur Girl',
1152 'album': 'it\'s too much love to know my dear',
1153 'release_date': '20190313',
1154 'release_year': 2019,
1155 },
1156 'params': {
1157 'skip_download': True,
1158 },
1159 },
1160 {
822b9d9c 1161 # Youtube Music Auto-generated description
5caabd3c 1162 # Retrieve 'artist' field from 'Artist:' in video description
1163 # when it is present on youtube music video
5caabd3c 1164 'url': 'https://www.youtube.com/watch?v=k0jLE7tTwjY',
1165 'info_dict': {
1166 'id': 'k0jLE7tTwjY',
1167 'ext': 'mp4',
1168 'title': 'Latch Feat. Sam Smith',
1169 'description': 'md5:3cb1e8101a7c85fcba9b4fb41b951335',
1170 'upload_date': '20150110',
1171 'uploader': 'Various Artists - Topic',
1172 'uploader_id': 'UCNkEcmYdjrH4RqtNgh7BZ9w',
1173 'artist': 'Disclosure',
1174 'track': 'Latch Feat. Sam Smith',
1175 'album': 'Latch Featuring Sam Smith',
1176 'release_date': '20121008',
1177 'release_year': 2012,
1178 },
1179 'params': {
1180 'skip_download': True,
1181 },
1182 },
1183 {
822b9d9c 1184 # Youtube Music Auto-generated description
5caabd3c 1185 # handle multiple artists on youtube music video
1186 'url': 'https://www.youtube.com/watch?v=74qn0eJSjpA',
1187 'info_dict': {
1188 'id': '74qn0eJSjpA',
1189 'ext': 'mp4',
1190 'title': 'Eastside',
1191 'description': 'md5:290516bb73dcbfab0dcc4efe6c3de5f2',
1192 'upload_date': '20180710',
1193 'uploader': 'Benny Blanco - Topic',
1194 'uploader_id': 'UCzqz_ksRu_WkIzmivMdIS7A',
1195 'artist': 'benny blanco, Halsey, Khalid',
1196 'track': 'Eastside',
1197 'album': 'Eastside',
1198 'release_date': '20180713',
1199 'release_year': 2018,
1200 },
1201 'params': {
1202 'skip_download': True,
1203 },
1204 },
1205 {
822b9d9c 1206 # Youtube Music Auto-generated description
5caabd3c 1207 # handle youtube music video with release_year and no release_date
1208 'url': 'https://www.youtube.com/watch?v=-hcAI0g-f5M',
1209 'info_dict': {
1210 'id': '-hcAI0g-f5M',
1211 'ext': 'mp4',
1212 'title': 'Put It On Me',
1213 'description': 'md5:93c55acc682ae7b0c668f2e34e1c069e',
1214 'upload_date': '20180426',
1215 'uploader': 'Matt Maeson - Topic',
1216 'uploader_id': 'UCnEkIGqtGcQMLk73Kp-Q5LQ',
1217 'artist': 'Matt Maeson',
1218 'track': 'Put It On Me',
1219 'album': 'The Hearse',
1220 'release_date': None,
1221 'release_year': 2018,
1222 },
1223 'params': {
1224 'skip_download': True,
1225 },
1226 },
2eb88d95
PH
1227 ]
1228
e0df6211
PH
def __init__(self, *args, **kwargs):
    """Set up the extractor with an empty per-instance player cache.

    All positional and keyword arguments are forwarded unchanged to the
    parent class initialiser.
    """
    super(YoutubeIE, self).__init__(*args, **kwargs)
    # Filled lazily by _decrypt_signature, keyed by
    # (player_url, signature shape) -> deciphering callable.
    self._player_cache = dict()
e0df6211 1232
c5e8d7af
PH
def report_video_info_webpage_download(self, video_id):
    """Announce on screen that the video info webpage is being downloaded."""
    message = '%s: Downloading video info webpage' % video_id
    self.to_screen(message)
c5e8d7af 1236
c5e8d7af
PH
def report_information_extraction(self, video_id):
    """Announce on screen that video information is being extracted."""
    message = '%s: Extracting video information' % video_id
    self.to_screen(message)
c5e8d7af
PH
1240
def report_unavailable_format(self, video_id, format):
    """Announce on screen that the requested format is not available."""
    message = '%s: Format %s not available' % (video_id, format)
    self.to_screen(message)
c5e8d7af
PH
1244
def report_rtmp_download(self):
    """Announce on screen that an RTMP download was detected."""
    message = 'RTMP download detected'
    self.to_screen(message)
c5e8d7af 1248
60064c53
PH
def _signature_cache_id(self, example_sig):
    """Describe a signature by the lengths of its dot-separated parts.

    For example a signature 'ab.cde' is described as '2.3'.
    """
    part_lengths = [compat_str(len(chunk)) for chunk in example_sig.split('.')]
    return '.'.join(part_lengths)
60064c53
PH
1252
def _extract_signature_function(self, video_id, player_url, example_sig):
    """Build the function that deciphers signatures shaped like example_sig.

    The player id and type ('js' or 'swf') are parsed out of player_url.
    A previously stored index permutation is looked up in the downloader
    cache section 'youtube-sigfuncs'; on a miss the player is downloaded
    and parsed, and the permutation it applies is stored for next time.

    Returns a callable taking the encrypted signature string.

    Raises ExtractorError when the player cannot be identified from its
    URL, when the derived cache id is not a plain name, or when the player
    type is neither 'js' nor 'swf'.
    """
    id_m = re.match(
        r'.*?-(?P<id>[a-zA-Z0-9_-]+)(?:/watch_as3|/html5player(?:-new)?|(?:/[a-z]{2,3}_[A-Z]{2})?/base)?\.(?P<ext>[a-z]+)$',
        player_url)
    if not id_m:
        raise ExtractorError('Cannot identify player %r' % player_url)
    player_type = id_m.group('ext')
    player_id = id_m.group('id')

    # Read from filesystem cache
    func_id = '%s_%s_%s' % (
        player_type, player_id, self._signature_cache_id(example_sig))
    # func_id is handed to the cache as an entry name, so it must not carry
    # a path component.  This is an explicit check rather than an assert so
    # that it still runs under `python -O`.
    if os.path.basename(func_id) != func_id:
        raise ExtractorError('Invalid signature function id %r' % func_id)

    cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
    if cache_spec is not None:
        # The cached value is a list of input indices, one per output char.
        return lambda s: ''.join(s[i] for i in cache_spec)

    download_note = (
        'Downloading player %s' % player_url
        if self._downloader.params.get('verbose') else
        'Downloading %s player %s' % (player_type, player_id)
    )
    if player_type == 'js':
        code = self._download_webpage(
            player_url, video_id,
            note=download_note,
            errnote='Download of %s failed' % player_url)
        res = self._parse_sig_js(code)
    elif player_type == 'swf':
        urlh = self._request_webpage(
            player_url, video_id,
            note=download_note,
            errnote='Download of %s failed' % player_url)
        code = urlh.read()
        res = self._parse_sig_swf(code)
    else:
        # Raised explicitly: an assert would vanish under `python -O` and
        # leave `res` unbound below.
        raise ExtractorError('Invalid player type %r' % player_type)

    # Feed a string whose i-th character has code point i; the code points
    # of the result then say which input index ends up at each position.
    test_string = ''.join(map(compat_chr, range(len(example_sig))))
    cache_res = res(test_string)
    cache_spec = [ord(c) for c in cache_res]

    self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec)
    return res
1298
def _print_sig_code(self, func, example_sig):
    """Print Python source equivalent to the deciphering function func.

    func is probed with a string whose i-th character has code point i
    (as long as example_sig); the resulting index sequence is rendered as
    a concatenation of single-index lookups and slices of `s` and shown
    on screen, guarded by a check on the signature's part lengths.
    """
    def gen_sig_code(idxs):
        def _genslice(start, end, step):
            starts = '' if start == 0 else str(start)
            # A negative stop would wrap around, so leave the stop open.
            ends = (':%d' % (end + step)) if end + step >= 0 else ':'
            steps = '' if step == 1 else (':%d' % step)
            return 's[%s%s%s]' % (starts, ends, steps)

        # With fewer than two indices the pairwise loop below never runs
        # and its loop variable would be unbound afterwards, so emit plain
        # lookups directly.
        if len(idxs) < 2:
            for idx in idxs:
                yield 's[%d]' % idx
            return

        step = None
        # Quelch pyflakes warnings - start will be set when step is set
        start = '(Never used)'
        for i, prev in zip(idxs[1:], idxs[:-1]):
            if step is not None:
                if i - prev == step:
                    # Still inside the current run of consecutive indices.
                    continue
                yield _genslice(start, prev, step)
                step = None
                continue
            if i - prev in [-1, 1]:
                # A run of consecutive indices starts at prev.
                step = i - prev
                start = prev
                continue
            else:
                yield 's[%d]' % prev
        # Flush whatever is pending after the last pair.
        if step is None:
            yield 's[%d]' % i
        else:
            yield _genslice(start, i, step)

    test_string = ''.join(map(compat_chr, range(len(example_sig))))
    cache_res = func(test_string)
    cache_spec = [ord(c) for c in cache_res]
    expr_code = ' + '.join(gen_sig_code(cache_spec))
    signature_id_tuple = '(%s)' % (
        ', '.join(compat_str(len(p)) for p in example_sig.split('.')))
    code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
            ' return %s\n') % (signature_id_tuple, expr_code)
    self.to_screen('Extracted signature function:\n' + code)
edf3e38e 1337
e0df6211
PH
def _parse_sig_js(self, jscode):
    """Locate the signature function in JS player code and wrap it.

    The function name is found with the first matching pattern below, the
    function is extracted through JSInterpreter, and a callable taking the
    encrypted signature string is returned.
    """
    sig_name_patterns = (
        r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
        # Obsolete patterns
        r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
        r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        # NOTE: textually identical to the previous entry.
        r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
    )
    funcname = self._search_regex(
        sig_name_patterns, jscode,
        'Initial JS player signature function name', group='sig')

    interpreter = JSInterpreter(jscode)
    js_decipher = interpreter.extract_function(funcname)

    def decipher(s):
        # The extracted function takes its arguments as a list.
        return js_decipher([s])

    return decipher
1357
def _parse_sig_swf(self, file_contents):
    """Wrap the 'decipher' function of an SWF player's SignatureDecipher class.

    file_contents is passed to SWFInterpreter; the returned callable takes
    the encrypted signature string.
    """
    TARGET_CLASSNAME = 'SignatureDecipher'
    interpreter = SWFInterpreter(file_contents)
    decipher_class = interpreter.extract_class(TARGET_CLASSNAME)
    swf_decipher = interpreter.extract_function(decipher_class, 'decipher')

    def decipher(s):
        # The extracted function takes its arguments as a list.
        return swf_decipher([s])

    return decipher
1364
def _decrypt_signature(self, s, video_id, player_url, age_gate=False):
    """Turn the encrypted signature s into a working one.

    The deciphering function for (player_url, shape of s) is taken from
    self._player_cache, being extracted from the player first if it is not
    there yet.  age_gate is accepted but not used in this method.

    Raises ExtractorError when player_url is None, or, wrapping the
    formatted traceback, when anything fails during extraction.
    """
    if player_url is None:
        raise ExtractorError('Cannot decrypt signature without player_url')

    # Turn protocol-relative and site-relative player URLs into absolute ones.
    if player_url.startswith('//'):
        player_url = 'https:' + player_url
    elif not re.match(r'https?://', player_url):
        player_url = compat_urlparse.urljoin(
            'https://www.youtube.com', player_url)

    try:
        cache_key = (player_url, self._signature_cache_id(s))
        if cache_key not in self._player_cache:
            self._player_cache[cache_key] = self._extract_signature_function(
                video_id, player_url, s)
        decipher = self._player_cache[cache_key]
        if self._downloader.params.get('youtube_print_sig_code'):
            self._print_sig_code(decipher, s)
        return decipher(s)
    except Exception as e:
        raise ExtractorError(
            'Signature extraction failed: ' + traceback.format_exc(), cause=e)
e0df6211 1391
def _get_subtitles(self, video_id, webpage):
    """Return available subtitle tracks as {lang_code: [{'url', 'ext'}, ...]}.

    The track list is downloaded from the timedtext listing endpoint; for
    each language only the first listed track is used, with one entry per
    format in self._SUBTITLE_FORMATS.  An empty dict is returned, after a
    warning, when the list cannot be downloaded or contains no tracks.
    webpage is not used in this method.
    """
    list_url = 'https://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id
    try:
        subs_doc = self._download_xml(list_url, video_id, note=False)
    except ExtractorError as err:
        self._downloader.report_warning('unable to download video subtitles: %s' % error_to_compat_str(err))
        return {}

    subtitles = {}
    for track in subs_doc.findall('track'):
        lang = track.attrib['lang_code']
        if lang in subtitles:
            # Keep only the first track listed for a language.
            continue
        subtitles[lang] = [{
            'url': 'https://www.youtube.com/api/timedtext?' + compat_urllib_parse_urlencode({
                'lang': lang,
                'v': video_id,
                'fmt': ext,
                'name': track.attrib['name'].encode('utf-8'),
            }),
            'ext': ext,
        } for ext in self._SUBTITLE_FORMATS]

    if subtitles:
        return subtitles
    self._downloader.report_warning('video doesn\'t have subtitles')
    return {}
1423
a72778d3
S
def _get_ytplayer_config(self, video_id, webpage):
    """Extract and parse the ytplayer.config JSON object from webpage.

    Returns the result of the non-fatal JSON parse, or None when no
    config assignment is found in the page.
    """
    # User data may contain arbitrary character sequences that can break
    # regex-based JSON extraction, e.g. when '};' is contained the second
    # regex won't capture the whole JSON.  As a workaround the more
    # concrete regex is tried first, until proper quoted string handling
    # replaces it (see
    # https://github.com/ytdl-org/youtube-dl/issues/7468,
    # https://github.com/ytdl-org/youtube-dl/pull/7599)
    config_patterns = (
        r';ytplayer\.config\s*=\s*({.+?});ytplayer',
        r';ytplayer\.config\s*=\s*({.+?});',
    )
    raw_config = self._search_regex(
        config_patterns, webpage, 'ytplayer.config', default=None)
    if not raw_config:
        return None
    return self._parse_json(
        uppercase_escape(raw_config), video_id, fatal=False)
0e49d9a6 1441
360e1ca5 1442 def _get_automatic_captions(self, video_id, webpage):
de7f3446
JMF
1443 """We need the webpage for getting the captions url, pass it as an
1444 argument to speed up the process."""
69ea8ca4 1445 self.to_screen('%s: Looking for automatic captions' % video_id)
a72778d3 1446 player_config = self._get_ytplayer_config(video_id, webpage)
78caa52a 1447 err_msg = 'Couldn\'t find automatic captions for %s' % video_id
a72778d3 1448 if not player_config:
de7f3446
JMF
1449 self._downloader.report_warning(err_msg)
1450 return {}
de7f3446 1451 try:
0792d563 1452 args = player_config['args']
b78b292f
S
1453 caption_url = args.get('ttsurl')
1454 if caption_url:
1455 timestamp = args['timestamp']
1456 # We get the available subtitles
15707c7e 1457 list_params = compat_urllib_parse_urlencode({
b78b292f
S
1458 'type': 'list',
1459 'tlangs': 1,
1460 'asrs': 1,
1461 })
1462 list_url = caption_url + '&' + list_params
1463 caption_list = self._download_xml(list_url, video_id)
1464 original_lang_node = caption_list.find('track')
1465 if original_lang_node is None:
1466 self._downloader.report_warning('Video doesn\'t have automatic captions')
1467 return {}
1468 original_lang = original_lang_node.attrib['lang_code']
1469 caption_kind = original_lang_node.attrib.get('kind', '')
1470
1471 sub_lang_list = {}
1472 for lang_node in caption_list.findall('target'):
1473 sub_lang = lang_node.attrib['lang_code']
1474 sub_formats = []
1475 for ext in self._SUBTITLE_FORMATS:
15707c7e 1476 params = compat_urllib_parse_urlencode({
b78b292f
S
1477 'lang': original_lang,
1478 'tlang': sub_lang,
1479 'fmt': ext,
1480 'ts': timestamp,
1481 'kind': caption_kind,
1482 })
1483 sub_formats.append({
1484 'url': caption_url + '&' + params,
1485 'ext': ext,
1486 })
1487 sub_lang_list[sub_lang] = sub_formats
1488 return sub_lang_list
1489
ddbb4c5c
S
1490 def make_captions(sub_url, sub_langs):
1491 parsed_sub_url = compat_urllib_parse_urlparse(sub_url)
1492 caption_qs = compat_parse_qs(parsed_sub_url.query)
1493 captions = {}
1494 for sub_lang in sub_langs:
1495 sub_formats = []
1496 for ext in self._SUBTITLE_FORMATS:
1497 caption_qs.update({
1498 'tlang': [sub_lang],
1499 'fmt': [ext],
1500 })
1501 sub_url = compat_urlparse.urlunparse(parsed_sub_url._replace(
1502 query=compat_urllib_parse_urlencode(caption_qs, True)))
1503 sub_formats.append({
1504 'url': sub_url,
1505 'ext': ext,
1506 })
1507 captions[sub_lang] = sub_formats
1508 return captions
1509
1510 # New captions format as of 22.06.2017
1511 player_response = args.get('player_response')
1512 if player_response and isinstance(player_response, compat_str):
1513 player_response = self._parse_json(
1514 player_response, video_id, fatal=False)
1515 if player_response:
1516 renderer = player_response['captions']['playerCaptionsTracklistRenderer']
1517 base_url = renderer['captionTracks'][0]['baseUrl']
1518 sub_lang_list = []
1519 for lang in renderer['translationLanguages']:
1520 lang_code = lang.get('languageCode')
1521 if lang_code:
1522 sub_lang_list.append(lang_code)
1523 return make_captions(base_url, sub_lang_list)
1524
b78b292f
S
1525 # Some videos don't provide ttsurl but rather caption_tracks and
1526 # caption_translation_languages (e.g. 20LmZk1hakA)
ddbb4c5c 1527 # Not used anymore as of 22.06.2017
b78b292f
S
1528 caption_tracks = args['caption_tracks']
1529 caption_translation_languages = args['caption_translation_languages']
1530 caption_url = compat_parse_qs(caption_tracks.split(',')[0])['u'][0]
ddbb4c5c 1531 sub_lang_list = []
b78b292f
S
1532 for lang in caption_translation_languages.split(','):
1533 lang_qs = compat_parse_qs(compat_urllib_parse_unquote_plus(lang))
1534 sub_lang = lang_qs.get('lc', [None])[0]
ddbb4c5c
S
1535 if sub_lang:
1536 sub_lang_list.append(sub_lang)
1537 return make_captions(caption_url, sub_lang_list)
de7f3446
JMF
1538 # An extractor error can be raised by the download process if there are
1539 # no automatic captions but there are subtitles
ddbb4c5c 1540 except (KeyError, IndexError, ExtractorError):
de7f3446
JMF
1541 self._downloader.report_warning(err_msg)
1542 return {}
1543
21c340b8
S
def _mark_watched(self, video_id, video_info, player_response):
    """Request the video's playback-stats URL ("Marking watched").

    The base URL is read from
    ``player_response['playbackTracking']['videostatsPlaybackUrl']['baseUrl']``
    and, failing that, from ``video_info['videostats_playback_base_url'][0]``.
    When neither yields a valid URL the method returns without doing
    anything.  The download itself is non-fatal.
    """
    base_url = try_get(
        player_response,
        lambda x: x['playbackTracking']['videostatsPlaybackUrl']['baseUrl'])
    if not base_url:
        base_url = try_get(
            video_info, lambda x: x['videostats_playback_base_url'][0])
    stats_url = url_or_none(base_url)
    if not stats_url:
        return

    url_parts = compat_urlparse.urlparse(stats_url)
    query = compat_urlparse.parse_qs(url_parts.query)

    # cpn generation algorithm is reverse engineered from base.js.
    # In fact it works even with dummy cpn.
    CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
    cpn_chars = [
        CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(0, 16)]

    # parse_qs yields list values, so the added parameters are lists too
    # and the query is re-encoded with doseq=True.
    query['ver'] = ['2']
    query['cpn'] = [''.join(cpn_chars)]
    encoded_query = compat_urllib_parse_urlencode(query, True)
    stats_url = compat_urlparse.urlunparse(
        url_parts._replace(query=encoded_query))

    self._download_webpage(
        stats_url, video_id, 'Marking watched',
        'Unable to mark watched', fatal=False)
1569
66c9fa36
S
@staticmethod
def _extract_urls(webpage):
    """Collect YouTube references embedded in an HTML page.

    Three sources are scanned, in this order:

    * player URLs (``youtube.com`` / ``youtube-nocookie.com`` paths
      ``/embed/``, ``/v/`` or ``/p/`` followed by an 11 character id) that
      appear as a quoted value after ``<iframe src=``, ``data-video-url=``,
      ``<embed src=``, ``embedSWF(``, ``<object data=`` or
      ``new SWFObject(``;
    * ``data-youtube-id`` values of ``class="lazyYT"`` elements;
    * ``data-video_id`` values of WordPress "YouTube Video Importer"
      ``yvii_single_video_player`` divs.

    Returns a list of strings (HTML-unescaped for the first two sources),
    possibly empty.
    """
    # Embedded YouTube player
    entries = [
        unescapeHTML(mobj.group('url'))
        for mobj in re.finditer(r'''(?x)
            (?:
                <iframe[^>]+?src=|
                data-video-url=|
                <embed[^>]+?src=|
                # The previous pattern `embedSWF\(?:\s*` demanded a literal
                # colon, so `swfobject.embedSWF("...")` never matched.  The
                # colon is now optional; everything matched before still is.
                embedSWF\(?:?\s*|
                <object[^>]+data=|
                new\s+SWFObject\(
            )
            (["\'])
            (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
                (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
            \1''', webpage)]

    # lazyYT YouTube embed
    entries.extend(
        unescapeHTML(youtube_id) for youtube_id in re.findall(
            r'class="lazyYT" data-youtube-id="([^"]+)"', webpage))

    # Wordpress "YouTube Video Importer" plugin
    matches = re.findall(r'''(?x)<div[^>]+
        class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
        data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
    # findall returns (q1, q2, id) tuples; the id is the last group.
    entries.extend(m[-1] for m in matches)

    return entries
1601
@staticmethod
def _extract_url(webpage):
    """Return the first entry found by ``_extract_urls`` or None if there is none."""
    found = YoutubeIE._extract_urls(webpage)
    if not found:
        return None
    return found[0]
1606
97665381
PH
@classmethod
def extract_id(cls, url):
    """Return the second capture group of ``cls._VALID_URL`` matched against *url*.

    The pattern is applied with ``re.VERBOSE`` and anchored at the start of
    the string.  Raises ExtractorError when *url* does not match.
    """
    match = re.match(cls._VALID_URL, url, re.VERBOSE)
    if match is not None:
        return match.group(2)
    raise ExtractorError('Invalid URL: %s' % url)
1614
9cafc3fd
S
1615 @staticmethod
1616 def _extract_chapters(description, duration):
1617 if not description:
1618 return None
1619 chapter_lines = re.findall(
1620 r'(?:^|<br\s*/>)([^<]*<a[^>]+onclick=["\']yt\.www\.watch\.player\.seekTo[^>]+>(\d{1,2}:\d{1,2}(?::\d{1,2})?)</a>[^>]*)(?=$|<br\s*/>)',
1621 description)
1622 if not chapter_lines:
1623 return None
1624 chapters = []
1625 for next_num, (chapter_line, time_point) in enumerate(
1626 chapter_lines, start=1):
1627 start_time = parse_duration(time_point)
1628 if start_time is None:
1629 continue
39d4c1be
S
1630 if start_time > duration:
1631 break
9cafc3fd
S
1632 end_time = (duration if next_num == len(chapter_lines)
1633 else parse_duration(chapter_lines[next_num][1]))
1634 if end_time is None:
1635 continue
39d4c1be
S
1636 if end_time > duration:
1637 end_time = duration
1638 if start_time > end_time:
1639 break
9cafc3fd
S
1640 chapter_title = re.sub(
1641 r'<a[^>]+>[^<]+</a>', '', chapter_line).strip(' \t-')
1642 chapter_title = re.sub(r'\s+', ' ', chapter_title)
1643 chapters.append({
1644 'start_time': start_time,
1645 'end_time': end_time,
1646 'title': chapter_title,
1647 })
1648 return chapters
1649
c5e8d7af 1650 def _real_extract(self, url):
cf7e015f
S
1651 url, smuggled_data = unsmuggle_url(url, {})
1652
7e8c0af0 1653 proto = (
78caa52a
PH
1654 'http' if self._downloader.params.get('prefer_insecure', False)
1655 else 'https')
7e8c0af0 1656
7c80519c 1657 start_time = None
297a564b 1658 end_time = None
7c80519c
JMF
1659 parsed_url = compat_urllib_parse_urlparse(url)
1660 for component in [parsed_url.fragment, parsed_url.query]:
1661 query = compat_parse_qs(component)
297a564b 1662 if start_time is None and 't' in query:
7c80519c 1663 start_time = parse_duration(query['t'][0])
2929fa0e
JMF
1664 if start_time is None and 'start' in query:
1665 start_time = parse_duration(query['start'][0])
297a564b
JMF
1666 if end_time is None and 'end' in query:
1667 end_time = parse_duration(query['end'][0])
7c80519c 1668
c5e8d7af
PH
1669 # Extract original video URL from URL with redirection, like age verification, using next_url parameter
1670 mobj = re.search(self._NEXT_URL_RE, url)
1671 if mobj:
7fd002c0 1672 url = proto + '://www.youtube.com/' + compat_urllib_parse_unquote(mobj.group(1)).lstrip('/')
97665381 1673 video_id = self.extract_id(url)
c5e8d7af
PH
1674
1675 # Get video webpage
aa79ac0c 1676 url = proto + '://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1&bpctr=9999999999' % video_id
a1f934b1 1677 video_webpage = self._download_webpage(url, video_id)
c5e8d7af
PH
1678
1679 # Attempt to extract SWF player URL
e0df6211 1680 mobj = re.search(r'swfConfig.*?"(https?:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage)
c5e8d7af
PH
1681 if mobj is not None:
1682 player_url = re.sub(r'\\(.)', r'\1', mobj.group(1))
1683 else:
1684 player_url = None
1685
d8d24a92
S
1686 dash_mpds = []
1687
1688 def add_dash_mpd(video_info):
1689 dash_mpd = video_info.get('dashmpd')
1690 if dash_mpd and dash_mpd[0] not in dash_mpds:
1691 dash_mpds.append(dash_mpd[0])
1692
561b456e
S
1693 def add_dash_mpd_pr(pl_response):
1694 dash_mpd = url_or_none(try_get(
1695 pl_response, lambda x: x['streamingData']['dashManifestUrl'],
1696 compat_str))
1697 if dash_mpd and dash_mpd not in dash_mpds:
1698 dash_mpds.append(dash_mpd)
1699
c7121fa7
S
1700 is_live = None
1701 view_count = None
1702
1703 def extract_view_count(v_info):
1704 return int_or_none(try_get(v_info, lambda x: x['view_count'][0]))
1705
026fbedc
S
1706 def extract_token(v_info):
1707 return dict_get(v_info, ('account_playback_token', 'accountPlaybackToken', 'token'))
1708
c2d125d9
S
1709 def extract_player_response(player_response, video_id):
1710 pl_response = str_or_none(player_response)
1711 if not pl_response:
1712 return
1713 pl_response = self._parse_json(pl_response, video_id, fatal=False)
1714 if isinstance(pl_response, dict):
1715 add_dash_mpd_pr(pl_response)
1716 return pl_response
1717
dbdaaa23
S
1718 player_response = {}
1719
c5e8d7af 1720 # Get video info
6449cd80 1721 embed_webpage = None
c108eb73 1722 if re.search(r'player-age-gate-content">', video_webpage) is not None:
c108eb73
JMF
1723 age_gate = True
1724 # We simulate the access to the video from www.youtube.com/v/{video_id}
1725 # this can be viewed without login into Youtube
beb95e77
CL
1726 url = proto + '://www.youtube.com/embed/%s' % video_id
1727 embed_webpage = self._download_webpage(url, video_id, 'Downloading embed webpage')
15707c7e 1728 data = compat_urllib_parse_urlencode({
2c57c7fa
JMF
1729 'video_id': video_id,
1730 'eurl': 'https://youtube.googleapis.com/v/' + video_id,
c084c934 1731 'sts': self._search_regex(
beb95e77 1732 r'"sts"\s*:\s*(\d+)', embed_webpage, 'sts', default=''),
2c57c7fa 1733 })
7e8c0af0 1734 video_info_url = proto + '://www.youtube.com/get_video_info?' + data
94bd3613
PH
1735 video_info_webpage = self._download_webpage(
1736 video_info_url, video_id,
20436c30 1737 note='Refetching age-gated info webpage',
94bd3613 1738 errnote='unable to download video info webpage')
c5e8d7af 1739 video_info = compat_parse_qs(video_info_webpage)
c2d125d9
S
1740 pl_response = video_info.get('player_response', [None])[0]
1741 player_response = extract_player_response(pl_response, video_id)
d8d24a92 1742 add_dash_mpd(video_info)
c2d125d9 1743 view_count = extract_view_count(video_info)
c108eb73
JMF
1744 else:
1745 age_gate = False
bc93bdb5 1746 video_info = None
dc4e4f90 1747 sts = None
d8d24a92 1748 # Try looking directly into the video webpage
a72778d3
S
1749 ytplayer_config = self._get_ytplayer_config(video_id, video_webpage)
1750 if ytplayer_config:
4e62ebe2 1751 args = ytplayer_config['args']
4c76aa06 1752 if args.get('url_encoded_fmt_stream_map') or args.get('hlsvp'):
d8d24a92
S
1753 # Convert to the same format returned by compat_parse_qs
1754 video_info = dict((k, [v]) for k, v in args.items())
1755 add_dash_mpd(video_info)
6496ccb4
S
1756 # Rental video is not rented but preview is available (e.g.
1757 # https://www.youtube.com/watch?v=yYr8q0y5Jfg,
067aa17e 1758 # https://github.com/ytdl-org/youtube-dl/issues/10532)
6496ccb4
S
1759 if not video_info and args.get('ypc_vid'):
1760 return self.url_result(
1761 args['ypc_vid'], YoutubeIE.ie_key(), video_id=args['ypc_vid'])
2fe1ff85
JMF
1762 if args.get('livestream') == '1' or args.get('live_playback') == 1:
1763 is_live = True
dc4e4f90 1764 sts = ytplayer_config.get('sts')
dbdaaa23 1765 if not player_response:
c2d125d9 1766 player_response = extract_player_response(args.get('player_response'), video_id)
0a3cf9ad 1767 if not video_info or self._downloader.params.get('youtube_include_dash_manifest', True):
561b456e 1768 add_dash_mpd_pr(player_response)
0a3cf9ad
S
1769 # We also try looking in get_video_info since it may contain different dashmpd
1770 # URL that points to a DASH manifest with possibly different itag set (some itags
1771 # are missing from DASH manifest pointed by webpage's dashmpd, some - from DASH
1772 # manifest pointed by get_video_info's dashmpd).
1773 # The general idea is to take a union of itags of both DASH manifests (for example
067aa17e 1774 # video with such 'manifest behavior' see https://github.com/ytdl-org/youtube-dl/issues/6093)
4e62ebe2 1775 self.report_video_info_webpage_download(video_id)
a61ce714 1776 for el in ('embedded', 'detailpage', 'vevo', ''):
dc4e4f90
S
1777 query = {
1778 'video_id': video_id,
1779 'ps': 'default',
1780 'eurl': '',
1781 'gl': 'US',
1782 'hl': 'en',
1783 }
1784 if el:
1785 query['el'] = el
1786 if sts:
1787 query['sts'] = sts
810fb84d 1788 video_info_webpage = self._download_webpage(
dc4e4f90 1789 '%s://www.youtube.com/get_video_info' % proto,
4e62ebe2 1790 video_id, note=False,
dc4e4f90
S
1791 errnote='unable to download video info webpage',
1792 fatal=False, query=query)
1793 if not video_info_webpage:
1794 continue
0a3cf9ad 1795 get_video_info = compat_parse_qs(video_info_webpage)
dbdaaa23
S
1796 if not player_response:
1797 pl_response = get_video_info.get('player_response', [None])[0]
c2d125d9 1798 player_response = extract_player_response(pl_response, video_id)
fd545fc6 1799 add_dash_mpd(get_video_info)
c7121fa7
S
1800 if view_count is None:
1801 view_count = extract_view_count(get_video_info)
0a3cf9ad
S
1802 if not video_info:
1803 video_info = get_video_info
026fbedc 1804 get_token = extract_token(get_video_info)
56667d62 1805 if get_token:
89ea063e
S
1806 # Different get_video_info requests may report different results, e.g.
1807 # some may report video unavailability, but some may serve it without
067aa17e 1808 # any complaint (see https://github.com/ytdl-org/youtube-dl/issues/7362,
89ea063e
S
1809 # the original webpage as well as el=info and el=embedded get_video_info
1810 # requests report video unavailability due to geo restriction while
1811 # el=detailpage succeeds and returns valid data). This is probably
1812 # due to YouTube measures against IP ranges of hosting providers.
1813 # Working around by preferring the first succeeded video_info containing
1814 # the token if no such video_info yet was found.
026fbedc 1815 token = extract_token(video_info)
56667d62 1816 if not token:
44b2264f 1817 video_info = get_video_info
4e62ebe2 1818 break
bbb7c3f7
YCH
1819
1820 def extract_unavailable_message():
0add33ab
S
1821 messages = []
1822 for tag, kind in (('h1', 'message'), ('div', 'submessage')):
1823 msg = self._html_search_regex(
1824 r'(?s)<{tag}[^>]+id=["\']unavailable-{kind}["\'][^>]*>(.+?)</{tag}>'.format(tag=tag, kind=kind),
1825 video_webpage, 'unavailable %s' % kind, default=None)
1826 if msg:
1827 messages.append(msg)
1828 if messages:
1829 return '\n'.join(messages)
bbb7c3f7 1830
15be3eb5
RA
1831 if not video_info:
1832 unavailable_message = extract_unavailable_message()
1833 if not unavailable_message:
1834 unavailable_message = 'Unable to extract video data'
1835 raise ExtractorError(
1836 'YouTube said: %s' % unavailable_message, expected=True, video_id=video_id)
1837
dbdaaa23
S
1838 video_details = try_get(
1839 player_response, lambda x: x['videoDetails'], dict) or {}
1840
8dbf751a
RA
1841 video_title = video_info.get('title', [None])[0] or video_details.get('title')
1842 if not video_title:
cf7e015f
S
1843 self._downloader.report_warning('Unable to extract video title')
1844 video_title = '_'
1845
9cafc3fd 1846 description_original = video_description = get_element_by_id("eow-description", video_webpage)
cf7e015f 1847 if video_description:
fa4bc6e7
RA
1848
1849 def replace_url(m):
1850 redir_url = compat_urlparse.urljoin(url, m.group(1))
1851 parsed_redir_url = compat_urllib_parse_urlparse(redir_url)
1852 if re.search(r'^(?:www\.)?(?:youtube(?:-nocookie)?\.com|youtu\.be)$', parsed_redir_url.netloc) and parsed_redir_url.path == '/redirect':
1853 qs = compat_parse_qs(parsed_redir_url.query)
1854 q = qs.get('q')
1855 if q and q[0]:
1856 return q[0]
1857 return redir_url
1858
9cafc3fd 1859 description_original = video_description = re.sub(r'''(?x)
cf7e015f 1860 <a\s+
25cb7a0e 1861 (?:[a-zA-Z-]+="[^"]*"\s+)*?
23f13e97 1862 (?:title|href)="([^"]+)"\s+
25cb7a0e 1863 (?:[a-zA-Z-]+="[^"]*"\s+)*?
525cedb9 1864 class="[^"]*"[^>]*>
23f13e97 1865 [^<]+\.{3}\s*
cf7e015f 1866 </a>
fa4bc6e7 1867 ''', replace_url, video_description)
cf7e015f
S
1868 video_description = clean_html(video_description)
1869 else:
8dbf751a 1870 video_description = self._html_search_meta('description', video_webpage) or video_details.get('shortDescription')
cf7e015f 1871
8fe10494 1872 if not smuggled_data.get('force_singlefeed', False):
5e1eddb9 1873 if not self._downloader.params.get('noplaylist'):
8fe10494
S
1874 multifeed_metadata_list = try_get(
1875 player_response,
1876 lambda x: x['multicamera']['playerLegacyMulticameraRenderer']['metadataList'],
1877 compat_str) or try_get(
1878 video_info, lambda x: x['multifeed_metadata_list'][0], compat_str)
1879 if multifeed_metadata_list:
1880 entries = []
1881 feed_ids = []
1882 for feed in multifeed_metadata_list.split(','):
1883 # Unquote should take place before split on comma (,) since textual
1884 # fields may contain comma as well (see
067aa17e 1885 # https://github.com/ytdl-org/youtube-dl/issues/8536)
8fe10494
S
1886 feed_data = compat_parse_qs(compat_urllib_parse_unquote_plus(feed))
1887 entries.append({
1888 '_type': 'url_transparent',
1889 'ie_key': 'Youtube',
1890 'url': smuggle_url(
1891 '%s://www.youtube.com/watch?v=%s' % (proto, feed_data['id'][0]),
1892 {'force_singlefeed': True}),
1893 'title': '%s (%s)' % (video_title, feed_data['title'][0]),
1894 })
1895 feed_ids.append(feed_data['id'][0])
1896 self.to_screen(
1897 'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
1898 % (', '.join(feed_ids), video_id))
1899 return self.playlist_result(entries, video_id, video_title, video_description)
1900 else:
1901 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
cf7e015f 1902
c7121fa7 1903 if view_count is None:
1c9c8de2 1904 view_count = extract_view_count(video_info)
dbdaaa23
S
1905 if view_count is None and video_details:
1906 view_count = int_or_none(video_details.get('viewCount'))
1d699755 1907
27019dbb 1908 if is_live is None:
898238e9 1909 is_live = bool_or_none(video_details.get('isLive'))
27019dbb 1910
c5e8d7af
PH
1911 # Check for "rental" videos
1912 if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info:
067aa17e 1913 raise ExtractorError('"rental" videos not supported. See https://github.com/ytdl-org/youtube-dl/issues/359 for more information.', expected=True)
c5e8d7af 1914
c63ca0ee
S
1915 def _extract_filesize(media_url):
1916 return int_or_none(self._search_regex(
1917 r'\bclen[=/](\d+)', media_url, 'filesize', default=None))
1918
bf1317d2
S
1919 streaming_formats = try_get(player_response, lambda x: x['streamingData']['formats'], list) or []
1920 streaming_formats.extend(try_get(player_response, lambda x: x['streamingData']['adaptiveFormats'], list) or [])
1921
c5e8d7af
PH
1922 if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
1923 self.report_rtmp_download()
dd27fd17
PH
1924 formats = [{
1925 'format_id': '_rtmp',
1926 'protocol': 'rtmp',
1927 'url': video_info['conn'][0],
1928 'player_url': player_url,
1929 }]
bf1317d2 1930 elif not is_live and (streaming_formats or len(video_info.get('url_encoded_fmt_stream_map', [''])[0]) >= 1 or len(video_info.get('adaptive_fmts', [''])[0]) >= 1):
5f6a1245 1931 encoded_url_map = video_info.get('url_encoded_fmt_stream_map', [''])[0] + ',' + video_info.get('adaptive_fmts', [''])[0]
00fe14fc 1932 if 'rtmpe%3Dyes' in encoded_url_map:
067aa17e 1933 raise ExtractorError('rtmpe downloads are not supported, see https://github.com/ytdl-org/youtube-dl/issues/343 for more information.', expected=True)
bf1317d2 1934 formats = []
3318832e 1935 formats_spec = {}
82156fdb 1936 fmt_list = video_info.get('fmt_list', [''])[0]
1937 if fmt_list:
1938 for fmt in fmt_list.split(','):
1939 spec = fmt.split('/')
3318832e 1940 if len(spec) > 1:
1941 width_height = spec[1].split('x')
1942 if len(width_height) == 2:
1943 formats_spec[spec[0]] = {
1944 'resolution': spec[1],
1945 'width': int_or_none(width_height[0]),
1946 'height': int_or_none(width_height[1]),
1947 }
bf1317d2
S
1948 for fmt in streaming_formats:
1949 itag = str_or_none(fmt.get('itag'))
1950 if not itag:
201e9eaa 1951 continue
bf1317d2
S
1952 quality = fmt.get('quality')
1953 quality_label = fmt.get('qualityLabel') or quality
1954 formats_spec[itag] = {
1955 'asr': int_or_none(fmt.get('audioSampleRate')),
1956 'filesize': int_or_none(fmt.get('contentLength')),
1957 'format_note': quality_label,
1958 'fps': int_or_none(fmt.get('fps')),
1959 'height': int_or_none(fmt.get('height')),
bf1317d2
S
1960 # bitrate for itag 43 is always 2147483647
1961 'tbr': float_or_none(fmt.get('averageBitrate') or fmt.get('bitrate'), 1000) if itag != '43' else None,
1962 'width': int_or_none(fmt.get('width')),
1963 }
1964
1965 for fmt in streaming_formats:
1966 if fmt.get('drm_families'):
1967 continue
1968 url = url_or_none(fmt.get('url'))
1969
1970 if not url:
1971 cipher = fmt.get('cipher')
1972 if not cipher:
1973 continue
1974 url_data = compat_parse_qs(cipher)
1975 url = url_or_none(try_get(url_data, lambda x: x['url'][0], compat_str))
1976 if not url:
1977 continue
1978 else:
1979 cipher = None
1980 url_data = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
1981
2f483bc1
S
1982 stream_type = int_or_none(try_get(url_data, lambda x: x['stream_type'][0]))
1983 # Unsupported FORMAT_STREAM_TYPE_OTF
1984 if stream_type == 3:
1985 continue
6449cd80 1986
bf1317d2
S
1987 format_id = fmt.get('itag') or url_data['itag'][0]
1988 if not format_id:
1989 continue
1990 format_id = compat_str(format_id)
a49eccdf 1991
bf1317d2
S
1992 if cipher:
1993 if 's' in url_data or self._downloader.params.get('youtube_include_dash_manifest', True):
1994 ASSETS_RE = r'"assets":.+?"js":\s*("[^"]+")'
1995 jsplayer_url_json = self._search_regex(
1996 ASSETS_RE,
1997 embed_webpage if age_gate else video_webpage,
1998 'JS player URL (1)', default=None)
1999 if not jsplayer_url_json and not age_gate:
2000 # We need the embed website after all
2001 if embed_webpage is None:
2002 embed_url = proto + '://www.youtube.com/embed/%s' % video_id
2003 embed_webpage = self._download_webpage(
2004 embed_url, video_id, 'Downloading embed webpage')
2005 jsplayer_url_json = self._search_regex(
2006 ASSETS_RE, embed_webpage, 'JS player URL')
2007
2008 player_url = json.loads(jsplayer_url_json)
cf010131 2009 if player_url is None:
bf1317d2
S
2010 player_url_json = self._search_regex(
2011 r'ytplayer\.config.*?"url"\s*:\s*("[^"]+")',
2012 video_webpage, 'age gate player URL')
2013 player_url = json.loads(player_url_json)
2014
2015 if 'sig' in url_data:
2016 url += '&signature=' + url_data['sig'][0]
2017 elif 's' in url_data:
2018 encrypted_sig = url_data['s'][0]
2019
2020 if self._downloader.params.get('verbose'):
2021 if player_url is None:
2022 player_version = 'unknown'
2023 player_desc = 'unknown'
cf010131 2024 else:
bf1317d2
S
2025 if player_url.endswith('swf'):
2026 player_version = self._search_regex(
2027 r'-(.+?)(?:/watch_as3)?\.swf$', player_url,
2028 'flash player', fatal=False)
2029 player_desc = 'flash player %s' % player_version
2030 else:
2031 player_version = self._search_regex(
2032 [r'html5player-([^/]+?)(?:/html5player(?:-new)?)?\.js',
2033 r'(?:www|player(?:_ias)?)-([^/]+)(?:/[a-z]{2,3}_[A-Z]{2})?/base\.js'],
2034 player_url,
2035 'html5 player', fatal=False)
2036 player_desc = 'html5 player %s' % player_version
2037
2038 parts_sizes = self._signature_cache_id(encrypted_sig)
2039 self.to_screen('{%s} signature length %s, %s' %
2040 (format_id, parts_sizes, player_desc))
2041
2042 signature = self._decrypt_signature(
2043 encrypted_sig, video_id, player_url, age_gate)
2044 sp = try_get(url_data, lambda x: x['sp'][0], compat_str) or 'signature'
2045 url += '&%s=%s' % (sp, signature)
201e9eaa
PH
2046 if 'ratebypass' not in url:
2047 url += '&ratebypass=yes'
c9afb51c 2048
94278f72
YCH
2049 dct = {
2050 'format_id': format_id,
2051 'url': url,
2052 'player_url': player_url,
2053 }
2054 if format_id in self._formats:
2055 dct.update(self._formats[format_id])
3318832e 2056 if format_id in formats_spec:
2057 dct.update(formats_spec[format_id])
94278f72 2058
aabc2be6 2059 # Some itags are not included in DASH manifest thus corresponding formats will
067aa17e 2060 # lack metadata (see https://github.com/ytdl-org/youtube-dl/pull/5993).
aabc2be6
S
2061 # Trying to extract metadata from url_encoded_fmt_stream_map entry.
2062 mobj = re.search(r'^(?P<width>\d+)[xX](?P<height>\d+)$', url_data.get('size', [''])[0])
2063 width, height = (int(mobj.group('width')), int(mobj.group('height'))) if mobj else (None, None)
94278f72 2064
bf1317d2
S
2065 if width is None:
2066 width = int_or_none(fmt.get('width'))
2067 if height is None:
2068 height = int_or_none(fmt.get('height'))
2069
c63ca0ee
S
2070 filesize = int_or_none(url_data.get(
2071 'clen', [None])[0]) or _extract_filesize(url)
2072
bf1317d2
S
2073 quality = url_data.get('quality', [None])[0] or fmt.get('quality')
2074 quality_label = url_data.get('quality_label', [None])[0] or fmt.get('qualityLabel')
2075
4878759f
S
2076 tbr = (float_or_none(url_data.get('bitrate', [None])[0], 1000)
2077 or float_or_none(fmt.get('bitrate'), 1000)) if format_id != '43' else None
bf1317d2 2078 fps = int_or_none(url_data.get('fps', [None])[0]) or int_or_none(fmt.get('fps'))
54fc90aa 2079
94278f72 2080 more_fields = {
c63ca0ee 2081 'filesize': filesize,
bf1317d2 2082 'tbr': tbr,
c9afb51c
AH
2083 'width': width,
2084 'height': height,
bf1317d2
S
2085 'fps': fps,
2086 'format_note': quality_label or quality,
c9afb51c 2087 }
94278f72
YCH
2088 for key, value in more_fields.items():
2089 if value:
2090 dct[key] = value
bf1317d2 2091 type_ = url_data.get('type', [None])[0] or fmt.get('mimeType')
aabc2be6
S
2092 if type_:
2093 type_split = type_.split(';')
2094 kind_ext = type_split[0].split('/')
2095 if len(kind_ext) == 2:
94278f72
YCH
2096 kind, _ = kind_ext
2097 dct['ext'] = mimetype2ext(type_split[0])
aabc2be6
S
2098 if kind in ('audio', 'video'):
2099 codecs = None
2100 for mobj in re.finditer(
2101 r'(?P<key>[a-zA-Z_-]+)=(?P<quote>["\']?)(?P<val>.+?)(?P=quote)(?:;|$)', type_):
2102 if mobj.group('key') == 'codecs':
2103 codecs = mobj.group('val')
2104 break
2105 if codecs:
6310acf5 2106 dct.update(parse_codecs(codecs))
e4a60912
S
2107 if dct.get('acodec') == 'none' or dct.get('vcodec') == 'none':
2108 dct['downloader_options'] = {
2109 # Youtube throttles chunks >~10M
2110 'http_chunk_size': 10485760,
2111 }
aabc2be6 2112 formats.append(dct)
c5e8d7af 2113 else:
c3e54389
S
2114 manifest_url = (
2115 url_or_none(try_get(
2116 player_response,
2117 lambda x: x['streamingData']['hlsManifestUrl'],
3089bc74
S
2118 compat_str))
2119 or url_or_none(try_get(
c3e54389
S
2120 video_info, lambda x: x['hlsvp'][0], compat_str)))
2121 if manifest_url:
2122 formats = []
2123 m3u8_formats = self._extract_m3u8_formats(
2124 manifest_url, video_id, 'mp4', fatal=False)
2125 for a_format in m3u8_formats:
2126 itag = self._search_regex(
2127 r'/itag/(\d+)/', a_format['url'], 'itag', default=None)
2128 if itag:
2129 a_format['format_id'] = itag
2130 if itag in self._formats:
2131 dct = self._formats[itag].copy()
2132 dct.update(a_format)
2133 a_format = dct
2134 a_format['player_url'] = player_url
2135 # Accept-Encoding header causes failures in live streams on Youtube and Youtube Gaming
2136 a_format.setdefault('http_headers', {})['Youtubedl-no-compression'] = 'True'
2137 formats.append(a_format)
2138 else:
13577349 2139 error_message = extract_unavailable_message()
c3e54389 2140 if not error_message:
13577349
S
2141 error_message = clean_html(try_get(
2142 player_response, lambda x: x['playabilityStatus']['reason'],
2143 compat_str))
2144 if not error_message:
2145 error_message = clean_html(
2146 try_get(video_info, lambda x: x['reason'][0], compat_str))
c3e54389
S
2147 if error_message:
2148 raise ExtractorError(error_message, expected=True)
2149 raise ExtractorError('no conn, hlsvp, hlsManifestUrl or url_encoded_fmt_stream_map information found in video info')
c5e8d7af 2150
7e72694b 2151 # uploader
dbdaaa23
S
2152 video_uploader = try_get(
2153 video_info, lambda x: x['author'][0],
2154 compat_str) or str_or_none(video_details.get('author'))
7e72694b
S
2155 if video_uploader:
2156 video_uploader = compat_urllib_parse_unquote_plus(video_uploader)
2157 else:
2158 self._downloader.report_warning('unable to extract uploader name')
2159
2160 # uploader_id
2161 video_uploader_id = None
2162 video_uploader_url = None
2163 mobj = re.search(
2164 r'<link itemprop="url" href="(?P<uploader_url>https?://www\.youtube\.com/(?:user|channel)/(?P<uploader_id>[^"]+))">',
2165 video_webpage)
2166 if mobj is not None:
2167 video_uploader_id = mobj.group('uploader_id')
2168 video_uploader_url = mobj.group('uploader_url')
2169 else:
2170 self._downloader.report_warning('unable to extract uploader nickname')
2171
b45a9e69 2172 channel_id = (
3089bc74
S
2173 str_or_none(video_details.get('channelId'))
2174 or self._html_search_meta(
2175 'channelId', video_webpage, 'channel id', default=None)
2176 or self._search_regex(
b45a9e69 2177 r'data-channel-external-id=(["\'])(?P<id>(?:(?!\1).)+)\1',
2178 video_webpage, 'channel id', default=None, group='id'))
dd4c4492
S
2179 channel_url = 'http://www.youtube.com/channel/%s' % channel_id if channel_id else None
2180
7e72694b
S
2181 # thumbnail image
2182 # We try first to get a high quality image:
2183 m_thumb = re.search(r'<span itemprop="thumbnail".*?href="(.*?)">',
2184 video_webpage, re.DOTALL)
2185 if m_thumb is not None:
2186 video_thumbnail = m_thumb.group(1)
2187 elif 'thumbnail_url' not in video_info:
2188 self._downloader.report_warning('unable to extract video thumbnail')
2189 video_thumbnail = None
2190 else: # don't panic if we can't find it
2191 video_thumbnail = compat_urllib_parse_unquote_plus(video_info['thumbnail_url'][0])
2192
2193 # upload date
2194 upload_date = self._html_search_meta(
2195 'datePublished', video_webpage, 'upload date', default=None)
2196 if not upload_date:
2197 upload_date = self._search_regex(
2198 [r'(?s)id="eow-date.*?>(.*?)</span>',
2199 r'(?:id="watch-uploader-info".*?>.*?|["\']simpleText["\']\s*:\s*["\'])(?:Published|Uploaded|Streamed live|Started) on (.+?)[<"\']'],
2200 video_webpage, 'upload date', default=None)
2201 upload_date = unified_strdate(upload_date)
2202
2203 video_license = self._html_search_regex(
2204 r'<h4[^>]+class="title"[^>]*>\s*License\s*</h4>\s*<ul[^>]*>\s*<li>(.+?)</li',
2205 video_webpage, 'license', default=None)
2206
2207 m_music = re.search(
2208 r'''(?x)
2209 <h4[^>]+class="title"[^>]*>\s*Music\s*</h4>\s*
2210 <ul[^>]*>\s*
2211 <li>(?P<title>.+?)
2212 by (?P<creator>.+?)
2213 (?:
2214 \(.+?\)|
2215 <a[^>]*
2216 (?:
2217 \bhref=["\']/red[^>]*>| # drop possible
2218 >\s*Listen ad-free with YouTube Red # YouTube Red ad
2219 )
2220 .*?
2221 )?</li
2222 ''',
2223 video_webpage)
2224 if m_music:
2225 video_alt_title = remove_quotes(unescapeHTML(m_music.group('title')))
2226 video_creator = clean_html(m_music.group('creator'))
2227 else:
2228 video_alt_title = video_creator = None
2229
2230 def extract_meta(field):
2231 return self._html_search_regex(
2232 r'<h4[^>]+class="title"[^>]*>\s*%s\s*</h4>\s*<ul[^>]*>\s*<li>(.+?)</li>\s*' % field,
2233 video_webpage, field, default=None)
2234
2235 track = extract_meta('Song')
2236 artist = extract_meta('Artist')
92bc97d3 2237 album = extract_meta('Album')
822b9d9c
RA
2238
2239 # Youtube Music Auto-generated description
92bc97d3 2240 release_date = release_year = None
822b9d9c
RA
2241 if video_description:
2242 mobj = re.search(r'(?s)Provided to YouTube by [^\n]+\n+(?P<track>[^·]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?', video_description)
2243 if mobj:
2244 if not track:
2245 track = mobj.group('track').strip()
2246 if not artist:
2247 artist = mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·'))
92bc97d3
RA
2248 if not album:
2249 album = mobj.group('album'.strip())
822b9d9c
RA
2250 release_year = mobj.group('release_year')
2251 release_date = mobj.group('release_date')
2252 if release_date:
2253 release_date = release_date.replace('-', '')
2254 if not release_year:
2255 release_year = int(release_date[:4])
2256 if release_year:
2257 release_year = int(release_year)
7e72694b
S
2258
2259 m_episode = re.search(
2260 r'<div[^>]+id="watch7-headline"[^>]*>\s*<span[^>]*>.*?>(?P<series>[^<]+)</a></b>\s*S(?P<season>\d+)\s*•\s*E(?P<episode>\d+)</span>',
2261 video_webpage)
2262 if m_episode:
c2dd2dc0 2263 series = unescapeHTML(m_episode.group('series'))
7e72694b
S
2264 season_number = int(m_episode.group('season'))
2265 episode_number = int(m_episode.group('episode'))
2266 else:
2267 series = season_number = episode_number = None
2268
2269 m_cat_container = self._search_regex(
2270 r'(?s)<h4[^>]*>\s*Category\s*</h4>\s*<ul[^>]*>(.*?)</ul>',
2271 video_webpage, 'categories', default=None)
2272 if m_cat_container:
2273 category = self._html_search_regex(
2274 r'(?s)<a[^<]+>(.*?)</a>', m_cat_container, 'category',
2275 default=None)
2276 video_categories = None if category is None else [category]
2277 else:
2278 video_categories = None
2279
2280 video_tags = [
2281 unescapeHTML(m.group('content'))
2282 for m in re.finditer(self._meta_regex('og:video:tag'), video_webpage)]
2283
2284 def _extract_count(count_name):
2285 return str_to_int(self._search_regex(
2286 r'-%s-button[^>]+><span[^>]+class="yt-uix-button-content"[^>]*>([\d,]+)</span>'
2287 % re.escape(count_name),
2288 video_webpage, count_name, default=None))
2289
2290 like_count = _extract_count('like')
2291 dislike_count = _extract_count('dislike')
2292
dbdaaa23
S
2293 if view_count is None:
2294 view_count = str_to_int(self._search_regex(
2295 r'<[^>]+class=["\']watch-view-count[^>]+>\s*([\d,\s]+)', video_webpage,
2296 'view count', default=None))
2297
bf3c9326
S
2298 average_rating = (
2299 float_or_none(video_details.get('averageRating'))
2300 or try_get(video_info, lambda x: float_or_none(x['avg_rating'][0])))
2301
7e72694b
S
2302 # subtitles
2303 video_subtitles = self.extract_subtitles(video_id, video_webpage)
2304 automatic_captions = self.extract_automatic_captions(video_id, video_webpage)
2305
2306 video_duration = try_get(
2307 video_info, lambda x: int_or_none(x['length_seconds'][0]))
dbdaaa23
S
2308 if not video_duration:
2309 video_duration = int_or_none(video_details.get('lengthSeconds'))
7e72694b
S
2310 if not video_duration:
2311 video_duration = parse_duration(self._html_search_meta(
2312 'duration', video_webpage, 'video duration'))
2313
2314 # annotations
2315 video_annotations = None
2316 if self._downloader.params.get('writeannotations', False):
64b6a4e9
RA
2317 xsrf_token = self._search_regex(
2318 r'([\'"])XSRF_TOKEN\1\s*:\s*([\'"])(?P<xsrf_token>[A-Za-z0-9+/=]+)\2',
2319 video_webpage, 'xsrf token', group='xsrf_token', fatal=False)
2320 invideo_url = try_get(
2321 player_response, lambda x: x['annotations'][0]['playerAnnotationsUrlsRenderer']['invideoUrl'], compat_str)
2322 if xsrf_token and invideo_url:
2323 xsrf_field_name = self._search_regex(
2324 r'([\'"])XSRF_FIELD_NAME\1\s*:\s*([\'"])(?P<xsrf_field_name>\w+)\2',
2325 video_webpage, 'xsrf field name',
2326 group='xsrf_field_name', default='session_token')
2327 video_annotations = self._download_webpage(
2328 self._proto_relative_url(invideo_url),
2329 video_id, note='Downloading annotations',
2330 errnote='Unable to download video annotations', fatal=False,
2331 data=urlencode_postdata({xsrf_field_name: xsrf_token}))
7e72694b
S
2332
2333 chapters = self._extract_chapters(description_original, video_duration)
2334
dd27fd17 2335 # Look for the DASH manifest
203fb43f 2336 if self._downloader.params.get('youtube_include_dash_manifest', True):
77c6fb5b 2337 dash_mpd_fatal = True
8ff648e4 2338 for mpd_url in dash_mpds:
d8d24a92 2339 dash_formats = {}
774e208f 2340 try:
05d0d131
YCH
2341 def decrypt_sig(mobj):
2342 s = mobj.group(1)
2343 dec_s = self._decrypt_signature(s, video_id, player_url, age_gate)
2344 return '/signature/%s' % dec_s
2345
8ff648e4 2346 mpd_url = re.sub(r'/s/([a-fA-F0-9\.]+)', decrypt_sig, mpd_url)
2d2fa82d 2347
8ff648e4 2348 for df in self._extract_mpd_formats(
2349 mpd_url, video_id, fatal=dash_mpd_fatal,
2350 formats_dict=self._formats):
c63ca0ee
S
2351 if not df.get('filesize'):
2352 df['filesize'] = _extract_filesize(df['url'])
d8d24a92
S
2353 # Do not overwrite DASH format found in some previous DASH manifest
2354 if df['format_id'] not in dash_formats:
2355 dash_formats[df['format_id']] = df
77c6fb5b
S
2356 # Additional DASH manifests may end up in HTTP Error 403 therefore
2357 # allow them to fail without bug report message if we already have
2358 # some DASH manifest succeeded. This is temporary workaround to reduce
2359 # burst of bug reports until we figure out the reason and whether it
2360 # can be fixed at all.
2361 dash_mpd_fatal = False
774e208f
PH
2362 except (ExtractorError, KeyError) as e:
2363 self.report_warning(
2364 'Skipping DASH manifest: %r' % e, video_id)
d8d24a92 2365 if dash_formats:
04b3b3df
JMF
2366 # Remove the formats we found through non-DASH, they
2367 # contain less info and it can be wrong, because we use
2368 # fixed values (for example the resolution). See
067aa17e 2369 # https://github.com/ytdl-org/youtube-dl/issues/5774 for an
04b3b3df 2370 # example.
d80265cc 2371 formats = [f for f in formats if f['format_id'] not in dash_formats.keys()]
d8d24a92 2372 formats.extend(dash_formats.values())
d80044c2 2373
6271f1ca
PH
2374 # Check for malformed aspect ratio
2375 stretched_m = re.search(
2376 r'<meta\s+property="og:video:tag".*?content="yt:stretch=(?P<w>[0-9]+):(?P<h>[0-9]+)">',
2377 video_webpage)
2378 if stretched_m:
313dfc45
LL
2379 w = float(stretched_m.group('w'))
2380 h = float(stretched_m.group('h'))
5faf9fed
S
2381 # yt:stretch may hold invalid ratio data (e.g. for Q39EVAstoRM ratio is 17:0).
2382 # We will only process correct ratios.
313dfc45 2383 if w > 0 and h > 0:
41f24c32 2384 ratio = w / h
313dfc45
LL
2385 for f in formats:
2386 if f.get('vcodec') != 'none':
2387 f['stretched_ratio'] = ratio
6271f1ca 2388
026fbedc
S
2389 if not formats:
2390 token = extract_token(video_info)
2391 if not token:
2392 if 'reason' in video_info:
2393 if 'The uploader has not made this video available in your country.' in video_info['reason']:
2394 regions_allowed = self._html_search_meta(
2395 'regionsAllowed', video_webpage, default=None)
2396 countries = regions_allowed.split(',') if regions_allowed else None
2397 self.raise_geo_restricted(
2398 msg=video_info['reason'][0], countries=countries)
2399 reason = video_info['reason'][0]
2400 if 'Invalid parameters' in reason:
2401 unavailable_message = extract_unavailable_message()
2402 if unavailable_message:
2403 reason = unavailable_message
2404 raise ExtractorError(
2405 'YouTube said: %s' % reason,
2406 expected=True, video_id=video_id)
2407 else:
2408 raise ExtractorError(
2409 '"token" parameter not in video info for unknown reason',
2410 video_id=video_id)
2411
0d297518
RA
2412 if not formats and (video_info.get('license_info') or try_get(player_response, lambda x: x['streamingData']['licenseInfos'])):
2413 raise ExtractorError('This video is DRM protected.', expected=True)
2414
4bcc7bd1 2415 self._sort_formats(formats)
4ea3be0a 2416
21c340b8 2417 self.mark_watched(video_id, video_info, player_response)
d77ab8e2 2418
4ea3be0a 2419 return {
8bcc8756
JW
2420 'id': video_id,
2421 'uploader': video_uploader,
2422 'uploader_id': video_uploader_id,
fd050249 2423 'uploader_url': video_uploader_url,
dd4c4492
S
2424 'channel_id': channel_id,
2425 'channel_url': channel_url,
8bcc8756 2426 'upload_date': upload_date,
7caf9830 2427 'license': video_license,
936784b2 2428 'creator': video_creator or artist,
8bcc8756 2429 'title': video_title,
936784b2 2430 'alt_title': video_alt_title or track,
8bcc8756
JW
2431 'thumbnail': video_thumbnail,
2432 'description': video_description,
2433 'categories': video_categories,
000b6b5a 2434 'tags': video_tags,
8bcc8756 2435 'subtitles': video_subtitles,
360e1ca5 2436 'automatic_captions': automatic_captions,
8bcc8756
JW
2437 'duration': video_duration,
2438 'age_limit': 18 if age_gate else 0,
2439 'annotations': video_annotations,
9cafc3fd 2440 'chapters': chapters,
7e8c0af0 2441 'webpage_url': proto + '://www.youtube.com/watch?v=%s' % video_id,
8bcc8756 2442 'view_count': view_count,
4ea3be0a 2443 'like_count': like_count,
2444 'dislike_count': dislike_count,
bf3c9326 2445 'average_rating': average_rating,
8bcc8756 2446 'formats': formats,
2fe1ff85 2447 'is_live': is_live,
7c80519c 2448 'start_time': start_time,
297a564b 2449 'end_time': end_time,
12afdc2a
S
2450 'series': series,
2451 'season_number': season_number,
2452 'episode_number': episode_number,
936784b2
S
2453 'track': track,
2454 'artist': artist,
5caabd3c 2455 'album': album,
2456 'release_date': release_date,
2457 'release_year': release_year,
4ea3be0a 2458 }
c5e8d7af 2459
5f6a1245 2460
class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
    """Extract YouTube playlists given a playlist URL or a bare playlist id.

    URLs that also carry a video id are resolved to the single video when
    ``noplaylist`` is set, or when the playlist page yields no title and no
    entries.  Ids starting with ``RD``, ``UL`` or ``PU`` are handled by
    ``_extract_mix``.
    """

    IE_DESC = 'YouTube.com playlists'
    _VALID_URL = r"""(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        (?:
                            (?:
                                youtube\.com|
                                invidio\.us
                            )
                            /
                            (?:
                               (?:course|view_play_list|my_playlists|artist|playlist|watch|embed/(?:videoseries|[0-9A-Za-z_-]{11}))
                               \? (?:.*?[&;])*? (?:p|a|list)=
                            |  p/
                            )|
                            youtu\.be/[0-9A-Za-z_-]{11}\?.*?\blist=
                        )
                        (
                            (?:PL|LL|EC|UU|FL|RD|UL|TL|OLAK5uy_)?[0-9A-Za-z-_]{10,}
                            # Top tracks, they can also include dots
                            |(?:MC)[\w\.]*
                        )
                        .*
                     |
                        (%(playlist_id)s)
                     )""" % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s'
    _VIDEO_RE_TPL = r'href="\s*/watch\?v=%s(?:&amp;(?:[^"]*?index=(?P<index>\d+))?(?:[^>]+>(?P<title>[^<]+))?)?'
    _VIDEO_RE = _VIDEO_RE_TPL % r'(?P<id>[0-9A-Za-z_-]{11})'
    IE_NAME = 'youtube:playlist'
    _TESTS = [{
        'url': 'https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re',
        'info_dict': {
            'title': 'ytdl test PL',
            'id': 'PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re',
        },
        'playlist_count': 3,
    }, {
        'url': 'https://www.youtube.com/playlist?list=PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx',
        'info_dict': {
            'id': 'PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx',
            'title': 'YDL_Empty_List',
        },
        'playlist_count': 0,
        'skip': 'This playlist is private',
    }, {
        'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
        'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
        'info_dict': {
            'title': '29C3: Not my department',
            'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
            'uploader': 'Christiaan008',
            'uploader_id': 'ChRiStIaAn008',
        },
        'playlist_count': 95,
    }, {
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
            'uploader': 'Wickydoo',
            'uploader_id': 'Wickydoo',
        },
        'playlist_mincount': 26,
    }, {
        'note': 'Large playlist',
        'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
        'info_dict': {
            'title': 'Uploads from Cauchemar',
            'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
            'uploader': 'Cauchemar',
            'uploader_id': 'Cauchemar89',
        },
        'playlist_mincount': 799,
    }, {
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        },
        'playlist_count': 2,
        'skip': 'This playlist is private',
    }, {
        'note': 'embedded',
        'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA15',
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
            'uploader': 'milan',
            'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
        }
    }, {
        'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
        'playlist_mincount': 485,
        'info_dict': {
            'title': '2018 Chinese New Singles (11/6 updated)',
            'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
            'uploader': 'LBK',
            'uploader_id': 'sdragonfang',
        }
    }, {
        'note': 'Embedded SWF player',
        'url': 'https://www.youtube.com/p/YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ?hl=en_US&fs=1&rel=0',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA7',
            'id': 'YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ',
        },
        'skip': 'This playlist does not exist',
    }, {
        'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
        'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
        'info_dict': {
            'title': 'Uploads from Interstellar Movie',
            'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
            'uploader': 'Interstellar Movie',
            'uploader_id': 'InterstellarMovie1',
        },
        'playlist_mincount': 21,
    }, {
        # Playlist URL that does not actually serve a playlist
        'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
        'info_dict': {
            'id': 'FqZTN594JQw',
            'ext': 'webm',
            'title': "Smiley's People 01 detective, Adventure Series, Action",
            'uploader': 'STREEM',
            'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
            'upload_date': '20150526',
            'license': 'Standard YouTube License',
            'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
            'categories': ['People & Blogs'],
            'tags': list,
            'view_count': int,
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'skip_download': True,
        },
        'skip': 'This video is not available.',
        'add_ie': [YoutubeIE.ie_key()],
    }, {
        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
        'info_dict': {
            'id': 'yeWKywCrFtk',
            'ext': 'mp4',
            'title': 'Small Scale Baler and Braiding Rugs',
            'uploader': 'Backus-Page House Museum',
            'uploader_id': 'backuspagemuseum',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
            'upload_date': '20161008',
            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
            'categories': ['Nonprofits & Activism'],
            'tags': list,
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
    }, {
        # https://github.com/ytdl-org/youtube-dl/issues/21844
        'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
        'info_dict': {
            'title': 'Data Analysis with Dr Mike Pound',
            'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
            'uploader_id': 'Computerphile',
            'uploader': 'Computerphile',
        },
        'playlist_mincount': 11,
    }, {
        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
        'only_matching': True,
    }, {
        'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
        'only_matching': True,
    }, {
        # music album playlist
        'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
        'only_matching': True,
    }, {
        'url': 'https://invidio.us/playlist?list=PLDIoUOhQQPlXr63I_vwF9GD8sAKh77dWU',
        'only_matching': True,
    }]

    def _real_initialize(self):
        """Log in before any extraction is attempted."""
        self._login()

    def extract_videos_from_page(self, page):
        """Return an iterable of ``(video_id, title)`` pairs found in *page*.

        Tags carrying a ``data-video-id`` attribute are collected first; the
        class-level ``_VIDEO_RE`` and two relaxed regexes are then applied as
        fallbacks through ``extract_videos_from_page_impl``.
        """
        found_ids = []
        found_titles = []

        tag_re = r'(<[^>]*\bdata-video-id\s*=\s*["\'][0-9A-Za-z_-]{11}[^>]+>)'
        for tag in re.findall(tag_re, page):
            tag_attrs = extract_attributes(tag)
            tag_video_id = tag_attrs['data-video-id']
            tag_title = unescapeHTML(tag_attrs.get('data-title'))
            found_ids.append(tag_video_id)
            # Only strip real titles; a missing/empty title is stored as is
            found_titles.append(tag_title.strip() if tag_title else tag_title)

        fallback_regexes = (
            # Fallback with old _VIDEO_RE
            self._VIDEO_RE,
            # Relaxed fallbacks
            r'href="\s*/watch\?v\s*=\s*(?P<id>[0-9A-Za-z_-]{11})',
            r'data-video-ids\s*=\s*["\'](?P<id>[0-9A-Za-z_-]{11})',
        )
        for fallback_re in fallback_regexes:
            self.extract_videos_from_page_impl(
                fallback_re, page, found_ids, found_titles)

        return zip(found_ids, found_titles)

    def _extract_mix(self, playlist_id):
        """Build a playlist result for a mix.

        The watch page for the most recently found video is downloaded over
        and over; extraction stops as soon as a page brings no unseen ids.
        """
        # The mixes are generated from a single video
        # the id of the playlist is just 'RD' + video_id
        collected_ids = []
        seed_id = playlist_id[-11:]
        for page_num in itertools.count(1):
            mix_url = 'https://youtube.com/watch?v=%s&list=%s' % (seed_id, playlist_id)
            webpage = self._download_webpage(
                mix_url, playlist_id,
                'Downloading page {0} of Youtube mix'.format(page_num))
            page_ids = orderedSet(re.findall(
                r'''(?xs)data-video-username=".*?".*?
                           href="/watch\?v=([0-9A-Za-z_-]{11})&amp;[^"]*?list=%s''' % re.escape(playlist_id),
                webpage))
            # Fetch new pages until all the videos are repeated, it seems that
            # there are always 51 unique videos.
            unseen_ids = [vid for vid in page_ids if vid not in collected_ids]
            if not unseen_ids:
                break
            collected_ids.extend(unseen_ids)
            seed_id = collected_ids[-1]

        url_results = self._ids_to_results(collected_ids)

        def element_with_class(class_name):
            # Looked up in the last page downloaded by the loop above
            return get_element_by_attribute('class', class_name, webpage)

        title_html = (
            element_with_class('playlist-title')
            or element_with_class('title long-title')
            or element_with_class('title'))

        return self.playlist_result(
            url_results, playlist_id, clean_html(title_html))

    def _extract_playlist(self, playlist_id):
        """Download the playlist page and return ``(has_videos, playlist)``.

        ``has_videos`` is False only when the page has no playlist title and
        ``_entries`` yields nothing.  Raises ExtractorError (expected) when an
        alert says the playlist does not exist, is private, or the parameters
        are invalid.
        """
        url = self._TEMPLATE_URL % playlist_id
        page = self._download_webpage(url, playlist_id)

        # the yt-alert-message now has tabindex attribute (see https://github.com/ytdl-org/youtube-dl/issues/11604)
        for alert in re.findall(r'<div class="yt-alert-message"[^>]*>([^<]+)</div>', page):
            alert = alert.strip()
            # Check if the playlist exists or is private
            unavailable = re.match(
                r'[^<]*(?:The|This) playlist (?P<reason>does not exist|is private)[^<]*', alert)
            if unavailable:
                reason = unavailable.group('reason')
                message = 'This playlist %s' % reason
                if 'private' in reason:
                    message += ', use --username or --netrc to access it'
                raise ExtractorError(message + '.', expected=True)
            if re.match(r'[^<]*Invalid parameters[^<]*', alert):
                raise ExtractorError(
                    'Invalid parameters. Maybe URL is incorrect.',
                    expected=True)
            if re.match(r'[^<]*Choose your language[^<]*', alert):
                continue
            self.report_warning('Youtube gives an alert message: ' + alert)

        playlist_title = self._html_search_regex(
            r'(?s)<h1 class="pl-header-title[^"]*"[^>]*>\s*(.*?)\s*</h1>',
            page, 'title', default=None)

        uploader_base_re = r'class=["\']pl-header-details[^>]+>\s*<li>\s*<a[^>]+\bhref='
        uploader = self._html_search_regex(
            r'%s["\']/(?:user|channel)/[^>]+>([^<]+)' % uploader_base_re,
            page, 'uploader', default=None)

        uploader_id = uploader_url = None
        uploader_mobj = re.search(
            r'%s(["\'])(?P<path>/(?:user|channel)/(?P<uploader_id>.+?))\1' % uploader_base_re,
            page)
        if uploader_mobj:
            uploader_id = uploader_mobj.group('uploader_id')
            uploader_url = compat_urlparse.urljoin(url, uploader_mobj.group('path'))

        # Some playlist URLs don't actually serve a playlist (e.g.
        # https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4),
        # so without a title probe whether there is at least one entry.
        no_entry = object()
        has_videos = bool(playlist_title) or (
            next(self._entries(page, playlist_id), no_entry) is not no_entry)

        playlist = self.playlist_result(
            self._entries(page, playlist_id), playlist_id, playlist_title)
        playlist.update(
            uploader=uploader,
            uploader_id=uploader_id,
            uploader_url=uploader_url)

        return has_videos, playlist

    def _check_download_just_video(self, url, playlist_id):
        """Return ``(video_id, video_result)`` for a URL that may name a video.

        ``video_id`` is None when the URL carries no video id; the result is
        only set when the ``noplaylist`` parameter asks for the single video.
        """
        # Check if it's a video-specific URL
        query = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
        video_id = query.get('v', [None])[0] or self._search_regex(
            r'(?:(?:^|//)youtu\.be/|youtube\.com/embed/(?!videoseries))([0-9A-Za-z_-]{11})', url,
            'video id', default=None)
        if not video_id:
            return None, None

        if self._downloader.params.get('noplaylist'):
            self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
            return video_id, self.url_result(video_id, 'Youtube', video_id=video_id)

        self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))
        return video_id, None

    def _real_extract(self, url):
        """Resolve *url* to a single video, a mix or a regular playlist."""
        # Extract playlist id
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError('Invalid URL: %s' % url)
        playlist_id = mobj.group(1) or mobj.group(2)

        video_id, video = self._check_download_just_video(url, playlist_id)
        if video:
            return video

        if playlist_id.startswith(('RD', 'UL', 'PU')):
            # Mixes require a custom extraction process
            return self._extract_mix(playlist_id)

        has_videos, playlist = self._extract_playlist(playlist_id)
        if video_id and not has_videos:
            # Some playlist URLs don't actually serve a playlist (see
            # https://github.com/ytdl-org/youtube-dl/issues/10537).
            # Fallback to plain video extraction if there is a video id
            # along with playlist id.
            return self.url_result(video_id, 'Youtube', video_id=video_id)

        return playlist
448830ce 2816
c5e8d7af 2817
class YoutubeChannelIE(YoutubePlaylistBaseInfoExtractor):
    """Extract the videos of a YouTube channel given its /channel/ URL."""

    IE_DESC = 'YouTube.com channels'
    _VALID_URL = r'https?://(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie)?\.com|(?:www\.)?invidio\.us)/channel/(?P<id>[0-9A-Za-z_-]+)'
    _TEMPLATE_URL = 'https://www.youtube.com/channel/%s/videos'
    _VIDEO_RE = r'(?:title="(?P<title>[^"]+)"[^>]+)?href="/watch\?v=(?P<id>[0-9A-Za-z_-]+)&?'
    IE_NAME = 'youtube:channel'
    _TESTS = [{
        'note': 'paginated channel',
        'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
        'playlist_mincount': 91,
        'info_dict': {
            'id': 'UUKfVa3S1e4PHvxWcwyMMg8w',
            'title': 'Uploads from lex will',
            'uploader': 'lex will',
            'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
        }
    }, {
        'note': 'Age restricted channel',
        # from https://www.youtube.com/user/DeusExOfficial
        'url': 'https://www.youtube.com/channel/UCs0ifCMCm1icqRbqhUINa0w',
        'playlist_mincount': 64,
        'info_dict': {
            'id': 'UUs0ifCMCm1icqRbqhUINa0w',
            'title': 'Uploads from Deus Ex',
            'uploader': 'Deus Ex',
            'uploader_id': 'DeusExOfficial',
        },
    }, {
        'url': 'https://invidio.us/channel/UC23qupoDRn9YOAVzeoxjOQA',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Decline URLs that the playlists or live extractors accept."""
        if YoutubePlaylistsIE.suitable(url) or YoutubeLiveIE.suitable(url):
            return False
        return super(YoutubeChannelIE, cls).suitable(url)

    def _build_template_url(self, url, channel_id):
        """Return the videos-listing URL for *channel_id* (*url* is unused here)."""
        return self._TEMPLATE_URL % channel_id

    def _real_extract(self, url):
        """Return the channel's uploads, delegating to the playlist extractor when possible."""
        channel_id = self._match_id(url)

        url = self._build_template_url(url, channel_id)

        # Channel by page listing is restricted to 35 pages of 30 items, i.e. 1050 videos total (see #5778)
        # Workaround by extracting as a playlist if managed to obtain channel playlist URL
        # otherwise fallback on channel by page extraction
        channel_page = self._download_webpage(
            url + '?view=57', channel_id,
            'Downloading channel page', fatal=False)

        channel_playlist_id = False
        if channel_page is not False:
            channel_playlist_id = self._html_search_meta(
                'channelId', channel_page, 'channel id', default=None)
            if not channel_playlist_id:
                app_url = self._html_search_meta(
                    ('al:ios:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad'),
                    channel_page, 'channel url', default=None)
                if app_url:
                    channel_playlist_id = self._search_regex(
                        r'vnd\.youtube://user/([0-9A-Za-z_-]+)',
                        app_url, 'channel id', default=None)

        if channel_playlist_id and channel_playlist_id.startswith('UC'):
            # Swap the 'UC' prefix for 'UU' to form the playlist id
            playlist_id = 'UU' + channel_playlist_id[2:]
            playlist_url = compat_urlparse.urljoin(
                url, '/playlist?list=%s' % playlist_id)
            return self.url_result(playlist_url, 'YoutubePlaylist')

        channel_page = self._download_webpage(url, channel_id, 'Downloading page #1')
        autogenerated_mobj = re.search(r'''(?x)
                class="[^"]*?(?:
                    channel-header-autogenerated-label|
                    yt-channel-title-autogenerated
                )[^"]*"''', channel_page)

        if autogenerated_mobj is not None:
            # The videos are contained in a single page
            # the ajax pages can't be used, they are empty
            entries = []
            for video_id, video_title in self.extract_videos_from_page(channel_page):
                entries.append(self.url_result(
                    video_id, 'Youtube', video_id=video_id,
                    video_title=video_title))
            return self.playlist_result(entries, channel_id)

        try:
            next(self._entries(channel_page, channel_id))
        except StopIteration:
            # No entries at all: surface YouTube's alert text if there is one
            alert_message = self._html_search_regex(
                r'(?s)<div[^>]+class=(["\']).*?\byt-alert-message\b.*?\1[^>]*>(?P<alert>[^<]+)</div>',
                channel_page, 'alert', default=None, group='alert')
            if alert_message:
                raise ExtractorError('Youtube said: %s' % alert_message, expected=True)

        return self.playlist_result(self._entries(channel_page, channel_id), channel_id)
c5e8d7af
PH
2914
2915
class YoutubeUserIE(YoutubeChannelIE):
    """Extract a user's videos from /user/, /c/, bare-name URLs or ``ytuser:`` ids."""

    IE_DESC = 'YouTube.com user videos (URL or "ytuser" keyword)'
    _VALID_URL = r'(?:(?:https?://(?:\w+\.)?youtube\.com/(?:(?P<user>user|c)/)?(?!(?:attribution_link|watch|results|shared)(?:$|[^a-z_A-Z0-9-])))|ytuser:)(?!feed/)(?P<id>[A-Za-z0-9_-]+)'
    _TEMPLATE_URL = 'https://www.youtube.com/%s/%s/videos'
    IE_NAME = 'youtube:user'

    _TESTS = [{
        'url': 'https://www.youtube.com/user/TheLinuxFoundation',
        'playlist_mincount': 320,
        'info_dict': {
            'id': 'UUfX55Sx5hEFjoC3cNs6mCUQ',
            'title': 'Uploads from The Linux Foundation',
            'uploader': 'The Linux Foundation',
            'uploader_id': 'TheLinuxFoundation',
        }
    }, {
        # Only available via https://www.youtube.com/c/12minuteathlete/videos
        # but not https://www.youtube.com/user/12minuteathlete/videos
        'url': 'https://www.youtube.com/c/12minuteathlete/videos',
        'playlist_mincount': 249,
        'info_dict': {
            'id': 'UUVjM-zV6_opMDx7WYxnjZiQ',
            'title': 'Uploads from 12 Minute Athlete',
            'uploader': '12 Minute Athlete',
            'uploader_id': 'the12minuteathlete',
        }
    }, {
        'url': 'ytuser:phihag',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/c/gametrailers',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/gametrailers',
        'only_matching': True,
    }, {
        # This channel is not available, geo restricted to JP
        'url': 'https://www.youtube.com/user/kananishinoSMEJ/videos',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Accept *url* only if no other ``Youtube*IE`` class in this module does."""
        # Don't return True if the url can be extracted with another youtube
        # extractor: this regex is too permissive and would match as well.
        for name, klass in globals().items():
            if not (name.startswith('Youtube') and name.endswith('IE')):
                continue
            if klass is not cls and klass.suitable(url):
                return False
        return super(YoutubeUserIE, cls).suitable(url)

    def _build_template_url(self, url, channel_id):
        """Return the videos-listing URL, keeping the 'user'/'c' path kind of *url*."""
        mobj = re.match(self._VALID_URL, url)
        # Bare-name and ytuser: forms have no path kind; default to 'user'
        path_kind = mobj.group('user') or 'user'
        return self._TEMPLATE_URL % (path_kind, mobj.group('id'))
2970
b05654f0 2971
f07e276a
S
class YoutubeLiveIE(YoutubeBaseInfoExtractor):
    """Resolve a channel's /live URL to the video it currently points at."""

    IE_DESC = 'YouTube.com live streams'
    _VALID_URL = r'(?P<base_url>https?://(?:\w+\.)?youtube\.com/(?:(?:user|channel|c)/)?(?P<id>[^/]+))/live'
    IE_NAME = 'youtube:live'

    _TESTS = [{
        'url': 'https://www.youtube.com/user/TheYoungTurks/live',
        'info_dict': {
            'id': 'a48o2S1cPoo',
            'ext': 'mp4',
            'title': 'The Young Turks - Live Main Show',
            'uploader': 'The Young Turks',
            'uploader_id': 'TheYoungTurks',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
            'upload_date': '20150715',
            'license': 'Standard YouTube License',
            'description': 'md5:438179573adcdff3c97ebb1ee632b891',
            'categories': ['News & Politics'],
            'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/TheYoungTurks/live',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return a result for the live video, or for the base URL as a fallback.

        The fallback is used when the page cannot be downloaded, is not a
        video page, or exposes no well-formed 11-character video id.
        """
        mobj = re.match(self._VALID_URL, url)
        channel_id, base_url = mobj.group('id', 'base_url')

        webpage = self._download_webpage(url, channel_id, fatal=False)
        if not webpage:
            return self.url_result(base_url)

        page_type = self._og_search_property(
            'type', webpage, 'page type', default='')
        video_id = self._html_search_meta(
            'videoId', webpage, 'video id', default=None)
        is_video_page = page_type.startswith('video')
        if is_video_page and video_id and re.match(
                r'^[0-9A-Za-z_-]{11}$', video_id):
            return self.url_result(video_id, YoutubeIE.ie_key())

        return self.url_result(base_url)
3022
3023
e462474e
S
class YoutubePlaylistsIE(YoutubePlaylistsBaseInfoExtractor):
    """Match the /playlists page of a user or channel.

    Only matching data is declared here; no methods are overridden.
    """

    IE_NAME = 'youtube:playlists'
    IE_DESC = 'YouTube.com user/channel playlists'
    _VALID_URL = r'https?://(?:\w+\.)?youtube\.com/(?:user|channel)/(?P<id>[^/]+)/playlists'

    _TESTS = [{
        'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
        'playlist_mincount': 4,
        'info_dict': {
            'id': 'ThirstForScience',
            'title': 'ThirstForScience',
        },
    }, {
        # with "Load more" button
        'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
        'playlist_mincount': 70,
        'info_dict': {
            'id': 'igorkle1',
            'title': 'Игорь Клейнер',
        },
    }, {
        'url': 'https://www.youtube.com/channel/UCiU1dHvZObB2iP6xkJ__Icw/playlists',
        'playlist_mincount': 17,
        'info_dict': {
            'id': 'UCiU1dHvZObB2iP6xkJ__Icw',
            'title': 'Chem Player',
        },
        'skip': 'Blocked',
    }]
0c148415
S
3053
3054
870f3bfc
S
class YoutubeSearchBaseInfoExtractor(YoutubePlaylistBaseInfoExtractor):
    # Pattern for a /watch link: captures the 11-character video id and,
    # when a title="..." attribute follows in the same tag, its title.
    _VIDEO_RE = (
        r'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})'
        r'(?:[^"]*"[^>]+\btitle="(?P<title>[^"]+))?')
3057
3058
class YoutubeSearchIE(SearchInfoExtractor, YoutubeSearchBaseInfoExtractor):
    IE_DESC = 'YouTube.com searches'
    # there doesn't appear to be a real limit, for example if you search for
    # 'python' you get more than 8.000.000 results
    _MAX_RESULTS = float('inf')
    IE_NAME = 'youtube:search'
    _SEARCH_KEY = 'ytsearch'
    # Extra query-string arguments merged into the search URL; subclasses
    # override this to change e.g. the sort order.
    _EXTRA_QUERY_ARGS = {}
    _TESTS = []

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query

        Result pages are fetched one after another, following the "Next"
        link of each page, until a page yields no videos, no "Next" link is
        found, or at least n videos have been collected.  The collected
        entries are cut down to n and returned as a playlist result titled
        with the query.

        Raises ExtractorError (expected) when a page contains the
        "search-message" marker.
        """
        videos = []

        url_query = {
            'search_query': query.encode('utf-8'),
        }
        url_query.update(self._EXTRA_QUERY_ARGS)
        result_url = 'https://www.youtube.com/results?' + compat_urllib_parse_urlencode(url_query)

        for pagenum in itertools.count(1):
            data = self._download_json(
                result_url, video_id='query "%s"' % query,
                note='Downloading page %s' % pagenum,
                errnote='Unable to download API page',
                query={'spf': 'navigate'})
            html_content = data[1]['body']['content']

            if 'class="search-message' in html_content:
                raise ExtractorError(
                    '[youtube] No video results', expected=True)

            new_videos = list(self._process_page(html_content))
            videos += new_videos
            # Stop as soon as enough videos are collected.  Using >= (rather
            # than >) avoids downloading one more page when exactly n
            # results have already been gathered.
            if not new_videos or len(videos) >= n:
                break
            next_link = self._html_search_regex(
                r'href="(/results\?[^"]*\bsp=[^"]+)"[^>]*>\s*<span[^>]+class="[^"]*\byt-uix-button-content\b[^"]*"[^>]*>Next',
                html_content, 'next link', default=None)
            if next_link is None:
                break
            result_url = compat_urlparse.urljoin('https://www.youtube.com/', next_link)

        # Slice only when needed: n is presumably float('inf') when the
        # caller passes _MAX_RESULTS (verify against SearchInfoExtractor),
        # and a float is not a valid slice bound.
        if len(videos) > n:
            videos = videos[:n]
        return self.playlist_result(videos, query)
75dff0ee 3107
c9ae7b95 3108
class YoutubeSearchDateIE(YoutubeSearchIE):
    # Same search as YoutubeSearchIE, with an extra query argument that asks
    # for results ordered by upload date.
    IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
    IE_DESC = 'YouTube.com searches, newest videos first'
    _SEARCH_KEY = 'ytsearchdate'
    _EXTRA_QUERY_ARGS = {
        'search_sort': 'video_date_uploaded',
    }
75dff0ee 3114
c9ae7b95 3115
class YoutubeSearchURLIE(YoutubeSearchBaseInfoExtractor):
    IE_NAME = 'youtube:search_url'
    IE_DESC = 'YouTube.com search URLs'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?P<query>[^&]+)(?:[&]|$)'
    _TESTS = [
        {
            'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
            'playlist_mincount': 5,
            'info_dict': {
                'title': 'youtube-dl test video',
            },
        },
        {
            'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
            'only_matching': True,
        },
    ]

    def _real_extract(self, url):
        """Return the videos listed on a search results URL as a playlist.

        The playlist title is the URL's query parameter, decoded with
        plus-signs turned back into spaces.
        """
        raw_query = re.match(self._VALID_URL, url).group('query')
        query = compat_urllib_parse_unquote_plus(raw_query)
        results_page = self._download_webpage(url, query)
        entries = self._process_page(results_page)
        return self.playlist_result(entries, playlist_title=query)
c9ae7b95
PH
3136
3137
class YoutubeShowIE(YoutubePlaylistsBaseInfoExtractor):
    IE_NAME = 'youtube:show'
    IE_DESC = 'YouTube.com (multi-season) shows'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/show/(?P<id>[^?#]*)'
    _TESTS = [
        {
            'url': 'https://www.youtube.com/show/airdisasters',
            'playlist_mincount': 5,
            'info_dict': {
                'id': 'airdisasters',
                'title': 'Air Disasters',
            },
        },
    ]

    def _real_extract(self, url):
        """Extract a show by delegating to the parent with its playlists URL."""
        show_id = self._match_id(url)
        playlists_url = 'https://www.youtube.com/show/%s/playlists' % show_id
        return super(YoutubeShowIE, self)._real_extract(playlists_url)
04cc9617
JMF
3155
3156
class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
    """
    Base class for feed extractors
    Subclasses must define the _FEED_NAME and _PLAYLIST_TITLE properties.
    """
    _LOGIN_REQUIRED = True

    @property
    def IE_NAME(self):
        """Extractor name, derived from the subclass's _FEED_NAME."""
        return 'youtube:%s' % self._FEED_NAME

    def _real_initialize(self):
        self._login()

    def _entries(self, page):
        """Yield a result for every distinct video id found in the feed.

        page is the HTML of the first feed page.  Further portions are
        fetched through the "load more" link for as long as one is present
        and the latest portion contributed at least one unseen video id.
        """
        # The extraction process is the same as for playlists, but the regex
        # for the video ids doesn't contain an index
        seen_ids = set()  # set, not list: only ever used for membership tests
        more_widget_html = content_html = page
        for page_num in itertools.count(1):
            matches = re.findall(r'href="\s*/watch\?v=([0-9A-Za-z_-]{11})', content_html)

            # 'recommended' feed has infinite 'load more' and each new portion spins
            # the same videos in (sometimes) slightly different order, so we'll check
            # for unicity and break when portion has no new videos
            new_ids = [
                video_id for video_id in orderedSet(matches)
                if video_id not in seen_ids]
            if not new_ids:
                break

            seen_ids.update(new_ids)

            for entry in self._ids_to_results(new_ids):
                yield entry

            mobj = re.search(r'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html)
            if not mobj:
                break

            more = self._download_json(
                'https://youtube.com/%s' % mobj.group('more'), self._PLAYLIST_TITLE,
                'Downloading page #%s' % page_num,
                transform_source=uppercase_escape)
            content_html = more['content_html']
            more_widget_html = more['load_more_widget_html']

    def _real_extract(self, url):
        """Download the feed page and return its videos as a playlist."""
        page = self._download_webpage(
            'https://www.youtube.com/feed/%s' % self._FEED_NAME,
            self._PLAYLIST_TITLE)
        return self.playlist_result(
            self._entries(page), playlist_title=self._PLAYLIST_TITLE)
25f14e9f
S
3208
3209
class YoutubeWatchLaterIE(YoutubePlaylistIE):
    IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
    IE_NAME = 'youtube:watchlater'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/(?:feed/watch_later|(?:playlist|watch)\?(?:.+&)?list=WL)|:ytwatchlater'

    _TESTS = [
        {
            'url': 'https://www.youtube.com/playlist?list=WL',
            'only_matching': True,
        },
        {
            'url': 'https://www.youtube.com/watch?v=bCNU9TrbiRk&index=1&list=WL',
            'only_matching': True,
        },
    ]

    def _real_extract(self, url):
        """Return the single video if one is selected, else the 'WL' playlist."""
        _unused, single_video = self._check_download_just_video(url, 'WL')
        if not single_video:
            _unused, watch_later = self._extract_playlist('WL')
            return watch_later
        return single_video
f459d170 3229
5f6a1245 3230
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    IE_DESC = 'YouTube.com favourite videos, ":ytfav" for short (requires authentication)'
    IE_NAME = 'youtube:favorites'
    _LOGIN_REQUIRED = True
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/my_favorites|:ytfav(?:ou?rites)?'

    def _real_extract(self, url):
        """Find the favourites playlist id on the page and hand it to YoutubePlaylist."""
        favourites_page = self._download_webpage(
            'https://www.youtube.com/my_favorites', 'Youtube Favourites videos')
        # The playlist id is taken from the first "list=" parameter in the page
        favourites_list_id = self._search_regex(
            r'list=(.+?)["&]', favourites_page, 'favourites playlist id')
        return self.url_result(favourites_list_id, 'YoutubePlaylist')
15870e90
PH
3241
3242
25f14e9f
S
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    # Feed extractor for the "recommended" feed; only configuration lives here.
    _FEED_NAME = 'recommended'
    _PLAYLIST_TITLE = 'Youtube Recommended videos'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/recommended|:ytrec(?:ommended)?'
    IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
1ed5b5c9 3248
1ed5b5c9 3249
25f14e9f
S
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    # Feed extractor for the "subscriptions" feed; only configuration lives here.
    _FEED_NAME = 'subscriptions'
    _PLAYLIST_TITLE = 'Youtube Subscriptions'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/subscriptions|:ytsubs(?:criptions)?'
    IE_DESC = 'YouTube.com subscriptions feed, "ytsubs" keyword (requires authentication)'
1ed5b5c9 3255
1ed5b5c9 3256
25f14e9f
S
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    # Feed extractor for the "history" feed; only configuration lives here.
    _FEED_NAME = 'history'
    _PLAYLIST_TITLE = 'Youtube History'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/history|:ythistory'
    IE_DESC = 'Youtube watch history, ":ythistory" for short (requires authentication)'
1ed5b5c9
JMF
3262
3263
15870e90
PH
class YoutubeTruncatedURLIE(InfoExtractor):
    # Matches watch/attribution URLs that end without a video id and answers
    # with a hint about shell quoting instead of attempting an extraction.
    IE_DESC = False  # Do not list
    IE_NAME = 'youtube:truncated_url'
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [
        {
            'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
            'only_matching': True,
        },
        {
            'url': 'https://www.youtube.com/watch?',
            'only_matching': True,
        },
        {
            'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
            'only_matching': True,
        },
        {
            'url': 'https://www.youtube.com/watch?feature=foo',
            'only_matching': True,
        },
        {
            'url': 'https://www.youtube.com/watch?hl=en-GB',
            'only_matching': True,
        },
        {
            'url': 'https://www.youtube.com/watch?t=2372',
            'only_matching': True,
        },
    ]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError explaining how to quote the URL."""
        quoting_hint = (
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like  youtube-dl '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply  youtube-dl BaW_jenozKc  .')
        raise ExtractorError(quoting_hint, expected=True)
772fd5cc
PH
3311
3312
class YoutubeTruncatedIDIE(InfoExtractor):
    # Matches watch URLs whose v= value is only 1-10 characters long and
    # reports them as truncated instead of attempting an extraction.
    IE_DESC = False  # Do not list
    IE_NAME = 'youtube:truncated_id'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [
        {
            'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
            'only_matching': True,
        },
    ]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError naming the incomplete id."""
        truncated_id = self._match_id(url)
        message = 'Incomplete YouTube ID %s. URL %s looks truncated.' % (truncated_id, url)
        raise ExtractorError(message, expected=True)