]> jfr.im git - yt-dlp.git/blame - youtube_dl/extractor/youtube.py
[youtube] Add support for yt.lelux.fi (#22597)
[yt-dlp.git] / youtube_dl / extractor / youtube.py
CommitLineData
c5e8d7af 1# coding: utf-8
c5e8d7af 2
78caa52a
PH
3from __future__ import unicode_literals
4
5
0ca96d48 6import itertools
c5e8d7af 7import json
c4417ddb 8import os.path
d77ab8e2 9import random
c5e8d7af 10import re
42939b61 11import time
e0df6211 12import traceback
c5e8d7af 13
b05654f0 14from .common import InfoExtractor, SearchInfoExtractor
2b25cb5d 15from ..jsinterp import JSInterpreter
54256267 16from ..swfinterp import SWFInterpreter
4bb4a188 17from ..compat import (
edf3e38e 18 compat_chr,
f8c55c66 19 compat_HTTPError,
8d81f3e3 20 compat_kwargs,
c5e8d7af 21 compat_parse_qs,
7fd002c0
S
22 compat_urllib_parse_unquote,
23 compat_urllib_parse_unquote_plus,
15707c7e 24 compat_urllib_parse_urlencode,
7c80519c 25 compat_urllib_parse_urlparse,
7c61bd36 26 compat_urlparse,
c5e8d7af 27 compat_str,
4bb4a188
PH
28)
29from ..utils import (
27019dbb 30 bool_or_none,
c5e8d7af 31 clean_html,
026fbedc 32 dict_get,
9b9c5355 33 error_to_compat_str,
351f37c0 34 extract_attributes,
c5e8d7af 35 ExtractorError,
2d30521a 36 float_or_none,
4bb4a188
PH
37 get_element_by_attribute,
38 get_element_by_id,
dd27fd17 39 int_or_none,
94278f72 40 mimetype2ext,
4bb4a188 41 orderedSet,
6310acf5 42 parse_codecs,
7c80519c 43 parse_duration,
0cb58b02 44 remove_quotes,
3995d37d 45 remove_start,
cf7e015f 46 smuggle_url,
dbdaaa23 47 str_or_none,
c93d53f5 48 str_to_int,
556dbe7f 49 try_get,
c5e8d7af
PH
50 unescapeHTML,
51 unified_strdate,
cf7e015f 52 unsmuggle_url,
81c2f20b 53 uppercase_escape,
21c340b8 54 url_or_none,
6e6bc8da 55 urlencode_postdata,
c5e8d7af
PH
56)
57
5f6a1245 58
class YoutubeBaseInfoExtractor(InfoExtractor):
    """Provide base functions for Youtube extractors"""
    # Google sign-in endpoints. _login() fetches _LOGIN_URL and then posts to
    # _LOOKUP_URL, _CHALLENGE_URL and (for two-step verification) _TFA_URL.
    _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
    _TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge'

    _LOOKUP_URL = 'https://accounts.google.com/_/signin/sl/lookup'
    _CHALLENGE_URL = 'https://accounts.google.com/_/signin/sl/challenge'
    # {0} is filled with the "TL" token extracted from the challenge response
    _TFA_URL = 'https://accounts.google.com/_/signin/challenge?hl=en&TL={0}'

    _NETRC_MACHINE = 'youtube'
    # If True it will raise an error if no login info is provided
    _LOGIN_REQUIRED = False

    _PLAYLIST_ID_RE = r'(?:PL|LL|EC|UU|FL|RD|UL|TL|OLAK5uy_)[0-9A-Za-z-_]{10,}'

    def _set_language(self):
        """Set the ``PREF`` cookie (``f1=50000000&hl=en``) for ``.youtube.com``."""
        self._set_cookie(
            '.youtube.com', 'PREF', 'f1=50000000&hl=en',
            # YouTube sets the expire time to about two months
            expire_time=time.time() + 2 * 30 * 24 * 3600)

    def _ids_to_results(self, ids):
        """Return a list with one ``url_result`` (for the 'Youtube' extractor) per video id in *ids*."""
        return [
            self.url_result(vid_id, 'Youtube', video_id=vid_id)
            for vid_id in ids]

    def _login(self):
        """
        Attempt to log in to YouTube.
        True is returned if successful or skipped.
        False is returned if login failed.

        If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.
        """
        username, password = self._get_login_info()
        # No authentication to be performed
        if username is None:
            if self._LOGIN_REQUIRED and self._downloader.params.get('cookiefile') is None:
                raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
            return True

        login_page = self._download_webpage(
            self._LOGIN_URL, None,
            note='Downloading login page',
            errnote='unable to fetch login page', fatal=False)
        if login_page is False:
            # Explicit False (not a bare return) to honour the documented contract
            return False

        # Hidden form fields of the login page are re-sent with every request
        login_form = self._hidden_inputs(login_page)

        def req(url, f_req, note, errnote):
            """POST *f_req* (JSON-encoded as ``f.req``) plus the login form to *url*.

            Returns the parsed JSON response, or False on failure
            (the download is non-fatal).
            """
            data = login_form.copy()
            data.update({
                'pstMsg': 1,
                'checkConnection': 'youtube',
                'checkedDomains': 'youtube',
                'hl': 'en',
                'deviceinfo': '[null,null,null,[],null,"US",null,null,[],"GlifWebSignIn",null,[null,null,[]]]',
                'f.req': json.dumps(f_req),
                'flowName': 'GlifWebSignIn',
                'flowEntry': 'ServiceLogin',
                # TODO: reverse actual botguard identifier generation algo
                'bgRequest': '["identifier",""]',
            })
            return self._download_json(
                url, None, note=note, errnote=errnote,
                # Drop everything that precedes the first '[' of the response
                transform_source=lambda s: re.sub(r'^[^[]*', '', s),
                fatal=False,
                data=urlencode_postdata(data), headers={
                    'Content-Type': 'application/x-www-form-urlencoded;charset=utf-8',
                    'Google-Accounts-XSRF': 1,
                })

        def warn(message):
            self._downloader.report_warning(message)

        # Continuation URL embedded in both the lookup and the challenge payloads
        service_login_url = 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn'

        lookup_req = [
            username,
            None, [], None, 'US', None, None, 2, False, True,
            [
                None, None,
                [2, 1, None, 1,
                 service_login_url,
                 None, [], 4],
                1, [None, None, []], None, None, None, True
            ],
            username,
        ]

        lookup_results = req(
            self._LOOKUP_URL, lookup_req,
            'Looking up account info', 'Unable to look up account info')

        if lookup_results is False:
            return False

        user_hash = try_get(lookup_results, lambda x: x[0][2], compat_str)
        if not user_hash:
            warn('Unable to extract user hash')
            return False

        challenge_req = [
            user_hash,
            None, 1, None, [1, None, None, None, [password, None, True]],
            [
                None, None, [2, 1, None, 1, service_login_url, None, [], 4],
                1, [None, None, []], None, None, None, True
            ]]

        challenge_results = req(
            self._CHALLENGE_URL, challenge_req,
            'Logging in', 'Unable to log in')

        if challenge_results is False:
            # Explicit False (not a bare return) to honour the documented contract
            return False

        login_res = try_get(challenge_results, lambda x: x[0][5], list)
        if login_res:
            login_msg = try_get(login_res, lambda x: x[5], compat_str)
            # The conditional is parenthesised so that the 'Unable to login'
            # prefix is kept for every message; '%' binds tighter than
            # 'if/else' and would otherwise apply to the first branch only.
            warn('Unable to login: %s' % (
                'Invalid password'
                if login_msg == 'INCORRECT_ANSWER_ENTERED' else login_msg))
            return False

        res = try_get(challenge_results, lambda x: x[0][-1], list)
        if not res:
            warn('Unable to extract result entry')
            return False

        login_challenge = try_get(res, lambda x: x[0][0], list)
        if login_challenge:
            challenge_str = try_get(login_challenge, lambda x: x[2], compat_str)
            if challenge_str == 'TWO_STEP_VERIFICATION':
                # SEND_SUCCESS - TFA code has been successfully sent to phone
                # QUOTA_EXCEEDED - reached the limit of TFA codes
                status = try_get(login_challenge, lambda x: x[5], compat_str)
                if status == 'QUOTA_EXCEEDED':
                    warn('Exceeded the limit of TFA codes, try later')
                    return False

                tl = try_get(challenge_results, lambda x: x[1][2], compat_str)
                if not tl:
                    warn('Unable to extract TL')
                    return False

                tfa_code = self._get_tfa_info('2-step verification code')

                if not tfa_code:
                    warn(
                        'Two-factor authentication required. Provide it either interactively or with --twofactor <code> '
                        '(Note that only TOTP (Google Authenticator App) codes work at this time.)')
                    return False

                # Codes may be entered with a leading 'G-'; it is not submitted
                tfa_code = remove_start(tfa_code, 'G-')

                tfa_req = [
                    user_hash, None, 2, None,
                    [
                        9, None, None, None, None, None, None, None,
                        [None, tfa_code, True, 2]
                    ]]

                tfa_results = req(
                    self._TFA_URL.format(tl), tfa_req,
                    'Submitting TFA code', 'Unable to submit TFA code')

                if tfa_results is False:
                    return False

                tfa_res = try_get(tfa_results, lambda x: x[0][5], list)
                if tfa_res:
                    tfa_msg = try_get(tfa_res, lambda x: x[5], compat_str)
                    # Parenthesised for the same precedence reason as above
                    warn('Unable to finish TFA: %s' % (
                        'Invalid TFA code'
                        if tfa_msg == 'INCORRECT_ANSWER_ENTERED' else tfa_msg))
                    return False

                check_cookie_url = try_get(
                    tfa_results, lambda x: x[0][-1][2], compat_str)
            else:
                CHALLENGES = {
                    'LOGIN_CHALLENGE': "This device isn't recognized. For your security, Google wants to make sure it's really you.",
                    'USERNAME_RECOVERY': 'Please provide additional information to aid in the recovery process.',
                    'REAUTH': "There is something unusual about your activity. For your security, Google wants to make sure it's really you.",
                }
                challenge = CHALLENGES.get(
                    challenge_str,
                    '%s returned error %s.' % (self.IE_NAME, challenge_str))
                warn('%s\nGo to https://accounts.google.com/, login and solve a challenge.' % challenge)
                return False
        else:
            check_cookie_url = try_get(res, lambda x: x[2], compat_str)

        if not check_cookie_url:
            warn('Unable to extract CheckCookie URL')
            return False

        check_cookie_results = self._download_webpage(
            check_cookie_url, None, 'Checking cookie', fatal=False)

        if check_cookie_results is False:
            return False

        # The login is treated as successful only when the CheckCookie page
        # mentions the Google account URL
        if 'https://myaccount.google.com/' not in check_cookie_results:
            warn('Unable to log in')
            return False

        return True

    def _download_webpage_handle(self, *args, **kwargs):
        """Delegate to the parent implementation with ``disable_polymer=true`` added to the query."""
        # Work on a copy so the caller's query dict is not mutated
        query = kwargs.get('query', {}).copy()
        query['disable_polymer'] = 'true'
        kwargs['query'] = query
        return super(YoutubeBaseInfoExtractor, self)._download_webpage_handle(
            *args, **compat_kwargs(kwargs))

    def _real_initialize(self):
        """Set the language cookie and attempt to log in; does nothing without a downloader."""
        if self._downloader is None:
            return
        self._set_language()
        # The outcome is not acted upon here: _login() reports failures as
        # warnings and raises only when login info is required but missing.
        self._login()
c5e8d7af 281
8377574c 282
class YoutubeEntryListBaseInfoExtractor(YoutubeBaseInfoExtractor):
    # Extract entries from page with "Load more" button
    def _entries(self, page, playlist_id):
        """Yield the entries of *page* and of every following "Load more" page.

        Each chunk of HTML is handed to ``self._process_page``. The next chunk
        is located through the ``data-uix-load-more-href`` attribute of the
        current "Load more" widget. Iteration stops when that attribute is
        missing or the downloaded chunk is blank.
        """
        max_retries = 3
        widget_html = html_chunk = page
        for page_num in itertools.count(1):
            for item in self._process_page(html_chunk):
                yield item

            next_link = re.search(
                r'data-uix-load-more-href="/?(?P<more>[^"]+)"', widget_html)
            if next_link is None:
                break

            next_url = 'https://youtube.com/%s' % next_link.group('more')
            # Downloading page may result in intermittent 5xx HTTP error
            # that is usually worked around with a retry
            for attempt in range(max_retries + 1):
                retry_suffix = ' (retry #%d)' % attempt if attempt else ''
                try:
                    more = self._download_json(
                        next_url, playlist_id,
                        'Downloading page #%s%s' % (page_num, retry_suffix),
                        transform_source=uppercase_escape)
                except ExtractorError as err:
                    is_transient = (
                        isinstance(err.cause, compat_HTTPError)
                        and err.cause.code in (500, 503))
                    if is_transient and attempt < max_retries:
                        continue
                    # Not a 500/503, or the retries are used up
                    raise
                break

            html_chunk = more['content_html']
            if not html_chunk.strip():
                # Some webpages show a "Load more" button but they don't
                # have more videos
                break
            widget_html = more['load_more_widget_html']
320
061a75ed
S
321
class YoutubePlaylistBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
    def _process_page(self, content):
        """Yield a 'Youtube' ``url_result`` for every (id, title) pair found in *content*."""
        for vid, title in self.extract_videos_from_page(content):
            yield self.url_result(vid, 'Youtube', vid, title)

    def extract_videos_from_page_impl(self, video_re, page, ids_in_page, titles_in_page):
        """Collect video ids and titles matched by *video_re* in *page*.

        Both lists are updated in place and stay index-aligned. A new id is
        appended together with its title (possibly None). For an id already
        present, a missing title is filled in when this match provides one.
        """
        for match in re.finditer(video_re, page):
            named = match.groupdict()
            # The link with index 0 is not the first video of the playlist (not sure if still actual)
            # NOTE(review): the test compares the 'id' group, not 'index' - confirm this is intended
            if 'index' in named and match.group('id') == '0':
                continue
            video_id = match.group('id')

            if 'title' in named:
                video_title = unescapeHTML(match.group('title'))
            else:
                video_title = None
            if video_title:
                video_title = video_title.strip()
            if video_title == '► Play all':
                video_title = None

            try:
                position = ids_in_page.index(video_id)
            except ValueError:
                # First time this id is seen
                ids_in_page.append(video_id)
                titles_in_page.append(video_title)
            else:
                if video_title and not titles_in_page[position]:
                    titles_in_page[position] = video_title

    def extract_videos_from_page(self, page):
        """Return zipped (video id, title) pairs matched by ``self._VIDEO_RE`` in *page*."""
        found_ids, found_titles = [], []
        self.extract_videos_from_page_impl(
            self._VIDEO_RE, page, found_ids, found_titles)
        return zip(found_ids, found_titles)
353
354
061a75ed
S
class YoutubePlaylistsBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
    def _process_page(self, content):
        """Yield a 'YoutubePlaylist' ``url_result`` for each distinct playlist id linked in *content*."""
        found_ids = re.findall(
            r'<h3[^>]+class="[^"]*yt-lockup-title[^"]*"[^>]*><a[^>]+href="/?playlist\?list=([0-9A-Za-z-_]{10,})"',
            content)
        # orderedSet is applied so each playlist id is yielded once
        for playlist_id in orderedSet(found_ids):
            playlist_url = 'https://www.youtube.com/playlist?list=%s' % playlist_id
            yield self.url_result(playlist_url, 'YoutubePlaylist')

    def _real_extract(self, url):
        """Download *url* and return a playlist result built from its entries."""
        playlist_id = self._match_id(url)
        webpage = self._download_webpage(url, playlist_id)
        # A missing og:title is tolerated (fatal=False)
        title = self._og_search_title(webpage, fatal=False)
        entries = self._entries(webpage, playlist_id)
        return self.playlist_result(entries, playlist_id, title)
0c148415
S
368
369
360e1ca5 370class YoutubeIE(YoutubeBaseInfoExtractor):
78caa52a 371 IE_DESC = 'YouTube.com'
cb7dfeea 372 _VALID_URL = r"""(?x)^
c5e8d7af 373 (
edb53e2d 374 (?:https?://|//) # http(s):// or protocol-independent URL
cb7dfeea 375 (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/|
484aaeb2 376 (?:www\.)?deturl\.com/www\.youtube\.com/|
e70dc1d1 377 (?:www\.)?pwnyoutube\.com/|
8b561bfc 378 (?:www\.)?hooktube\.com/|
f7000f3a 379 (?:www\.)?yourepeat\.com/|
e69ae5b9 380 tube\.majestyc\.net/|
ba036333 381 # Invidious instances taken from https://github.com/omarroth/invidious/wiki/Invidious-Instances
77d95677 382 (?:(?:www|dev)\.)?invidio\.us/|
ba036333 383 (?:(?:www|no)\.)?invidiou\.sh/|
384 (?:(?:www|fi|de)\.)?invidious\.snopyta\.org/|
8ae113ca 385 (?:www\.)?invidious\.kabi\.tk/|
ba036333 386 (?:www\.)?invidious\.enkirton\.net/|
387 (?:www\.)?invidious\.13ad\.de/|
791d2e81 388 (?:www\.)?invidious\.mastodon\.host/|
494d664e 389 (?:www\.)?invidious\.nixnet\.xyz/|
666d808e 390 (?:www\.)?invidious\.drycat\.fr/|
ba036333 391 (?:www\.)?tube\.poal\.co/|
8ae113ca 392 (?:www\.)?vid\.wxzm\.sx/|
494d664e 393 (?:www\.)?yt\.elukerio\.org/|
894b3826 394 (?:www\.)?yt\.lelux\.fi/|
bff90fc5 395 (?:www\.)?kgg2m7yk5aybusll\.onion/|
396 (?:www\.)?qklhadlycap4cnod\.onion/|
397 (?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion/|
398 (?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion/|
399 (?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion/|
400 (?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion/|
33c1c7d8 401 (?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p/|
e69ae5b9 402 youtube\.googleapis\.com/) # the various hostnames, with wildcard subdomains
c5e8d7af
PH
403 (?:.*?\#/)? # handle anchor (#/) redirect urls
404 (?: # the various things that can precede the ID:
ac7553d0 405 (?:(?:v|embed|e)/(?!videoseries)) # v/ or embed/ or e/
c5e8d7af 406 |(?: # or the v= param in all its forms
f7000f3a 407 (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
c5e8d7af 408 (?:\?|\#!?) # the params delimiter ? or # or #!
040ac686 409 (?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
c5e8d7af
PH
410 v=
411 )
f4b05232 412 ))
cbaed4bb
S
413 |(?:
414 youtu\.be| # just youtu.be/xxxx
6d4fc66b
S
415 vid\.plus| # or vid.plus/xxxx
416 zwearz\.com/watch| # or zwearz.com/watch/xxxx
cbaed4bb 417 )/
edb53e2d 418 |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
f4b05232 419 )
c5e8d7af 420 )? # all until now is optional -> you can pass the naked ID
8963d9c2 421 ([0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
d0ba5587
S
422 (?!.*?\blist=
423 (?:
424 %(playlist_id)s| # combined list/video URLs are handled by the playlist IE
425 WL # WL are handled by the watch later IE
426 )
427 )
c5e8d7af 428 (?(1).+)? # if we found the ID, everything can follow
d0ba5587 429 $""" % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
c5e8d7af 430 _NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
2c62dc26 431 _formats = {
c2d3cb4c 432 '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
433 '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
434 '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
435 '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
436 '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
437 '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
438 '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
439 '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
3834d3e3 440 # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
c2d3cb4c 441 '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
442 '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
443 '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
444 '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
445 '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
446 '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
e1a0bfdf 447 '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
c2d3cb4c 448 '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
449 '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
e1a0bfdf 450
451
452 # 3D videos
c2d3cb4c 453 '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
454 '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
455 '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
456 '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
e1a0bfdf 457 '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
458 '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
459 '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
836a086c 460
96fb5605 461 # Apple HTTP Live Streaming
11f12195 462 '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
c2d3cb4c 463 '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
464 '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
465 '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
466 '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
467 '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
e1a0bfdf 468 '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
469 '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
2c62dc26
PH
470
471 # DASH mp4 video
d23028a8
S
472 '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
473 '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
474 '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
475 '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
476 '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
067aa17e 477 '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
d23028a8
S
478 '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
479 '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
480 '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
481 '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
482 '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
483 '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},
836a086c 484
f6f1fc92 485 # Dash mp4 audio
d23028a8
S
486 '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
487 '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
488 '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
489 '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
490 '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
491 '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
492 '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},
836a086c
AZ
493
494 # Dash webm
d23028a8
S
495 '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
496 '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
497 '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
498 '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
499 '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
500 '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
501 '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
502 '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
503 '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
504 '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
505 '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
506 '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
507 '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
508 '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
509 '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
4c6b4764 510 # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
d23028a8
S
511 '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
512 '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
513 '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
514 '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
515 '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
516 '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
2c62dc26
PH
517
518 # Dash webm audio
d23028a8
S
519 '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
520 '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},
ce6b9a2d 521
0857baad 522 # Dash webm audio with opus inside
d23028a8
S
523 '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
524 '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
525 '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},
0857baad 526
ce6b9a2d
PH
527 # RTMP (unnamed)
528 '_rtmp': {'protocol': 'rtmp'},
b85eae0f
S
529
530 # av01 video only formats sometimes served with "unknown" codecs
531 '394': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
532 '395': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
533 '396': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
534 '397': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
c5e8d7af 535 }
19041a38 536 _SUBTITLE_FORMATS = ('srv1', 'srv2', 'srv3', 'ttml', 'vtt')
836a086c 537
fd5c4aab
S
538 _GEO_BYPASS = False
539
78caa52a 540 IE_NAME = 'youtube'
2eb88d95
PH
541 _TESTS = [
542 {
2d3d2997 543 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
4bc3a23e
PH
544 'info_dict': {
545 'id': 'BaW_jenozKc',
546 'ext': 'mp4',
547 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
548 'uploader': 'Philipp Hagemeister',
549 'uploader_id': 'phihag',
ec85ded8 550 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
dd4c4492
S
551 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
552 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
4bc3a23e
PH
553 'upload_date': '20121002',
554 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
555 'categories': ['Science & Technology'],
000b6b5a 556 'tags': ['youtube-dl'],
556dbe7f 557 'duration': 10,
dbdaaa23 558 'view_count': int,
3e7c1224
PH
559 'like_count': int,
560 'dislike_count': int,
7c80519c 561 'start_time': 1,
297a564b 562 'end_time': 9,
2eb88d95 563 }
0e853ca4 564 },
0e853ca4 565 {
2d3d2997 566 'url': 'https://www.youtube.com/watch?v=UxxajLWwzqY',
4bc3a23e
PH
567 'note': 'Test generic use_cipher_signature video (#897)',
568 'info_dict': {
569 'id': 'UxxajLWwzqY',
570 'ext': 'mp4',
571 'upload_date': '20120506',
572 'title': 'Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]',
0cb58b02 573 'alt_title': 'I Love It (feat. Charli XCX)',
7caf9830 574 'description': 'md5:f3ceb5ef83a08d95b9d146f973157cc8',
000b6b5a
S
575 'tags': ['Icona Pop i love it', 'sweden', 'pop music', 'big beat records', 'big beat', 'charli',
576 'xcx', 'charli xcx', 'girls', 'hbo', 'i love it', "i don't care", 'icona', 'pop',
577 'iconic ep', 'iconic', 'love', 'it'],
556dbe7f 578 'duration': 180,
4bc3a23e
PH
579 'uploader': 'Icona Pop',
580 'uploader_id': 'IconaPop',
ec85ded8 581 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IconaPop',
0cb58b02 582 'creator': 'Icona Pop',
936784b2
S
583 'track': 'I Love It (feat. Charli XCX)',
584 'artist': 'Icona Pop',
2eb88d95 585 }
c108eb73
JMF
586 },
587 {
4bc3a23e
PH
588 'url': 'https://www.youtube.com/watch?v=07FYdnEawAQ',
589 'note': 'Test VEVO video with age protection (#956)',
590 'info_dict': {
591 'id': '07FYdnEawAQ',
592 'ext': 'mp4',
593 'upload_date': '20130703',
4fe54c12 594 'title': 'Justin Timberlake - Tunnel Vision (Official Music Video) (Explicit)',
0cb58b02 595 'alt_title': 'Tunnel Vision',
4fe54c12 596 'description': 'md5:07dab3356cde4199048e4c7cd93471e1',
556dbe7f 597 'duration': 419,
4bc3a23e
PH
598 'uploader': 'justintimberlakeVEVO',
599 'uploader_id': 'justintimberlakeVEVO',
ec85ded8 600 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/justintimberlakeVEVO',
0cb58b02 601 'creator': 'Justin Timberlake',
7e72694b 602 'track': 'Tunnel Vision',
936784b2 603 'artist': 'Justin Timberlake',
34952f09 604 'age_limit': 18,
c108eb73
JMF
605 }
606 },
fccd3771 607 {
4bc3a23e
PH
608 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
609 'note': 'Embed-only video (#1746)',
610 'info_dict': {
611 'id': 'yZIXLfi8CZQ',
612 'ext': 'mp4',
613 'upload_date': '20120608',
614 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
615 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
616 'uploader': 'SET India',
94bfcd23 617 'uploader_id': 'setindia',
ec85ded8 618 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
94bfcd23 619 'age_limit': 18,
fccd3771
PH
620 }
621 },
11b56058 622 {
2d3d2997 623 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=UxxajLWwzqY',
11b56058
PM
624 'note': 'Use the first video ID in the URL',
625 'info_dict': {
626 'id': 'BaW_jenozKc',
627 'ext': 'mp4',
628 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
629 'uploader': 'Philipp Hagemeister',
630 'uploader_id': 'phihag',
ec85ded8 631 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
11b56058
PM
632 'upload_date': '20121002',
633 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
634 'categories': ['Science & Technology'],
635 'tags': ['youtube-dl'],
556dbe7f 636 'duration': 10,
dbdaaa23 637 'view_count': int,
11b56058
PM
638 'like_count': int,
639 'dislike_count': int,
34a7de29
S
640 },
641 'params': {
642 'skip_download': True,
643 },
11b56058 644 },
dd27fd17 645 {
2d3d2997 646 'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
4bc3a23e
PH
647 'note': '256k DASH audio (format 141) via DASH manifest',
648 'info_dict': {
649 'id': 'a9LDPn-MO4I',
650 'ext': 'm4a',
651 'upload_date': '20121002',
652 'uploader_id': '8KVIDEO',
ec85ded8 653 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
4bc3a23e
PH
654 'description': '',
655 'uploader': '8KVIDEO',
656 'title': 'UHDTV TEST 8K VIDEO.mp4'
4919603f 657 },
4bc3a23e
PH
658 'params': {
659 'youtube_include_dash_manifest': True,
660 'format': '141',
4919603f 661 },
de3c7fe0 662 'skip': 'format 141 not served anymore',
dd27fd17 663 },
3489b7d2
JMF
664 # DASH manifest with encrypted signature
665 {
78caa52a
PH
666 'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
667 'info_dict': {
668 'id': 'IB3lcPjvWLA',
669 'ext': 'm4a',
4fe54c12
S
670 'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
671 'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
556dbe7f 672 'duration': 244,
78caa52a
PH
673 'uploader': 'AfrojackVEVO',
674 'uploader_id': 'AfrojackVEVO',
675 'upload_date': '20131011',
3489b7d2 676 },
4bc3a23e 677 'params': {
78caa52a 678 'youtube_include_dash_manifest': True,
de3c7fe0 679 'format': '141/bestaudio[ext=m4a]',
3489b7d2
JMF
680 },
681 },
aaeb86f6
S
682 # JS player signature function name containing $
683 {
684 'url': 'https://www.youtube.com/watch?v=nfWlot6h_JM',
685 'info_dict': {
686 'id': 'nfWlot6h_JM',
687 'ext': 'm4a',
688 'title': 'Taylor Swift - Shake It Off',
4fe54c12 689 'description': 'md5:bec2185232c05479482cb5a9b82719bf',
556dbe7f 690 'duration': 242,
aaeb86f6
S
691 'uploader': 'TaylorSwiftVEVO',
692 'uploader_id': 'TaylorSwiftVEVO',
693 'upload_date': '20140818',
0cb58b02 694 'creator': 'Taylor Swift',
aaeb86f6
S
695 },
696 'params': {
697 'youtube_include_dash_manifest': True,
de3c7fe0 698 'format': '141/bestaudio[ext=m4a]',
aaeb86f6
S
699 },
700 },
aa79ac0c
PH
701 # Controversy video
702 {
703 'url': 'https://www.youtube.com/watch?v=T4XJQO3qol8',
704 'info_dict': {
705 'id': 'T4XJQO3qol8',
706 'ext': 'mp4',
556dbe7f 707 'duration': 219,
aa79ac0c 708 'upload_date': '20100909',
4fe54c12 709 'uploader': 'Amazing Atheist',
aa79ac0c 710 'uploader_id': 'TheAmazingAtheist',
ec85ded8 711 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheAmazingAtheist',
aa79ac0c
PH
712 'title': 'Burning Everyone\'s Koran',
713 'description': 'SUBSCRIBE: http://www.youtube.com/saturninefilms\n\nEven Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html',
714 }
c522adb1
JMF
715 },
716 # Normal age-gate video (No vevo, embed allowed)
717 {
2d3d2997 718 'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
c522adb1
JMF
719 'info_dict': {
720 'id': 'HtVdAasjOgU',
721 'ext': 'mp4',
722 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
ec85ded8 723 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
556dbe7f 724 'duration': 142,
c522adb1
JMF
725 'uploader': 'The Witcher',
726 'uploader_id': 'WitcherGame',
ec85ded8 727 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
c522adb1 728 'upload_date': '20140605',
34952f09 729 'age_limit': 18,
c522adb1
JMF
730 },
731 },
fccae2b9
S
732 # Age-gate video with encrypted signature
733 {
2d3d2997 734 'url': 'https://www.youtube.com/watch?v=6kLq3WMV1nU',
fccae2b9
S
735 'info_dict': {
736 'id': '6kLq3WMV1nU',
4fe54c12 737 'ext': 'mp4',
fccae2b9
S
738 'title': 'Dedication To My Ex (Miss That) (Lyric Video)',
739 'description': 'md5:33765bb339e1b47e7e72b5490139bb41',
eb6793ba 740 'duration': 246,
fccae2b9
S
741 'uploader': 'LloydVEVO',
742 'uploader_id': 'LloydVEVO',
ec85ded8 743 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/LloydVEVO',
fccae2b9 744 'upload_date': '20110629',
34952f09 745 'age_limit': 18,
fccae2b9
S
746 },
747 },
067aa17e 748 # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
7d02dcfa 749 # YouTube Red ad is not captured for creator
774e208f
PH
750 {
751 'url': '__2ABJjxzNo',
752 'info_dict': {
753 'id': '__2ABJjxzNo',
754 'ext': 'mp4',
556dbe7f 755 'duration': 266,
774e208f
PH
756 'upload_date': '20100430',
757 'uploader_id': 'deadmau5',
ec85ded8 758 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
0cb58b02 759 'creator': 'deadmau5',
774e208f
PH
760 'description': 'md5:12c56784b8032162bb936a5f76d55360',
761 'uploader': 'deadmau5',
762 'title': 'Deadmau5 - Some Chords (HD)',
0cb58b02 763 'alt_title': 'Some Chords',
774e208f
PH
764 },
765 'expected_warnings': [
766 'DASH manifest missing',
767 ]
e52a40ab 768 },
067aa17e 769 # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
e52a40ab
PH
770 {
771 'url': 'lqQg6PlCWgI',
772 'info_dict': {
773 'id': 'lqQg6PlCWgI',
774 'ext': 'mp4',
556dbe7f 775 'duration': 6085,
90227264 776 'upload_date': '20150827',
cbe2bd91 777 'uploader_id': 'olympic',
ec85ded8 778 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
cbe2bd91 779 'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
be49068d 780 'uploader': 'Olympic',
cbe2bd91
PH
781 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
782 },
783 'params': {
784 'skip_download': 'requires avconv',
e52a40ab 785 }
cbe2bd91 786 },
6271f1ca
PH
787 # Non-square pixels
788 {
789 'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
790 'info_dict': {
791 'id': '_b-2C3KPAM0',
792 'ext': 'mp4',
793 'stretched_ratio': 16 / 9.,
556dbe7f 794 'duration': 85,
6271f1ca
PH
795 'upload_date': '20110310',
796 'uploader_id': 'AllenMeow',
ec85ded8 797 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
6271f1ca 798 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
eb6793ba 799 'uploader': '孫ᄋᄅ',
6271f1ca
PH
800 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
801 },
06b491eb
S
802 },
803 # url_encoded_fmt_stream_map is empty string
804 {
805 'url': 'qEJwOuvDf7I',
806 'info_dict': {
807 'id': 'qEJwOuvDf7I',
f57b7835 808 'ext': 'webm',
06b491eb
S
809 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
810 'description': '',
811 'upload_date': '20150404',
812 'uploader_id': 'spbelect',
813 'uploader': 'Наблюдатели Петербурга',
814 },
815 'params': {
816 'skip_download': 'requires avconv',
e323cf3f
S
817 },
818 'skip': 'This live event has ended.',
06b491eb 819 },
067aa17e 820 # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
da77d856
S
821 {
822 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
823 'info_dict': {
824 'id': 'FIl7x6_3R5Y',
eb6793ba 825 'ext': 'webm',
da77d856
S
826 'title': 'md5:7b81415841e02ecd4313668cde88737a',
827 'description': 'md5:116377fd2963b81ec4ce64b542173306',
556dbe7f 828 'duration': 220,
da77d856
S
829 'upload_date': '20150625',
830 'uploader_id': 'dorappi2000',
ec85ded8 831 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
da77d856 832 'uploader': 'dorappi2000',
eb6793ba 833 'formats': 'mincount:31',
da77d856 834 },
eb6793ba 835 'skip': 'not actual anymore',
2ee8f5d8 836 },
8a1a26ce
YCH
837 # DASH manifest with segment_list
838 {
839 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
840 'md5': '8ce563a1d667b599d21064e982ab9e31',
841 'info_dict': {
842 'id': 'CsmdDsKjzN8',
843 'ext': 'mp4',
17ee98e1 844 'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510
8a1a26ce
YCH
845 'uploader': 'Airtek',
846 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
847 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
848 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
849 },
850 'params': {
851 'youtube_include_dash_manifest': True,
852 'format': '135', # bestvideo
be49068d
S
853 },
854 'skip': 'This live event has ended.',
2ee8f5d8 855 },
cf7e015f
S
856 {
857 # Multifeed videos (multiple cameras), URL is for Main Camera
858 'url': 'https://www.youtube.com/watch?v=jqWvoWXjCVs',
859 'info_dict': {
860 'id': 'jqWvoWXjCVs',
861 'title': 'teamPGP: Rocket League Noob Stream',
862 'description': 'md5:dc7872fb300e143831327f1bae3af010',
863 },
864 'playlist': [{
865 'info_dict': {
866 'id': 'jqWvoWXjCVs',
867 'ext': 'mp4',
868 'title': 'teamPGP: Rocket League Noob Stream (Main Camera)',
869 'description': 'md5:dc7872fb300e143831327f1bae3af010',
556dbe7f 870 'duration': 7335,
cf7e015f
S
871 'upload_date': '20150721',
872 'uploader': 'Beer Games Beer',
873 'uploader_id': 'beergamesbeer',
ec85ded8 874 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
7caf9830 875 'license': 'Standard YouTube License',
cf7e015f
S
876 },
877 }, {
878 'info_dict': {
879 'id': '6h8e8xoXJzg',
880 'ext': 'mp4',
881 'title': 'teamPGP: Rocket League Noob Stream (kreestuh)',
882 'description': 'md5:dc7872fb300e143831327f1bae3af010',
556dbe7f 883 'duration': 7337,
cf7e015f
S
884 'upload_date': '20150721',
885 'uploader': 'Beer Games Beer',
886 'uploader_id': 'beergamesbeer',
ec85ded8 887 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
7caf9830 888 'license': 'Standard YouTube License',
cf7e015f
S
889 },
890 }, {
891 'info_dict': {
892 'id': 'PUOgX5z9xZw',
893 'ext': 'mp4',
894 'title': 'teamPGP: Rocket League Noob Stream (grizzle)',
895 'description': 'md5:dc7872fb300e143831327f1bae3af010',
556dbe7f 896 'duration': 7337,
cf7e015f
S
897 'upload_date': '20150721',
898 'uploader': 'Beer Games Beer',
899 'uploader_id': 'beergamesbeer',
ec85ded8 900 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
7caf9830 901 'license': 'Standard YouTube License',
cf7e015f
S
902 },
903 }, {
904 'info_dict': {
905 'id': 'teuwxikvS5k',
906 'ext': 'mp4',
907 'title': 'teamPGP: Rocket League Noob Stream (zim)',
908 'description': 'md5:dc7872fb300e143831327f1bae3af010',
556dbe7f 909 'duration': 7334,
cf7e015f
S
910 'upload_date': '20150721',
911 'uploader': 'Beer Games Beer',
912 'uploader_id': 'beergamesbeer',
ec85ded8 913 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
7caf9830 914 'license': 'Standard YouTube License',
cf7e015f
S
915 },
916 }],
917 'params': {
918 'skip_download': True,
919 },
4fe54c12 920 'skip': 'This video is not available.',
cbaed4bb 921 },
f9f49d87 922 {
067aa17e 923 # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
f9f49d87
S
924 'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
925 'info_dict': {
926 'id': 'gVfLd0zydlo',
927 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
928 },
929 'playlist_count': 2,
be49068d 930 'skip': 'Not multifeed anymore',
f9f49d87 931 },
cbaed4bb 932 {
2d3d2997 933 'url': 'https://vid.plus/FlRa-iH7PGw',
cbaed4bb 934 'only_matching': True,
0e49d9a6 935 },
6d4fc66b 936 {
2d3d2997 937 'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
6d4fc66b
S
938 'only_matching': True,
939 },
0e49d9a6 940 {
067aa17e 941 # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
a8776b10 942 # Also tests cut-off URL expansion in video description (see
067aa17e
S
943 # https://github.com/ytdl-org/youtube-dl/issues/1892,
944 # https://github.com/ytdl-org/youtube-dl/issues/8164)
0e49d9a6
LL
945 'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
946 'info_dict': {
947 'id': 'lsguqyKfVQg',
948 'ext': 'mp4',
949 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
eb6793ba 950 'alt_title': 'Dark Walk - Position Music',
0e49d9a6 951 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
556dbe7f 952 'duration': 133,
0e49d9a6
LL
953 'upload_date': '20151119',
954 'uploader_id': 'IronSoulElf',
ec85ded8 955 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
0e49d9a6 956 'uploader': 'IronSoulElf',
eb6793ba
S
957 'creator': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
958 'track': 'Dark Walk - Position Music',
959 'artist': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
92bc97d3 960 'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
0e49d9a6
LL
961 },
962 'params': {
963 'skip_download': True,
964 },
965 },
61f92af1 966 {
067aa17e 967 # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
61f92af1
S
968 'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
969 'only_matching': True,
970 },
313dfc45
LL
971 {
972 # Video with yt:stretch=17:0
973 'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
974 'info_dict': {
975 'id': 'Q39EVAstoRM',
976 'ext': 'mp4',
977 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
978 'description': 'md5:ee18a25c350637c8faff806845bddee9',
979 'upload_date': '20151107',
980 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
981 'uploader': 'CH GAMER DROID',
982 },
983 'params': {
984 'skip_download': True,
985 },
be49068d 986 'skip': 'This video does not exist.',
313dfc45 987 },
7caf9830
S
988 {
989 # Video licensed under Creative Commons
990 'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
991 'info_dict': {
992 'id': 'M4gD1WSo5mA',
993 'ext': 'mp4',
994 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
995 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
556dbe7f 996 'duration': 721,
7caf9830
S
997 'upload_date': '20150127',
998 'uploader_id': 'BerkmanCenter',
ec85ded8 999 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
556dbe7f 1000 'uploader': 'The Berkman Klein Center for Internet & Society',
7caf9830
S
1001 'license': 'Creative Commons Attribution license (reuse allowed)',
1002 },
1003 'params': {
1004 'skip_download': True,
1005 },
1006 },
fd050249
S
1007 {
1008 # Channel-like uploader_url
1009 'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
1010 'info_dict': {
1011 'id': 'eQcmzGIKrzg',
1012 'ext': 'mp4',
1013 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
1014 'description': 'md5:dda0d780d5a6e120758d1711d062a867',
556dbe7f 1015 'duration': 4060,
fd050249 1016 'upload_date': '20151119',
eb6793ba 1017 'uploader': 'Bernie Sanders',
fd050249 1018 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
ec85ded8 1019 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
fd050249
S
1020 'license': 'Creative Commons Attribution license (reuse allowed)',
1021 },
1022 'params': {
1023 'skip_download': True,
1024 },
1025 },
040ac686
S
1026 {
1027 'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
1028 'only_matching': True,
7f29cf54
S
1029 },
1030 {
067aa17e 1031 # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
7f29cf54
S
1032 'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
1033 'only_matching': True,
6496ccb4
S
1034 },
1035 {
1036 # Rental video preview
1037 'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
1038 'info_dict': {
1039 'id': 'uGpuVWrhIzE',
1040 'ext': 'mp4',
1041 'title': 'Piku - Trailer',
1042 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
1043 'upload_date': '20150811',
1044 'uploader': 'FlixMatrix',
1045 'uploader_id': 'FlixMatrixKaravan',
ec85ded8 1046 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
6496ccb4
S
1047 'license': 'Standard YouTube License',
1048 },
1049 'params': {
1050 'skip_download': True,
1051 },
eb6793ba 1052 'skip': 'This video is not available.',
022a5d66 1053 },
12afdc2a
S
1054 {
1055 # YouTube Red video with episode data
1056 'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
1057 'info_dict': {
1058 'id': 'iqKdEhx-dD4',
1059 'ext': 'mp4',
1060 'title': 'Isolation - Mind Field (Ep 1)',
4fe54c12 1061 'description': 'md5:46a29be4ceffa65b92d277b93f463c0f',
556dbe7f 1062 'duration': 2085,
12afdc2a
S
1063 'upload_date': '20170118',
1064 'uploader': 'Vsauce',
1065 'uploader_id': 'Vsauce',
1066 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
12afdc2a
S
1067 'series': 'Mind Field',
1068 'season_number': 1,
1069 'episode_number': 1,
1070 },
1071 'params': {
1072 'skip_download': True,
1073 },
1074 'expected_warnings': [
1075 'Skipping DASH manifest',
1076 ],
1077 },
c7121fa7
S
1078 {
1079 # The following content has been identified by the YouTube community
1080 # as inappropriate or offensive to some audiences.
1081 'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
1082 'info_dict': {
1083 'id': '6SJNVb0GnPI',
1084 'ext': 'mp4',
1085 'title': 'Race Differences in Intelligence',
1086 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
1087 'duration': 965,
1088 'upload_date': '20140124',
1089 'uploader': 'New Century Foundation',
1090 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
1091 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
c7121fa7
S
1092 },
1093 'params': {
1094 'skip_download': True,
1095 },
1096 },
022a5d66
S
1097 {
1098 # itag 212
1099 'url': '1t24XAntNCY',
1100 'only_matching': True,
fd5c4aab
S
1101 },
1102 {
1103 # geo restricted to JP
1104 'url': 'sJL6WA-aGkQ',
1105 'only_matching': True,
1106 },
d0ba5587
S
1107 {
1108 'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
1109 'only_matching': True,
1110 },
cd5a74a2
S
1111 {
1112 'url': 'https://invidio.us/watch?v=BaW_jenozKc',
1113 'only_matching': True,
1114 },
825cd268
RA
1115 {
1116 # DRM protected
1117 'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
1118 'only_matching': True,
4fe54c12
S
1119 },
1120 {
1121 # Video with unsupported adaptive stream type formats
1122 'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
1123 'info_dict': {
1124 'id': 'Z4Vy8R84T1U',
1125 'ext': 'mp4',
1126 'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
1127 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
1128 'duration': 433,
1129 'upload_date': '20130923',
1130 'uploader': 'Amelia Putri Harwita',
1131 'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',
1132 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',
1133 'formats': 'maxcount:10',
1134 },
1135 'params': {
1136 'skip_download': True,
1137 'youtube_include_dash_manifest': False,
1138 },
5caabd3c 1139 },
1140 {
822b9d9c 1141 # Youtube Music Auto-generated description
5caabd3c 1142 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1143 'info_dict': {
1144 'id': 'MgNrAu2pzNs',
1145 'ext': 'mp4',
1146 'title': 'Voyeur Girl',
1147 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
1148 'upload_date': '20190312',
1149 'uploader': 'Various Artists - Topic',
1150 'uploader_id': 'UCVWKBi1ELZn0QX2CBLSkiyw',
1151 'artist': 'Stephen',
1152 'track': 'Voyeur Girl',
1153 'album': 'it\'s too much love to know my dear',
1154 'release_date': '20190313',
1155 'release_year': 2019,
1156 },
1157 'params': {
1158 'skip_download': True,
1159 },
1160 },
1161 {
822b9d9c 1162 # Youtube Music Auto-generated description
5caabd3c 1163 # Retrieve 'artist' field from 'Artist:' in video description
1164 # when it is present on youtube music video
5caabd3c 1165 'url': 'https://www.youtube.com/watch?v=k0jLE7tTwjY',
1166 'info_dict': {
1167 'id': 'k0jLE7tTwjY',
1168 'ext': 'mp4',
1169 'title': 'Latch Feat. Sam Smith',
1170 'description': 'md5:3cb1e8101a7c85fcba9b4fb41b951335',
1171 'upload_date': '20150110',
1172 'uploader': 'Various Artists - Topic',
1173 'uploader_id': 'UCNkEcmYdjrH4RqtNgh7BZ9w',
1174 'artist': 'Disclosure',
1175 'track': 'Latch Feat. Sam Smith',
1176 'album': 'Latch Featuring Sam Smith',
1177 'release_date': '20121008',
1178 'release_year': 2012,
1179 },
1180 'params': {
1181 'skip_download': True,
1182 },
1183 },
1184 {
822b9d9c 1185 # Youtube Music Auto-generated description
5caabd3c 1186 # handle multiple artists on youtube music video
1187 'url': 'https://www.youtube.com/watch?v=74qn0eJSjpA',
1188 'info_dict': {
1189 'id': '74qn0eJSjpA',
1190 'ext': 'mp4',
1191 'title': 'Eastside',
1192 'description': 'md5:290516bb73dcbfab0dcc4efe6c3de5f2',
1193 'upload_date': '20180710',
1194 'uploader': 'Benny Blanco - Topic',
1195 'uploader_id': 'UCzqz_ksRu_WkIzmivMdIS7A',
1196 'artist': 'benny blanco, Halsey, Khalid',
1197 'track': 'Eastside',
1198 'album': 'Eastside',
1199 'release_date': '20180713',
1200 'release_year': 2018,
1201 },
1202 'params': {
1203 'skip_download': True,
1204 },
1205 },
1206 {
822b9d9c 1207 # Youtube Music Auto-generated description
5caabd3c 1208 # handle youtube music video with release_year and no release_date
1209 'url': 'https://www.youtube.com/watch?v=-hcAI0g-f5M',
1210 'info_dict': {
1211 'id': '-hcAI0g-f5M',
1212 'ext': 'mp4',
1213 'title': 'Put It On Me',
1214 'description': 'md5:93c55acc682ae7b0c668f2e34e1c069e',
1215 'upload_date': '20180426',
1216 'uploader': 'Matt Maeson - Topic',
1217 'uploader_id': 'UCnEkIGqtGcQMLk73Kp-Q5LQ',
1218 'artist': 'Matt Maeson',
1219 'track': 'Put It On Me',
1220 'album': 'The Hearse',
1221 'release_date': None,
1222 'release_year': 2018,
1223 },
1224 'params': {
1225 'skip_download': True,
1226 },
1227 },
2eb88d95
PH
1228 ]
1229
e0df6211
PH
def __init__(self, *args, **kwargs):
    """Set up the extractor and start with an empty signature player cache."""
    super(YoutubeIE, self).__init__(*args, **kwargs)
    # Filled by _decrypt_signature: maps (player_url, signature cache id)
    # to the deciphering callable extracted for that player.
    self._player_cache = dict()
e0df6211 1233
c5e8d7af
PH
def report_video_info_webpage_download(self, video_id):
    """Tell the user that the video info webpage is about to be fetched."""
    message = '%s: Downloading video info webpage' % video_id
    self.to_screen(message)
c5e8d7af 1237
c5e8d7af
PH
def report_information_extraction(self, video_id):
    """Tell the user that video information is about to be extracted."""
    message = '%s: Extracting video information' % video_id
    self.to_screen(message)
c5e8d7af
PH
1241
def report_unavailable_format(self, video_id, format):
    """Report that the requested format is not available for the video.

    video_id -- identifier of the video, used as message prefix
    format   -- the format that could not be found
    """
    self.to_screen('%s: Format %s not available' % (video_id, format))
c5e8d7af
PH
1245
def report_rtmp_download(self):
    """Tell the user that the download is going to use the RTMP protocol."""
    message = 'RTMP download detected'
    self.to_screen(message)
c5e8d7af 1249
60064c53
PH
def _signature_cache_id(self, example_sig):
    """Describe a signature by the lengths of its dot-separated parts.

    The lengths are joined with dots again, so a signature made of a
    2-character and a 3-character part yields '2.3'.
    """
    part_lengths = [compat_str(len(chunk)) for chunk in example_sig.split('.')]
    return '.'.join(part_lengths)
60064c53
PH
1253
def _extract_signature_function(self, video_id, player_url, example_sig):
    """Return a callable deciphering signatures shaped like example_sig.

    The player id and type ('js' or 'swf') are parsed out of player_url.
    If the downloader cache already holds a spec for this player and
    signature shape, the callable is rebuilt from it; otherwise the player
    is downloaded, parsed and the resulting spec is stored in the cache.

    Raises ExtractorError if the player cannot be identified from the URL
    or if its type is neither 'js' nor 'swf'.
    """
    id_m = re.match(
        r'.*?-(?P<id>[a-zA-Z0-9_-]+)(?:/watch_as3|/html5player(?:-new)?|(?:/[a-z]{2,3}_[A-Z]{2})?/base)?\.(?P<ext>[a-z]+)$',
        player_url)
    if not id_m:
        raise ExtractorError('Cannot identify player %r' % player_url)
    player_type = id_m.group('ext')
    player_id = id_m.group('id')

    # Read from filesystem cache
    func_id = '%s_%s_%s' % (
        player_type, player_id, self._signature_cache_id(example_sig))
    # Sanity check: the cache key must not contain any path component
    assert os.path.basename(func_id) == func_id

    cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
    if cache_spec is not None:
        # The spec lists, per output position, the input index to copy from
        return lambda s: ''.join(s[i] for i in cache_spec)

    download_note = (
        'Downloading player %s' % player_url
        if self._downloader.params.get('verbose') else
        'Downloading %s player %s' % (player_type, player_id)
    )
    if player_type == 'js':
        code = self._download_webpage(
            player_url, video_id,
            note=download_note,
            errnote='Download of %s failed' % player_url)
        res = self._parse_sig_js(code)
    elif player_type == 'swf':
        urlh = self._request_webpage(
            player_url, video_id,
            note=download_note,
            errnote='Download of %s failed' % player_url)
        code = urlh.read()
        res = self._parse_sig_swf(code)
    else:
        # Raise explicitly rather than `assert False`: assertions are
        # stripped under `python -O`, which would leave `res` unbound below.
        raise ExtractorError('Invalid player type %r' % player_type)

    # Run the function on a probe string whose characters encode their own
    # position; the code points of the result form the cacheable spec.
    test_string = ''.join(map(compat_chr, range(len(example_sig))))
    cache_res = res(test_string)
    cache_spec = [ord(c) for c in cache_res]

    self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec)
    return res
1299
def _print_sig_code(self, func, example_sig):
    """Print Python code equivalent to the deciphering function func.

    func is applied to a probe string whose characters encode their own
    position, so the result reveals which input index ends up at which
    output position. That permutation is rendered as a concatenation of
    index and slice expressions on ``s`` and shown via to_screen, guarded
    by a check on the part lengths of example_sig.
    """
    def gen_sig_code(idxs):
        def _genslice(start, end, step):
            starts = '' if start == 0 else str(start)
            ends = (':%d' % (end + step)) if end + step >= 0 else ':'
            steps = '' if step == 1 else (':%d' % step)
            return 's[%s%s%s]' % (starts, ends, steps)

        # With fewer than two indices the pairwise loop below never runs and
        # its loop variable would be unbound afterwards, so emit the
        # (at most one) plain index expression directly.
        if len(idxs) < 2:
            for idx in idxs:
                yield 's[%d]' % idx
            return

        step = None
        # Quell pyflakes warnings - start will be set when step is set
        start = '(Never used)'
        for i, prev in zip(idxs[1:], idxs[:-1]):
            if step is not None:
                if i - prev == step:
                    # Still inside the current run of consecutive indices
                    continue
                yield _genslice(start, prev, step)
                step = None
                continue
            if i - prev in [-1, 1]:
                # A run of ascending or descending indices starts here
                step = i - prev
                start = prev
                continue
            else:
                yield 's[%d]' % prev
        if step is None:
            yield 's[%d]' % i
        else:
            yield _genslice(start, i, step)

    test_string = ''.join(map(compat_chr, range(len(example_sig))))
    cache_res = func(test_string)
    cache_spec = [ord(c) for c in cache_res]
    expr_code = ' + '.join(gen_sig_code(cache_spec))
    signature_id_tuple = '(%s)' % (
        ', '.join(compat_str(len(p)) for p in example_sig.split('.')))
    code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
            ' return %s\n') % (signature_id_tuple, expr_code)
    self.to_screen('Extracted signature function:\n' + code)
edf3e38e 1338
e0df6211
PH
def _parse_sig_js(self, jscode):
    """Build a signature deciphering callable from JS player source code.

    The name of the initial signature function is located with a list of
    regular expressions tried in order, then the function is extracted
    with JSInterpreter.
    """
    name_patterns = (
        r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
        # Obsolete patterns
        r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
        r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
    )
    sig_func_name = self._search_regex(
        name_patterns, jscode,
        'Initial JS player signature function name', group='sig')

    decipher = JSInterpreter(jscode).extract_function(sig_func_name)

    def run(s):
        # The extracted function takes its arguments as a list
        return decipher([s])

    return run
1358
def _parse_sig_swf(self, file_contents):
    """Build a signature deciphering callable from SWF player contents.

    The 'decipher' function of the 'SignatureDecipher' class is extracted
    with SWFInterpreter and wrapped so it can be called with the signature.
    """
    interpreter = SWFInterpreter(file_contents)
    decipher_class = interpreter.extract_class('SignatureDecipher')
    decipher = interpreter.extract_function(decipher_class, 'decipher')

    def run(s):
        # The extracted function takes its arguments as a list
        return decipher([s])

    return run
1365
def _decrypt_signature(self, s, video_id, player_url, age_gate=False):
    """Decipher the encrypted signature s with the player at player_url.

    Deciphering callables are kept in self._player_cache, keyed by the
    normalised player URL and the signature cache id of s. Any failure
    while extracting or applying the callable is re-raised as
    ExtractorError carrying the formatted traceback.
    """
    if player_url is None:
        raise ExtractorError('Cannot decrypt signature without player_url')

    # Turn protocol-relative and site-relative player URLs into absolute ones
    if player_url.startswith('//'):
        player_url = 'https:' + player_url
    elif re.match(r'https?://', player_url) is None:
        player_url = compat_urlparse.urljoin(
            'https://www.youtube.com', player_url)

    try:
        cache_key = (player_url, self._signature_cache_id(s))
        if cache_key in self._player_cache:
            decipher = self._player_cache[cache_key]
        else:
            decipher = self._extract_signature_function(
                video_id, player_url, s)
            self._player_cache[cache_key] = decipher

        print_code = self._downloader.params.get('youtube_print_sig_code')
        if print_code:
            self._print_sig_code(decipher, s)
        return decipher(s)
    except Exception as e:
        raise ExtractorError(
            'Signature extraction failed: ' + traceback.format_exc(),
            cause=e)
e0df6211 1392
def _get_subtitles(self, video_id, webpage):
    """Return the available subtitles as {lang: [{'url': ..., 'ext': ...}]}.

    The track list is downloaded from the timedtext service; for every
    language (first track wins) one entry per format in
    self._SUBTITLE_FORMATS is produced. An empty dict is returned, after
    a warning, when the list cannot be downloaded or contains no tracks.
    """
    list_url = 'https://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id
    try:
        subs_doc = self._download_xml(list_url, video_id, note=False)
    except ExtractorError as err:
        self._downloader.report_warning(
            'unable to download video subtitles: %s' % error_to_compat_str(err))
        return {}

    subtitles = {}
    for track in subs_doc.findall('track'):
        lang = track.attrib['lang_code']
        if lang not in subtitles:
            subtitles[lang] = [{
                'url': 'https://www.youtube.com/api/timedtext?' + compat_urllib_parse_urlencode({
                    'lang': lang,
                    'v': video_id,
                    'fmt': ext,
                    'name': track.attrib['name'].encode('utf-8'),
                }),
                'ext': ext,
            } for ext in self._SUBTITLE_FORMATS]

    if subtitles:
        return subtitles
    self._downloader.report_warning('video doesn\'t have subtitles')
    return {}
1424
a72778d3
S
def _get_ytplayer_config(self, video_id, webpage):
    """Extract and parse the ytplayer.config JSON object from webpage.

    Returns the parsed config, or None when no config is found (JSON
    parsing itself is done non-fatally).
    """
    # User data may contain arbitrary character sequences that may affect
    # JSON extraction with regex, e.g. when '};' is contained the second
    # regex won't capture the whole JSON. Hence the more concrete regex is
    # tried first, as a workaround until proper quoted string handling is
    # implemented (see
    # https://github.com/ytdl-org/youtube-dl/issues/7468,
    # https://github.com/ytdl-org/youtube-dl/pull/7599)
    config_patterns = (
        r';ytplayer\.config\s*=\s*({.+?});ytplayer',
        r';ytplayer\.config\s*=\s*({.+?});',
    )
    raw_config = self._search_regex(
        config_patterns, webpage, 'ytplayer.config', default=None)
    if not raw_config:
        return None
    escaped_config = uppercase_escape(raw_config)
    return self._parse_json(escaped_config, video_id, fatal=False)
0e49d9a6 1442
360e1ca5 1443 def _get_automatic_captions(self, video_id, webpage):
de7f3446
JMF
1444 """We need the webpage for getting the captions url, pass it as an
1445 argument to speed up the process."""
69ea8ca4 1446 self.to_screen('%s: Looking for automatic captions' % video_id)
a72778d3 1447 player_config = self._get_ytplayer_config(video_id, webpage)
78caa52a 1448 err_msg = 'Couldn\'t find automatic captions for %s' % video_id
a72778d3 1449 if not player_config:
de7f3446
JMF
1450 self._downloader.report_warning(err_msg)
1451 return {}
de7f3446 1452 try:
0792d563 1453 args = player_config['args']
b78b292f
S
1454 caption_url = args.get('ttsurl')
1455 if caption_url:
1456 timestamp = args['timestamp']
1457 # We get the available subtitles
15707c7e 1458 list_params = compat_urllib_parse_urlencode({
b78b292f
S
1459 'type': 'list',
1460 'tlangs': 1,
1461 'asrs': 1,
1462 })
1463 list_url = caption_url + '&' + list_params
1464 caption_list = self._download_xml(list_url, video_id)
1465 original_lang_node = caption_list.find('track')
1466 if original_lang_node is None:
1467 self._downloader.report_warning('Video doesn\'t have automatic captions')
1468 return {}
1469 original_lang = original_lang_node.attrib['lang_code']
1470 caption_kind = original_lang_node.attrib.get('kind', '')
1471
1472 sub_lang_list = {}
1473 for lang_node in caption_list.findall('target'):
1474 sub_lang = lang_node.attrib['lang_code']
1475 sub_formats = []
1476 for ext in self._SUBTITLE_FORMATS:
15707c7e 1477 params = compat_urllib_parse_urlencode({
b78b292f
S
1478 'lang': original_lang,
1479 'tlang': sub_lang,
1480 'fmt': ext,
1481 'ts': timestamp,
1482 'kind': caption_kind,
1483 })
1484 sub_formats.append({
1485 'url': caption_url + '&' + params,
1486 'ext': ext,
1487 })
1488 sub_lang_list[sub_lang] = sub_formats
1489 return sub_lang_list
1490
ddbb4c5c
S
1491 def make_captions(sub_url, sub_langs):
1492 parsed_sub_url = compat_urllib_parse_urlparse(sub_url)
1493 caption_qs = compat_parse_qs(parsed_sub_url.query)
1494 captions = {}
1495 for sub_lang in sub_langs:
1496 sub_formats = []
1497 for ext in self._SUBTITLE_FORMATS:
1498 caption_qs.update({
1499 'tlang': [sub_lang],
1500 'fmt': [ext],
1501 })
1502 sub_url = compat_urlparse.urlunparse(parsed_sub_url._replace(
1503 query=compat_urllib_parse_urlencode(caption_qs, True)))
1504 sub_formats.append({
1505 'url': sub_url,
1506 'ext': ext,
1507 })
1508 captions[sub_lang] = sub_formats
1509 return captions
1510
1511 # New captions format as of 22.06.2017
1512 player_response = args.get('player_response')
1513 if player_response and isinstance(player_response, compat_str):
1514 player_response = self._parse_json(
1515 player_response, video_id, fatal=False)
1516 if player_response:
1517 renderer = player_response['captions']['playerCaptionsTracklistRenderer']
1518 base_url = renderer['captionTracks'][0]['baseUrl']
1519 sub_lang_list = []
1520 for lang in renderer['translationLanguages']:
1521 lang_code = lang.get('languageCode')
1522 if lang_code:
1523 sub_lang_list.append(lang_code)
1524 return make_captions(base_url, sub_lang_list)
1525
b78b292f
S
1526 # Some videos don't provide ttsurl but rather caption_tracks and
1527 # caption_translation_languages (e.g. 20LmZk1hakA)
ddbb4c5c 1528 # Not used anymore as of 22.06.2017
b78b292f
S
1529 caption_tracks = args['caption_tracks']
1530 caption_translation_languages = args['caption_translation_languages']
1531 caption_url = compat_parse_qs(caption_tracks.split(',')[0])['u'][0]
ddbb4c5c 1532 sub_lang_list = []
b78b292f
S
1533 for lang in caption_translation_languages.split(','):
1534 lang_qs = compat_parse_qs(compat_urllib_parse_unquote_plus(lang))
1535 sub_lang = lang_qs.get('lc', [None])[0]
ddbb4c5c
S
1536 if sub_lang:
1537 sub_lang_list.append(sub_lang)
1538 return make_captions(caption_url, sub_lang_list)
de7f3446
JMF
1539 # An extractor error can be raised by the download process if there are
1540 # no automatic captions but there are subtitles
ddbb4c5c 1541 except (KeyError, IndexError, ExtractorError):
de7f3446
JMF
1542 self._downloader.report_warning(err_msg)
1543 return {}
1544
21c340b8
S
def _mark_watched(self, video_id, video_info, player_response):
    """Request the playback-stats URL so the video is registered as watched.

    The base URL is looked up first in ``player_response``
    (``playbackTracking.videostatsPlaybackUrl.baseUrl``) and then in
    ``video_info['videostats_playback_base_url'][0]``.  When neither
    yields a valid URL the method returns without doing anything.
    The download is issued with ``fatal=False``.
    """
    def _from_player_response(x):
        return x['playbackTracking']['videostatsPlaybackUrl']['baseUrl']

    def _from_video_info(x):
        return x['videostats_playback_base_url'][0]

    playback_url = url_or_none(
        try_get(player_response, _from_player_response)
        or try_get(video_info, _from_video_info))
    if not playback_url:
        return

    url_parts = compat_urlparse.urlparse(playback_url)
    query = compat_urlparse.parse_qs(url_parts.query)

    # cpn generation algorithm is reverse engineered from base.js.
    # In fact it works even with dummy cpn.
    CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
    cpn_chars = []
    for _ in range(16):
        # "& 63" folds the random value into the 64-character alphabet
        cpn_chars.append(CPN_ALPHABET[random.randint(0, 256) & 63])

    query['ver'] = ['2']
    query['cpn'] = [''.join(cpn_chars)]

    tracking_url = compat_urlparse.urlunparse(url_parts._replace(
        query=compat_urllib_parse_urlencode(query, True)))

    self._download_webpage(
        tracking_url, video_id, 'Marking watched',
        'Unable to mark watched', fatal=False)
1570
66c9fa36
S
@staticmethod
def _extract_urls(webpage):
    """Return every YouTube embed reference found in *webpage*.

    The result is a list built from three scans, in this order:

    * player URLs (``//[www.]youtube[-nocookie].com/(embed|v|p)/<11-char id>...``)
      appearing as a quoted value after ``<iframe src=``, ``data-video-url=``,
      ``<embed src=``, an ``embedSWF(`` call, ``<object data=`` or
      ``new SWFObject(``; HTML entities are unescaped;
    * the ``data-youtube-id`` values of lazyYT elements (unescaped);
    * the ``data-video_id`` values of the Wordpress "YouTube Video
      Importer" plugin player ``<div>``.

    An empty list is returned when nothing matches.
    """
    # Embedded YouTube player
    #
    # The embedSWF alternative used to be written ``embedSWF\(?:\s*``, which
    # demands a literal colon and therefore never matched the usual
    # ``swfobject.embedSWF("//www.youtube.com/v/ID", ...)`` call.  It now
    # accepts ``embedSWF(`` and still accepts the previously matched
    # ``embedSWF(:`` / ``embedSWF:`` spellings.
    entries = [
        unescapeHTML(mobj.group('url'))
        for mobj in re.finditer(r'''(?x)
        (?:
            <iframe[^>]+?src=|
            data-video-url=|
            <embed[^>]+?src=|
            embedSWF(?:\(|\(?:)\s*|
            <object[^>]+data=|
            new\s+SWFObject\(
        )
        (["\'])
        (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
        (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
        \1''', webpage)]

    # lazyYT YouTube embed
    entries.extend(
        unescapeHTML(youtube_id)
        for youtube_id in re.findall(
            r'class="lazyYT" data-youtube-id="([^"]+)"', webpage))

    # Wordpress "YouTube Video Importer" plugin
    matches = re.findall(r'''(?x)<div[^>]+
        class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
        data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
    # findall yields (q1, q2, video_id) tuples; the id is the last group
    entries.extend(m[-1] for m in matches)

    return entries
1602
@staticmethod
def _extract_url(webpage):
    """Return the first entry reported by ``YoutubeIE._extract_urls``, or None."""
    found = YoutubeIE._extract_urls(webpage)
    if not found:
        return None
    return found[0]
1607
97665381
PH
@classmethod
def extract_id(cls, url):
    """Return the second capture group of ``cls._VALID_URL`` matched against *url*.

    The pattern is applied with ``re.VERBOSE`` and anchored at the start
    of *url* (``re.match``).

    Raises ExtractorError when *url* does not match.
    """
    match = re.match(cls._VALID_URL, url, re.VERBOSE)
    if match is not None:
        return match.group(2)
    raise ExtractorError('Invalid URL: %s' % url)
1615
9cafc3fd
S
@staticmethod
def _extract_chapters(description, duration):
    """Build a chapter list from seek links in a description's HTML.

    A chapter line is a run of text, delimited by the start of the string
    or ``<br />``, that contains an ``<a ... onclick="yt.www.watch.player.seekTo...">``
    link whose text is a ``M:SS`` / ``H:MM:SS`` style time point.

    :param description: description HTML to scan
    :param duration: total video duration, used as the end of the last
        chapter and as an upper bound for every time point
    :return: list of dicts with ``start_time``, ``end_time`` and ``title``;
        None when there is no description, no known duration, or no
        chapter line was found
    """
    # Every time point is compared against the duration; with duration=None
    # that comparison raises TypeError on Python 3, so bail out early.
    if not description or duration is None:
        return None
    chapter_lines = re.findall(
        r'(?:^|<br\s*/>)([^<]*<a[^>]+onclick=["\']yt\.www\.watch\.player\.seekTo[^>]+>(\d{1,2}:\d{1,2}(?::\d{1,2})?)</a>[^>]*)(?=$|<br\s*/>)',
        description)
    if not chapter_lines:
        return None
    chapters = []
    # next_num is 1-based, so it is also the index of the following line
    for next_num, (chapter_line, time_point) in enumerate(
            chapter_lines, start=1):
        start_time = parse_duration(time_point)
        if start_time is None:
            continue
        if start_time > duration:
            break
        # A chapter ends where the next one starts; the last ends with the video
        end_time = (duration if next_num == len(chapter_lines)
                    else parse_duration(chapter_lines[next_num][1]))
        if end_time is None:
            continue
        if end_time > duration:
            end_time = duration
        if start_time > end_time:
            break
        # Title is the line minus the time link, trimmed of spaces, tabs and dashes
        chapter_title = re.sub(
            r'<a[^>]+>[^<]+</a>', '', chapter_line).strip(' \t-')
        chapter_title = re.sub(r'\s+', ' ', chapter_title)
        chapters.append({
            'start_time': start_time,
            'end_time': end_time,
            'title': chapter_title,
        })
    return chapters
1650
c5e8d7af 1651 def _real_extract(self, url):
cf7e015f
S
1652 url, smuggled_data = unsmuggle_url(url, {})
1653
7e8c0af0 1654 proto = (
78caa52a
PH
1655 'http' if self._downloader.params.get('prefer_insecure', False)
1656 else 'https')
7e8c0af0 1657
7c80519c 1658 start_time = None
297a564b 1659 end_time = None
7c80519c
JMF
1660 parsed_url = compat_urllib_parse_urlparse(url)
1661 for component in [parsed_url.fragment, parsed_url.query]:
1662 query = compat_parse_qs(component)
297a564b 1663 if start_time is None and 't' in query:
7c80519c 1664 start_time = parse_duration(query['t'][0])
2929fa0e
JMF
1665 if start_time is None and 'start' in query:
1666 start_time = parse_duration(query['start'][0])
297a564b
JMF
1667 if end_time is None and 'end' in query:
1668 end_time = parse_duration(query['end'][0])
7c80519c 1669
c5e8d7af
PH
1670 # Extract original video URL from URL with redirection, like age verification, using next_url parameter
1671 mobj = re.search(self._NEXT_URL_RE, url)
1672 if mobj:
7fd002c0 1673 url = proto + '://www.youtube.com/' + compat_urllib_parse_unquote(mobj.group(1)).lstrip('/')
97665381 1674 video_id = self.extract_id(url)
c5e8d7af
PH
1675
1676 # Get video webpage
aa79ac0c 1677 url = proto + '://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1&bpctr=9999999999' % video_id
a1f934b1 1678 video_webpage = self._download_webpage(url, video_id)
c5e8d7af
PH
1679
1680 # Attempt to extract SWF player URL
e0df6211 1681 mobj = re.search(r'swfConfig.*?"(https?:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage)
c5e8d7af
PH
1682 if mobj is not None:
1683 player_url = re.sub(r'\\(.)', r'\1', mobj.group(1))
1684 else:
1685 player_url = None
1686
d8d24a92
S
1687 dash_mpds = []
1688
1689 def add_dash_mpd(video_info):
1690 dash_mpd = video_info.get('dashmpd')
1691 if dash_mpd and dash_mpd[0] not in dash_mpds:
1692 dash_mpds.append(dash_mpd[0])
1693
561b456e
S
1694 def add_dash_mpd_pr(pl_response):
1695 dash_mpd = url_or_none(try_get(
1696 pl_response, lambda x: x['streamingData']['dashManifestUrl'],
1697 compat_str))
1698 if dash_mpd and dash_mpd not in dash_mpds:
1699 dash_mpds.append(dash_mpd)
1700
c7121fa7
S
1701 is_live = None
1702 view_count = None
1703
1704 def extract_view_count(v_info):
1705 return int_or_none(try_get(v_info, lambda x: x['view_count'][0]))
1706
026fbedc
S
1707 def extract_token(v_info):
1708 return dict_get(v_info, ('account_playback_token', 'accountPlaybackToken', 'token'))
1709
c2d125d9
S
1710 def extract_player_response(player_response, video_id):
1711 pl_response = str_or_none(player_response)
1712 if not pl_response:
1713 return
1714 pl_response = self._parse_json(pl_response, video_id, fatal=False)
1715 if isinstance(pl_response, dict):
1716 add_dash_mpd_pr(pl_response)
1717 return pl_response
1718
dbdaaa23
S
1719 player_response = {}
1720
c5e8d7af 1721 # Get video info
6449cd80 1722 embed_webpage = None
c108eb73 1723 if re.search(r'player-age-gate-content">', video_webpage) is not None:
c108eb73
JMF
1724 age_gate = True
1725 # We simulate the access to the video from www.youtube.com/v/{video_id}
1726 # this can be viewed without login into Youtube
beb95e77
CL
1727 url = proto + '://www.youtube.com/embed/%s' % video_id
1728 embed_webpage = self._download_webpage(url, video_id, 'Downloading embed webpage')
15707c7e 1729 data = compat_urllib_parse_urlencode({
2c57c7fa
JMF
1730 'video_id': video_id,
1731 'eurl': 'https://youtube.googleapis.com/v/' + video_id,
c084c934 1732 'sts': self._search_regex(
beb95e77 1733 r'"sts"\s*:\s*(\d+)', embed_webpage, 'sts', default=''),
2c57c7fa 1734 })
7e8c0af0 1735 video_info_url = proto + '://www.youtube.com/get_video_info?' + data
94bd3613
PH
1736 video_info_webpage = self._download_webpage(
1737 video_info_url, video_id,
20436c30 1738 note='Refetching age-gated info webpage',
94bd3613 1739 errnote='unable to download video info webpage')
c5e8d7af 1740 video_info = compat_parse_qs(video_info_webpage)
c2d125d9
S
1741 pl_response = video_info.get('player_response', [None])[0]
1742 player_response = extract_player_response(pl_response, video_id)
d8d24a92 1743 add_dash_mpd(video_info)
c2d125d9 1744 view_count = extract_view_count(video_info)
c108eb73
JMF
1745 else:
1746 age_gate = False
bc93bdb5 1747 video_info = None
dc4e4f90 1748 sts = None
d8d24a92 1749 # Try looking directly into the video webpage
a72778d3
S
1750 ytplayer_config = self._get_ytplayer_config(video_id, video_webpage)
1751 if ytplayer_config:
4e62ebe2 1752 args = ytplayer_config['args']
4c76aa06 1753 if args.get('url_encoded_fmt_stream_map') or args.get('hlsvp'):
d8d24a92
S
1754 # Convert to the same format returned by compat_parse_qs
1755 video_info = dict((k, [v]) for k, v in args.items())
1756 add_dash_mpd(video_info)
6496ccb4
S
1757 # Rental video is not rented but preview is available (e.g.
1758 # https://www.youtube.com/watch?v=yYr8q0y5Jfg,
067aa17e 1759 # https://github.com/ytdl-org/youtube-dl/issues/10532)
6496ccb4
S
1760 if not video_info and args.get('ypc_vid'):
1761 return self.url_result(
1762 args['ypc_vid'], YoutubeIE.ie_key(), video_id=args['ypc_vid'])
2fe1ff85
JMF
1763 if args.get('livestream') == '1' or args.get('live_playback') == 1:
1764 is_live = True
dc4e4f90 1765 sts = ytplayer_config.get('sts')
dbdaaa23 1766 if not player_response:
c2d125d9 1767 player_response = extract_player_response(args.get('player_response'), video_id)
0a3cf9ad 1768 if not video_info or self._downloader.params.get('youtube_include_dash_manifest', True):
561b456e 1769 add_dash_mpd_pr(player_response)
0a3cf9ad
S
1770 # We also try looking in get_video_info since it may contain different dashmpd
1771 # URL that points to a DASH manifest with possibly different itag set (some itags
1772 # are missing from DASH manifest pointed by webpage's dashmpd, some - from DASH
1773 # manifest pointed by get_video_info's dashmpd).
1774 # The general idea is to take a union of itags of both DASH manifests (for example
067aa17e 1775 # video with such 'manifest behavior' see https://github.com/ytdl-org/youtube-dl/issues/6093)
4e62ebe2 1776 self.report_video_info_webpage_download(video_id)
a61ce714 1777 for el in ('embedded', 'detailpage', 'vevo', ''):
dc4e4f90
S
1778 query = {
1779 'video_id': video_id,
1780 'ps': 'default',
1781 'eurl': '',
1782 'gl': 'US',
1783 'hl': 'en',
1784 }
1785 if el:
1786 query['el'] = el
1787 if sts:
1788 query['sts'] = sts
810fb84d 1789 video_info_webpage = self._download_webpage(
dc4e4f90 1790 '%s://www.youtube.com/get_video_info' % proto,
4e62ebe2 1791 video_id, note=False,
dc4e4f90
S
1792 errnote='unable to download video info webpage',
1793 fatal=False, query=query)
1794 if not video_info_webpage:
1795 continue
0a3cf9ad 1796 get_video_info = compat_parse_qs(video_info_webpage)
dbdaaa23
S
1797 if not player_response:
1798 pl_response = get_video_info.get('player_response', [None])[0]
c2d125d9 1799 player_response = extract_player_response(pl_response, video_id)
fd545fc6 1800 add_dash_mpd(get_video_info)
c7121fa7
S
1801 if view_count is None:
1802 view_count = extract_view_count(get_video_info)
0a3cf9ad
S
1803 if not video_info:
1804 video_info = get_video_info
026fbedc 1805 get_token = extract_token(get_video_info)
56667d62 1806 if get_token:
89ea063e
S
1807 # Different get_video_info requests may report different results, e.g.
1808 # some may report video unavailability, but some may serve it without
067aa17e 1809 # any complaint (see https://github.com/ytdl-org/youtube-dl/issues/7362,
89ea063e
S
1810 # the original webpage as well as el=info and el=embedded get_video_info
1811 # requests report video unavailability due to geo restriction while
1812 # el=detailpage succeeds and returns valid data). This is probably
1813 # due to YouTube measures against IP ranges of hosting providers.
1814 # Working around by preferring the first succeeded video_info containing
1815 # the token if no such video_info yet was found.
026fbedc 1816 token = extract_token(video_info)
56667d62 1817 if not token:
44b2264f 1818 video_info = get_video_info
4e62ebe2 1819 break
bbb7c3f7
YCH
1820
1821 def extract_unavailable_message():
0add33ab
S
1822 messages = []
1823 for tag, kind in (('h1', 'message'), ('div', 'submessage')):
1824 msg = self._html_search_regex(
1825 r'(?s)<{tag}[^>]+id=["\']unavailable-{kind}["\'][^>]*>(.+?)</{tag}>'.format(tag=tag, kind=kind),
1826 video_webpage, 'unavailable %s' % kind, default=None)
1827 if msg:
1828 messages.append(msg)
1829 if messages:
1830 return '\n'.join(messages)
bbb7c3f7 1831
15be3eb5
RA
1832 if not video_info:
1833 unavailable_message = extract_unavailable_message()
1834 if not unavailable_message:
1835 unavailable_message = 'Unable to extract video data'
1836 raise ExtractorError(
1837 'YouTube said: %s' % unavailable_message, expected=True, video_id=video_id)
1838
dbdaaa23
S
1839 video_details = try_get(
1840 player_response, lambda x: x['videoDetails'], dict) or {}
1841
8dbf751a
RA
1842 video_title = video_info.get('title', [None])[0] or video_details.get('title')
1843 if not video_title:
cf7e015f
S
1844 self._downloader.report_warning('Unable to extract video title')
1845 video_title = '_'
1846
9cafc3fd 1847 description_original = video_description = get_element_by_id("eow-description", video_webpage)
cf7e015f 1848 if video_description:
fa4bc6e7
RA
1849
1850 def replace_url(m):
1851 redir_url = compat_urlparse.urljoin(url, m.group(1))
1852 parsed_redir_url = compat_urllib_parse_urlparse(redir_url)
1853 if re.search(r'^(?:www\.)?(?:youtube(?:-nocookie)?\.com|youtu\.be)$', parsed_redir_url.netloc) and parsed_redir_url.path == '/redirect':
1854 qs = compat_parse_qs(parsed_redir_url.query)
1855 q = qs.get('q')
1856 if q and q[0]:
1857 return q[0]
1858 return redir_url
1859
9cafc3fd 1860 description_original = video_description = re.sub(r'''(?x)
cf7e015f 1861 <a\s+
25cb7a0e 1862 (?:[a-zA-Z-]+="[^"]*"\s+)*?
23f13e97 1863 (?:title|href)="([^"]+)"\s+
25cb7a0e 1864 (?:[a-zA-Z-]+="[^"]*"\s+)*?
525cedb9 1865 class="[^"]*"[^>]*>
23f13e97 1866 [^<]+\.{3}\s*
cf7e015f 1867 </a>
fa4bc6e7 1868 ''', replace_url, video_description)
cf7e015f
S
1869 video_description = clean_html(video_description)
1870 else:
8dbf751a 1871 video_description = self._html_search_meta('description', video_webpage) or video_details.get('shortDescription')
cf7e015f 1872
8fe10494 1873 if not smuggled_data.get('force_singlefeed', False):
5e1eddb9 1874 if not self._downloader.params.get('noplaylist'):
8fe10494
S
1875 multifeed_metadata_list = try_get(
1876 player_response,
1877 lambda x: x['multicamera']['playerLegacyMulticameraRenderer']['metadataList'],
1878 compat_str) or try_get(
1879 video_info, lambda x: x['multifeed_metadata_list'][0], compat_str)
1880 if multifeed_metadata_list:
1881 entries = []
1882 feed_ids = []
1883 for feed in multifeed_metadata_list.split(','):
1884 # Unquote should take place before split on comma (,) since textual
1885 # fields may contain comma as well (see
067aa17e 1886 # https://github.com/ytdl-org/youtube-dl/issues/8536)
8fe10494
S
1887 feed_data = compat_parse_qs(compat_urllib_parse_unquote_plus(feed))
1888 entries.append({
1889 '_type': 'url_transparent',
1890 'ie_key': 'Youtube',
1891 'url': smuggle_url(
1892 '%s://www.youtube.com/watch?v=%s' % (proto, feed_data['id'][0]),
1893 {'force_singlefeed': True}),
1894 'title': '%s (%s)' % (video_title, feed_data['title'][0]),
1895 })
1896 feed_ids.append(feed_data['id'][0])
1897 self.to_screen(
1898 'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
1899 % (', '.join(feed_ids), video_id))
1900 return self.playlist_result(entries, video_id, video_title, video_description)
1901 else:
1902 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
cf7e015f 1903
c7121fa7 1904 if view_count is None:
1c9c8de2 1905 view_count = extract_view_count(video_info)
dbdaaa23
S
1906 if view_count is None and video_details:
1907 view_count = int_or_none(video_details.get('viewCount'))
1d699755 1908
27019dbb 1909 if is_live is None:
898238e9 1910 is_live = bool_or_none(video_details.get('isLive'))
27019dbb 1911
c5e8d7af
PH
1912 # Check for "rental" videos
1913 if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info:
067aa17e 1914 raise ExtractorError('"rental" videos not supported. See https://github.com/ytdl-org/youtube-dl/issues/359 for more information.', expected=True)
c5e8d7af 1915
c63ca0ee
S
1916 def _extract_filesize(media_url):
1917 return int_or_none(self._search_regex(
1918 r'\bclen[=/](\d+)', media_url, 'filesize', default=None))
1919
bf1317d2
S
1920 streaming_formats = try_get(player_response, lambda x: x['streamingData']['formats'], list) or []
1921 streaming_formats.extend(try_get(player_response, lambda x: x['streamingData']['adaptiveFormats'], list) or [])
1922
c5e8d7af
PH
1923 if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
1924 self.report_rtmp_download()
dd27fd17
PH
1925 formats = [{
1926 'format_id': '_rtmp',
1927 'protocol': 'rtmp',
1928 'url': video_info['conn'][0],
1929 'player_url': player_url,
1930 }]
bf1317d2 1931 elif not is_live and (streaming_formats or len(video_info.get('url_encoded_fmt_stream_map', [''])[0]) >= 1 or len(video_info.get('adaptive_fmts', [''])[0]) >= 1):
5f6a1245 1932 encoded_url_map = video_info.get('url_encoded_fmt_stream_map', [''])[0] + ',' + video_info.get('adaptive_fmts', [''])[0]
00fe14fc 1933 if 'rtmpe%3Dyes' in encoded_url_map:
067aa17e 1934 raise ExtractorError('rtmpe downloads are not supported, see https://github.com/ytdl-org/youtube-dl/issues/343 for more information.', expected=True)
bf1317d2 1935 formats = []
3318832e 1936 formats_spec = {}
82156fdb 1937 fmt_list = video_info.get('fmt_list', [''])[0]
1938 if fmt_list:
1939 for fmt in fmt_list.split(','):
1940 spec = fmt.split('/')
3318832e 1941 if len(spec) > 1:
1942 width_height = spec[1].split('x')
1943 if len(width_height) == 2:
1944 formats_spec[spec[0]] = {
1945 'resolution': spec[1],
1946 'width': int_or_none(width_height[0]),
1947 'height': int_or_none(width_height[1]),
1948 }
bf1317d2
S
1949 for fmt in streaming_formats:
1950 itag = str_or_none(fmt.get('itag'))
1951 if not itag:
201e9eaa 1952 continue
bf1317d2
S
1953 quality = fmt.get('quality')
1954 quality_label = fmt.get('qualityLabel') or quality
1955 formats_spec[itag] = {
1956 'asr': int_or_none(fmt.get('audioSampleRate')),
1957 'filesize': int_or_none(fmt.get('contentLength')),
1958 'format_note': quality_label,
1959 'fps': int_or_none(fmt.get('fps')),
1960 'height': int_or_none(fmt.get('height')),
bf1317d2
S
1961 # bitrate for itag 43 is always 2147483647
1962 'tbr': float_or_none(fmt.get('averageBitrate') or fmt.get('bitrate'), 1000) if itag != '43' else None,
1963 'width': int_or_none(fmt.get('width')),
1964 }
1965
1966 for fmt in streaming_formats:
1967 if fmt.get('drm_families'):
1968 continue
1969 url = url_or_none(fmt.get('url'))
1970
1971 if not url:
1972 cipher = fmt.get('cipher')
1973 if not cipher:
1974 continue
1975 url_data = compat_parse_qs(cipher)
1976 url = url_or_none(try_get(url_data, lambda x: x['url'][0], compat_str))
1977 if not url:
1978 continue
1979 else:
1980 cipher = None
1981 url_data = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
1982
2f483bc1
S
1983 stream_type = int_or_none(try_get(url_data, lambda x: x['stream_type'][0]))
1984 # Unsupported FORMAT_STREAM_TYPE_OTF
1985 if stream_type == 3:
1986 continue
6449cd80 1987
bf1317d2
S
1988 format_id = fmt.get('itag') or url_data['itag'][0]
1989 if not format_id:
1990 continue
1991 format_id = compat_str(format_id)
a49eccdf 1992
bf1317d2
S
1993 if cipher:
1994 if 's' in url_data or self._downloader.params.get('youtube_include_dash_manifest', True):
1995 ASSETS_RE = r'"assets":.+?"js":\s*("[^"]+")'
1996 jsplayer_url_json = self._search_regex(
1997 ASSETS_RE,
1998 embed_webpage if age_gate else video_webpage,
1999 'JS player URL (1)', default=None)
2000 if not jsplayer_url_json and not age_gate:
2001 # We need the embed website after all
2002 if embed_webpage is None:
2003 embed_url = proto + '://www.youtube.com/embed/%s' % video_id
2004 embed_webpage = self._download_webpage(
2005 embed_url, video_id, 'Downloading embed webpage')
2006 jsplayer_url_json = self._search_regex(
2007 ASSETS_RE, embed_webpage, 'JS player URL')
2008
2009 player_url = json.loads(jsplayer_url_json)
cf010131 2010 if player_url is None:
bf1317d2
S
2011 player_url_json = self._search_regex(
2012 r'ytplayer\.config.*?"url"\s*:\s*("[^"]+")',
2013 video_webpage, 'age gate player URL')
2014 player_url = json.loads(player_url_json)
2015
2016 if 'sig' in url_data:
2017 url += '&signature=' + url_data['sig'][0]
2018 elif 's' in url_data:
2019 encrypted_sig = url_data['s'][0]
2020
2021 if self._downloader.params.get('verbose'):
2022 if player_url is None:
2023 player_version = 'unknown'
2024 player_desc = 'unknown'
cf010131 2025 else:
bf1317d2
S
2026 if player_url.endswith('swf'):
2027 player_version = self._search_regex(
2028 r'-(.+?)(?:/watch_as3)?\.swf$', player_url,
2029 'flash player', fatal=False)
2030 player_desc = 'flash player %s' % player_version
2031 else:
2032 player_version = self._search_regex(
2033 [r'html5player-([^/]+?)(?:/html5player(?:-new)?)?\.js',
2034 r'(?:www|player(?:_ias)?)-([^/]+)(?:/[a-z]{2,3}_[A-Z]{2})?/base\.js'],
2035 player_url,
2036 'html5 player', fatal=False)
2037 player_desc = 'html5 player %s' % player_version
2038
2039 parts_sizes = self._signature_cache_id(encrypted_sig)
2040 self.to_screen('{%s} signature length %s, %s' %
2041 (format_id, parts_sizes, player_desc))
2042
2043 signature = self._decrypt_signature(
2044 encrypted_sig, video_id, player_url, age_gate)
2045 sp = try_get(url_data, lambda x: x['sp'][0], compat_str) or 'signature'
2046 url += '&%s=%s' % (sp, signature)
201e9eaa
PH
2047 if 'ratebypass' not in url:
2048 url += '&ratebypass=yes'
c9afb51c 2049
94278f72
YCH
2050 dct = {
2051 'format_id': format_id,
2052 'url': url,
2053 'player_url': player_url,
2054 }
2055 if format_id in self._formats:
2056 dct.update(self._formats[format_id])
3318832e 2057 if format_id in formats_spec:
2058 dct.update(formats_spec[format_id])
94278f72 2059
aabc2be6 2060 # Some itags are not included in DASH manifest thus corresponding formats will
067aa17e 2061 # lack metadata (see https://github.com/ytdl-org/youtube-dl/pull/5993).
aabc2be6
S
2062 # Trying to extract metadata from url_encoded_fmt_stream_map entry.
2063 mobj = re.search(r'^(?P<width>\d+)[xX](?P<height>\d+)$', url_data.get('size', [''])[0])
2064 width, height = (int(mobj.group('width')), int(mobj.group('height'))) if mobj else (None, None)
94278f72 2065
bf1317d2
S
2066 if width is None:
2067 width = int_or_none(fmt.get('width'))
2068 if height is None:
2069 height = int_or_none(fmt.get('height'))
2070
c63ca0ee
S
2071 filesize = int_or_none(url_data.get(
2072 'clen', [None])[0]) or _extract_filesize(url)
2073
bf1317d2
S
2074 quality = url_data.get('quality', [None])[0] or fmt.get('quality')
2075 quality_label = url_data.get('quality_label', [None])[0] or fmt.get('qualityLabel')
2076
4878759f
S
2077 tbr = (float_or_none(url_data.get('bitrate', [None])[0], 1000)
2078 or float_or_none(fmt.get('bitrate'), 1000)) if format_id != '43' else None
bf1317d2 2079 fps = int_or_none(url_data.get('fps', [None])[0]) or int_or_none(fmt.get('fps'))
54fc90aa 2080
94278f72 2081 more_fields = {
c63ca0ee 2082 'filesize': filesize,
bf1317d2 2083 'tbr': tbr,
c9afb51c
AH
2084 'width': width,
2085 'height': height,
bf1317d2
S
2086 'fps': fps,
2087 'format_note': quality_label or quality,
c9afb51c 2088 }
94278f72
YCH
2089 for key, value in more_fields.items():
2090 if value:
2091 dct[key] = value
bf1317d2 2092 type_ = url_data.get('type', [None])[0] or fmt.get('mimeType')
aabc2be6
S
2093 if type_:
2094 type_split = type_.split(';')
2095 kind_ext = type_split[0].split('/')
2096 if len(kind_ext) == 2:
94278f72
YCH
2097 kind, _ = kind_ext
2098 dct['ext'] = mimetype2ext(type_split[0])
aabc2be6
S
2099 if kind in ('audio', 'video'):
2100 codecs = None
2101 for mobj in re.finditer(
2102 r'(?P<key>[a-zA-Z_-]+)=(?P<quote>["\']?)(?P<val>.+?)(?P=quote)(?:;|$)', type_):
2103 if mobj.group('key') == 'codecs':
2104 codecs = mobj.group('val')
2105 break
2106 if codecs:
6310acf5 2107 dct.update(parse_codecs(codecs))
e4a60912
S
2108 if dct.get('acodec') == 'none' or dct.get('vcodec') == 'none':
2109 dct['downloader_options'] = {
2110 # Youtube throttles chunks >~10M
2111 'http_chunk_size': 10485760,
2112 }
aabc2be6 2113 formats.append(dct)
c5e8d7af 2114 else:
c3e54389
S
2115 manifest_url = (
2116 url_or_none(try_get(
2117 player_response,
2118 lambda x: x['streamingData']['hlsManifestUrl'],
3089bc74
S
2119 compat_str))
2120 or url_or_none(try_get(
c3e54389
S
2121 video_info, lambda x: x['hlsvp'][0], compat_str)))
2122 if manifest_url:
2123 formats = []
2124 m3u8_formats = self._extract_m3u8_formats(
2125 manifest_url, video_id, 'mp4', fatal=False)
2126 for a_format in m3u8_formats:
2127 itag = self._search_regex(
2128 r'/itag/(\d+)/', a_format['url'], 'itag', default=None)
2129 if itag:
2130 a_format['format_id'] = itag
2131 if itag in self._formats:
2132 dct = self._formats[itag].copy()
2133 dct.update(a_format)
2134 a_format = dct
2135 a_format['player_url'] = player_url
2136 # Accept-Encoding header causes failures in live streams on Youtube and Youtube Gaming
2137 a_format.setdefault('http_headers', {})['Youtubedl-no-compression'] = 'True'
2138 formats.append(a_format)
2139 else:
13577349 2140 error_message = extract_unavailable_message()
c3e54389 2141 if not error_message:
13577349
S
2142 error_message = clean_html(try_get(
2143 player_response, lambda x: x['playabilityStatus']['reason'],
2144 compat_str))
2145 if not error_message:
2146 error_message = clean_html(
2147 try_get(video_info, lambda x: x['reason'][0], compat_str))
c3e54389
S
2148 if error_message:
2149 raise ExtractorError(error_message, expected=True)
2150 raise ExtractorError('no conn, hlsvp, hlsManifestUrl or url_encoded_fmt_stream_map information found in video info')
c5e8d7af 2151
7e72694b 2152 # uploader
dbdaaa23
S
2153 video_uploader = try_get(
2154 video_info, lambda x: x['author'][0],
2155 compat_str) or str_or_none(video_details.get('author'))
7e72694b
S
2156 if video_uploader:
2157 video_uploader = compat_urllib_parse_unquote_plus(video_uploader)
2158 else:
2159 self._downloader.report_warning('unable to extract uploader name')
2160
2161 # uploader_id
2162 video_uploader_id = None
2163 video_uploader_url = None
2164 mobj = re.search(
2165 r'<link itemprop="url" href="(?P<uploader_url>https?://www\.youtube\.com/(?:user|channel)/(?P<uploader_id>[^"]+))">',
2166 video_webpage)
2167 if mobj is not None:
2168 video_uploader_id = mobj.group('uploader_id')
2169 video_uploader_url = mobj.group('uploader_url')
2170 else:
2171 self._downloader.report_warning('unable to extract uploader nickname')
2172
b45a9e69 2173 channel_id = (
3089bc74
S
2174 str_or_none(video_details.get('channelId'))
2175 or self._html_search_meta(
2176 'channelId', video_webpage, 'channel id', default=None)
2177 or self._search_regex(
b45a9e69 2178 r'data-channel-external-id=(["\'])(?P<id>(?:(?!\1).)+)\1',
2179 video_webpage, 'channel id', default=None, group='id'))
dd4c4492
S
2180 channel_url = 'http://www.youtube.com/channel/%s' % channel_id if channel_id else None
2181
7e72694b
S
2182 # thumbnail image
2183 # We try first to get a high quality image:
2184 m_thumb = re.search(r'<span itemprop="thumbnail".*?href="(.*?)">',
2185 video_webpage, re.DOTALL)
2186 if m_thumb is not None:
2187 video_thumbnail = m_thumb.group(1)
2188 elif 'thumbnail_url' not in video_info:
2189 self._downloader.report_warning('unable to extract video thumbnail')
2190 video_thumbnail = None
2191 else: # don't panic if we can't find it
2192 video_thumbnail = compat_urllib_parse_unquote_plus(video_info['thumbnail_url'][0])
2193
2194 # upload date
2195 upload_date = self._html_search_meta(
2196 'datePublished', video_webpage, 'upload date', default=None)
2197 if not upload_date:
2198 upload_date = self._search_regex(
2199 [r'(?s)id="eow-date.*?>(.*?)</span>',
2200 r'(?:id="watch-uploader-info".*?>.*?|["\']simpleText["\']\s*:\s*["\'])(?:Published|Uploaded|Streamed live|Started) on (.+?)[<"\']'],
2201 video_webpage, 'upload date', default=None)
2202 upload_date = unified_strdate(upload_date)
2203
2204 video_license = self._html_search_regex(
2205 r'<h4[^>]+class="title"[^>]*>\s*License\s*</h4>\s*<ul[^>]*>\s*<li>(.+?)</li',
2206 video_webpage, 'license', default=None)
2207
2208 m_music = re.search(
2209 r'''(?x)
2210 <h4[^>]+class="title"[^>]*>\s*Music\s*</h4>\s*
2211 <ul[^>]*>\s*
2212 <li>(?P<title>.+?)
2213 by (?P<creator>.+?)
2214 (?:
2215 \(.+?\)|
2216 <a[^>]*
2217 (?:
2218 \bhref=["\']/red[^>]*>| # drop possible
2219 >\s*Listen ad-free with YouTube Red # YouTube Red ad
2220 )
2221 .*?
2222 )?</li
2223 ''',
2224 video_webpage)
2225 if m_music:
2226 video_alt_title = remove_quotes(unescapeHTML(m_music.group('title')))
2227 video_creator = clean_html(m_music.group('creator'))
2228 else:
2229 video_alt_title = video_creator = None
2230
2231 def extract_meta(field):
2232 return self._html_search_regex(
2233 r'<h4[^>]+class="title"[^>]*>\s*%s\s*</h4>\s*<ul[^>]*>\s*<li>(.+?)</li>\s*' % field,
2234 video_webpage, field, default=None)
2235
2236 track = extract_meta('Song')
2237 artist = extract_meta('Artist')
92bc97d3 2238 album = extract_meta('Album')
822b9d9c
RA
2239
2240 # Youtube Music Auto-generated description
92bc97d3 2241 release_date = release_year = None
822b9d9c
RA
2242 if video_description:
2243 mobj = re.search(r'(?s)Provided to YouTube by [^\n]+\n+(?P<track>[^·]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?', video_description)
2244 if mobj:
2245 if not track:
2246 track = mobj.group('track').strip()
2247 if not artist:
2248 artist = mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·'))
92bc97d3
RA
2249 if not album:
2250 album = mobj.group('album'.strip())
822b9d9c
RA
2251 release_year = mobj.group('release_year')
2252 release_date = mobj.group('release_date')
2253 if release_date:
2254 release_date = release_date.replace('-', '')
2255 if not release_year:
2256 release_year = int(release_date[:4])
2257 if release_year:
2258 release_year = int(release_year)
7e72694b
S
2259
2260 m_episode = re.search(
2261 r'<div[^>]+id="watch7-headline"[^>]*>\s*<span[^>]*>.*?>(?P<series>[^<]+)</a></b>\s*S(?P<season>\d+)\s*•\s*E(?P<episode>\d+)</span>',
2262 video_webpage)
2263 if m_episode:
c2dd2dc0 2264 series = unescapeHTML(m_episode.group('series'))
7e72694b
S
2265 season_number = int(m_episode.group('season'))
2266 episode_number = int(m_episode.group('episode'))
2267 else:
2268 series = season_number = episode_number = None
2269
2270 m_cat_container = self._search_regex(
2271 r'(?s)<h4[^>]*>\s*Category\s*</h4>\s*<ul[^>]*>(.*?)</ul>',
2272 video_webpage, 'categories', default=None)
2273 if m_cat_container:
2274 category = self._html_search_regex(
2275 r'(?s)<a[^<]+>(.*?)</a>', m_cat_container, 'category',
2276 default=None)
2277 video_categories = None if category is None else [category]
2278 else:
2279 video_categories = None
2280
2281 video_tags = [
2282 unescapeHTML(m.group('content'))
2283 for m in re.finditer(self._meta_regex('og:video:tag'), video_webpage)]
2284
2285 def _extract_count(count_name):
2286 return str_to_int(self._search_regex(
2287 r'-%s-button[^>]+><span[^>]+class="yt-uix-button-content"[^>]*>([\d,]+)</span>'
2288 % re.escape(count_name),
2289 video_webpage, count_name, default=None))
2290
2291 like_count = _extract_count('like')
2292 dislike_count = _extract_count('dislike')
2293
dbdaaa23
S
2294 if view_count is None:
2295 view_count = str_to_int(self._search_regex(
2296 r'<[^>]+class=["\']watch-view-count[^>]+>\s*([\d,\s]+)', video_webpage,
2297 'view count', default=None))
2298
bf3c9326
S
2299 average_rating = (
2300 float_or_none(video_details.get('averageRating'))
2301 or try_get(video_info, lambda x: float_or_none(x['avg_rating'][0])))
2302
7e72694b
S
2303 # subtitles
2304 video_subtitles = self.extract_subtitles(video_id, video_webpage)
2305 automatic_captions = self.extract_automatic_captions(video_id, video_webpage)
2306
2307 video_duration = try_get(
2308 video_info, lambda x: int_or_none(x['length_seconds'][0]))
dbdaaa23
S
2309 if not video_duration:
2310 video_duration = int_or_none(video_details.get('lengthSeconds'))
7e72694b
S
2311 if not video_duration:
2312 video_duration = parse_duration(self._html_search_meta(
2313 'duration', video_webpage, 'video duration'))
2314
2315 # annotations
2316 video_annotations = None
2317 if self._downloader.params.get('writeannotations', False):
64b6a4e9
RA
2318 xsrf_token = self._search_regex(
2319 r'([\'"])XSRF_TOKEN\1\s*:\s*([\'"])(?P<xsrf_token>[A-Za-z0-9+/=]+)\2',
2320 video_webpage, 'xsrf token', group='xsrf_token', fatal=False)
2321 invideo_url = try_get(
2322 player_response, lambda x: x['annotations'][0]['playerAnnotationsUrlsRenderer']['invideoUrl'], compat_str)
2323 if xsrf_token and invideo_url:
2324 xsrf_field_name = self._search_regex(
2325 r'([\'"])XSRF_FIELD_NAME\1\s*:\s*([\'"])(?P<xsrf_field_name>\w+)\2',
2326 video_webpage, 'xsrf field name',
2327 group='xsrf_field_name', default='session_token')
2328 video_annotations = self._download_webpage(
2329 self._proto_relative_url(invideo_url),
2330 video_id, note='Downloading annotations',
2331 errnote='Unable to download video annotations', fatal=False,
2332 data=urlencode_postdata({xsrf_field_name: xsrf_token}))
7e72694b
S
2333
2334 chapters = self._extract_chapters(description_original, video_duration)
2335
dd27fd17 2336 # Look for the DASH manifest
203fb43f 2337 if self._downloader.params.get('youtube_include_dash_manifest', True):
77c6fb5b 2338 dash_mpd_fatal = True
8ff648e4 2339 for mpd_url in dash_mpds:
d8d24a92 2340 dash_formats = {}
774e208f 2341 try:
05d0d131
YCH
2342 def decrypt_sig(mobj):
2343 s = mobj.group(1)
2344 dec_s = self._decrypt_signature(s, video_id, player_url, age_gate)
2345 return '/signature/%s' % dec_s
2346
8ff648e4 2347 mpd_url = re.sub(r'/s/([a-fA-F0-9\.]+)', decrypt_sig, mpd_url)
2d2fa82d 2348
8ff648e4 2349 for df in self._extract_mpd_formats(
2350 mpd_url, video_id, fatal=dash_mpd_fatal,
2351 formats_dict=self._formats):
c63ca0ee
S
2352 if not df.get('filesize'):
2353 df['filesize'] = _extract_filesize(df['url'])
d8d24a92
S
2354 # Do not overwrite DASH format found in some previous DASH manifest
2355 if df['format_id'] not in dash_formats:
2356 dash_formats[df['format_id']] = df
77c6fb5b
S
2357 # Additional DASH manifests may end up in HTTP Error 403 therefore
2358 # allow them to fail without bug report message if we already have
2359 # some DASH manifest succeeded. This is temporary workaround to reduce
2360 # burst of bug reports until we figure out the reason and whether it
2361 # can be fixed at all.
2362 dash_mpd_fatal = False
774e208f
PH
2363 except (ExtractorError, KeyError) as e:
2364 self.report_warning(
2365 'Skipping DASH manifest: %r' % e, video_id)
d8d24a92 2366 if dash_formats:
04b3b3df
JMF
2367 # Remove the formats we found through non-DASH, they
2368 # contain less info and it can be wrong, because we use
2369 # fixed values (for example the resolution). See
067aa17e 2370 # https://github.com/ytdl-org/youtube-dl/issues/5774 for an
04b3b3df 2371 # example.
d80265cc 2372 formats = [f for f in formats if f['format_id'] not in dash_formats.keys()]
d8d24a92 2373 formats.extend(dash_formats.values())
d80044c2 2374
6271f1ca
PH
2375 # Check for malformed aspect ratio
2376 stretched_m = re.search(
2377 r'<meta\s+property="og:video:tag".*?content="yt:stretch=(?P<w>[0-9]+):(?P<h>[0-9]+)">',
2378 video_webpage)
2379 if stretched_m:
313dfc45
LL
2380 w = float(stretched_m.group('w'))
2381 h = float(stretched_m.group('h'))
5faf9fed
S
2382 # yt:stretch may hold invalid ratio data (e.g. for Q39EVAstoRM ratio is 17:0).
2383 # We will only process correct ratios.
313dfc45 2384 if w > 0 and h > 0:
41f24c32 2385 ratio = w / h
313dfc45
LL
2386 for f in formats:
2387 if f.get('vcodec') != 'none':
2388 f['stretched_ratio'] = ratio
6271f1ca 2389
026fbedc
S
2390 if not formats:
2391 token = extract_token(video_info)
2392 if not token:
2393 if 'reason' in video_info:
2394 if 'The uploader has not made this video available in your country.' in video_info['reason']:
2395 regions_allowed = self._html_search_meta(
2396 'regionsAllowed', video_webpage, default=None)
2397 countries = regions_allowed.split(',') if regions_allowed else None
2398 self.raise_geo_restricted(
2399 msg=video_info['reason'][0], countries=countries)
2400 reason = video_info['reason'][0]
2401 if 'Invalid parameters' in reason:
2402 unavailable_message = extract_unavailable_message()
2403 if unavailable_message:
2404 reason = unavailable_message
2405 raise ExtractorError(
2406 'YouTube said: %s' % reason,
2407 expected=True, video_id=video_id)
2408 else:
2409 raise ExtractorError(
2410 '"token" parameter not in video info for unknown reason',
2411 video_id=video_id)
2412
0d297518
RA
2413 if not formats and (video_info.get('license_info') or try_get(player_response, lambda x: x['streamingData']['licenseInfos'])):
2414 raise ExtractorError('This video is DRM protected.', expected=True)
2415
4bcc7bd1 2416 self._sort_formats(formats)
4ea3be0a 2417
21c340b8 2418 self.mark_watched(video_id, video_info, player_response)
d77ab8e2 2419
4ea3be0a 2420 return {
8bcc8756
JW
2421 'id': video_id,
2422 'uploader': video_uploader,
2423 'uploader_id': video_uploader_id,
fd050249 2424 'uploader_url': video_uploader_url,
dd4c4492
S
2425 'channel_id': channel_id,
2426 'channel_url': channel_url,
8bcc8756 2427 'upload_date': upload_date,
7caf9830 2428 'license': video_license,
936784b2 2429 'creator': video_creator or artist,
8bcc8756 2430 'title': video_title,
936784b2 2431 'alt_title': video_alt_title or track,
8bcc8756
JW
2432 'thumbnail': video_thumbnail,
2433 'description': video_description,
2434 'categories': video_categories,
000b6b5a 2435 'tags': video_tags,
8bcc8756 2436 'subtitles': video_subtitles,
360e1ca5 2437 'automatic_captions': automatic_captions,
8bcc8756
JW
2438 'duration': video_duration,
2439 'age_limit': 18 if age_gate else 0,
2440 'annotations': video_annotations,
9cafc3fd 2441 'chapters': chapters,
7e8c0af0 2442 'webpage_url': proto + '://www.youtube.com/watch?v=%s' % video_id,
8bcc8756 2443 'view_count': view_count,
4ea3be0a 2444 'like_count': like_count,
2445 'dislike_count': dislike_count,
bf3c9326 2446 'average_rating': average_rating,
8bcc8756 2447 'formats': formats,
2fe1ff85 2448 'is_live': is_live,
7c80519c 2449 'start_time': start_time,
297a564b 2450 'end_time': end_time,
12afdc2a
S
2451 'series': series,
2452 'season_number': season_number,
2453 'episode_number': episode_number,
936784b2
S
2454 'track': track,
2455 'artist': artist,
5caabd3c 2456 'album': album,
2457 'release_date': release_date,
2458 'release_year': release_year,
4ea3be0a 2459 }
c5e8d7af 2460
5f6a1245 2461
class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
    """Extractor for YouTube playlist URLs and bare playlist ids.

    Mix-like ids (prefixes ``RD``, ``UL``, ``PU``) are handled by
    ``_extract_mix``; every other id goes through ``_extract_playlist``.
    """

    IE_NAME = 'youtube:playlist'
    IE_DESC = 'YouTube.com playlists'
    # Verbose-mode regex: whitespace and the inline "#" comment are ignored
    # by the regex engine.
    _VALID_URL = r"""(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        (?:
                            (?:
                                youtube\.com|
                                invidio\.us
                            )
                            /
                            (?:
                               (?:course|view_play_list|my_playlists|artist|playlist|watch|embed/(?:videoseries|[0-9A-Za-z_-]{11}))
                               \? (?:.*?[&;])*? (?:p|a|list)=
                            |  p/
                            )|
                            youtu\.be/[0-9A-Za-z_-]{11}\?.*?\blist=
                        )
                        (
                            (?:PL|LL|EC|UU|FL|RD|UL|TL|OLAK5uy_)?[0-9A-Za-z-_]{10,}
                            # Top tracks, they can also include dots
                            |(?:MC)[\w\.]*
                        )
                        .*
                     |
                        (%(playlist_id)s)
                     )""" % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s'
    _VIDEO_RE_TPL = r'href="\s*/watch\?v=%s(?:&amp;(?:[^"]*?index=(?P<index>\d+))?(?:[^>]+>(?P<title>[^<]+))?)?'
    _VIDEO_RE = _VIDEO_RE_TPL % r'(?P<id>[0-9A-Za-z_-]{11})'
    _TESTS = [{
        'url': 'https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re',
        'info_dict': {
            'title': 'ytdl test PL',
            'id': 'PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re',
        },
        'playlist_count': 3,
    }, {
        'url': 'https://www.youtube.com/playlist?list=PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx',
        'info_dict': {
            'id': 'PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx',
            'title': 'YDL_Empty_List',
        },
        'playlist_count': 0,
        'skip': 'This playlist is private',
    }, {
        'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
        'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
        'info_dict': {
            'title': '29C3: Not my department',
            'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
            'uploader': 'Christiaan008',
            'uploader_id': 'ChRiStIaAn008',
        },
        'playlist_count': 95,
    }, {
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
            'uploader': 'Wickydoo',
            'uploader_id': 'Wickydoo',
        },
        'playlist_mincount': 26,
    }, {
        'note': 'Large playlist',
        'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
        'info_dict': {
            'title': 'Uploads from Cauchemar',
            'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
            'uploader': 'Cauchemar',
            'uploader_id': 'Cauchemar89',
        },
        'playlist_mincount': 799,
    }, {
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        },
        'playlist_count': 2,
        'skip': 'This playlist is private',
    }, {
        'note': 'embedded',
        'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA15',
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
            'uploader': 'milan',
            'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
        }
    }, {
        'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
        'playlist_mincount': 485,
        'info_dict': {
            'title': '2018 Chinese New Singles (11/6 updated)',
            'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
            'uploader': 'LBK',
            'uploader_id': 'sdragonfang',
        }
    }, {
        'note': 'Embedded SWF player',
        'url': 'https://www.youtube.com/p/YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ?hl=en_US&fs=1&rel=0',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA7',
            'id': 'YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ',
        },
        'skip': 'This playlist does not exist',
    }, {
        'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
        'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
        'info_dict': {
            'title': 'Uploads from Interstellar Movie',
            'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
            'uploader': 'Interstellar Movie',
            'uploader_id': 'InterstellarMovie1',
        },
        'playlist_mincount': 21,
    }, {
        # Playlist URL that does not actually serve a playlist
        'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
        'info_dict': {
            'id': 'FqZTN594JQw',
            'ext': 'webm',
            'title': "Smiley's People 01 detective, Adventure Series, Action",
            'uploader': 'STREEM',
            'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
            'upload_date': '20150526',
            'license': 'Standard YouTube License',
            'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
            'categories': ['People & Blogs'],
            'tags': list,
            'view_count': int,
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'skip_download': True,
        },
        'skip': 'This video is not available.',
        'add_ie': [YoutubeIE.ie_key()],
    }, {
        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
        'info_dict': {
            'id': 'yeWKywCrFtk',
            'ext': 'mp4',
            'title': 'Small Scale Baler and Braiding Rugs',
            'uploader': 'Backus-Page House Museum',
            'uploader_id': 'backuspagemuseum',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
            'upload_date': '20161008',
            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
            'categories': ['Nonprofits & Activism'],
            'tags': list,
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
    }, {
        # https://github.com/ytdl-org/youtube-dl/issues/21844
        'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
        'info_dict': {
            'title': 'Data Analysis with Dr Mike Pound',
            'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
            'uploader_id': 'Computerphile',
            'uploader': 'Computerphile',
        },
        'playlist_mincount': 11,
    }, {
        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
        'only_matching': True,
    }, {
        'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
        'only_matching': True,
    }, {
        # music album playlist
        'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
        'only_matching': True,
    }, {
        'url': 'https://invidio.us/playlist?list=PLDIoUOhQQPlXr63I_vwF9GD8sAKh77dWU',
        'only_matching': True,
    }]

    def _real_initialize(self):
        """Call ``self._login()`` (defined outside this class)."""
        self._login()

    def extract_videos_from_page(self, page):
        """Collect (video id, title) pairs found in *page*.

        Tags carrying a ``data-video-id`` attribute are read first; three
        regex fallbacks are then handed to ``extract_videos_from_page_impl``
        together with the lists gathered so far.

        Returns ``zip(ids, titles)``.
        """
        ids_in_page = []
        titles_in_page = []

        tag_re = r'(<[^>]*\bdata-video-id\s*=\s*["\'][0-9A-Za-z_-]{11}[^>]+>)'
        for tag in re.findall(tag_re, page):
            attributes = extract_attributes(tag)
            found_id = attributes['data-video-id']
            found_title = unescapeHTML(attributes.get('data-title'))
            ids_in_page.append(found_id)
            # Falsy titles (None or '') are stored untouched.
            titles_in_page.append(found_title.strip() if found_title else found_title)

        fallback_patterns = (
            # Fallback with old _VIDEO_RE
            self._VIDEO_RE,
            # Relaxed fallbacks
            r'href="\s*/watch\?v\s*=\s*(?P<id>[0-9A-Za-z_-]{11})',
            r'data-video-ids\s*=\s*["\'](?P<id>[0-9A-Za-z_-]{11})',
        )
        for pattern in fallback_patterns:
            self.extract_videos_from_page_impl(
                pattern, page, ids_in_page, titles_in_page)

        return zip(ids_in_page, titles_in_page)

    def _extract_mix(self, playlist_id):
        """Build a playlist result for a mix-style playlist id.

        Watch pages are requested repeatedly, each seeded with the last
        video id collected so far (initially the final 11 characters of
        *playlist_id*), until a page yields no video id that is not already
        known.
        """
        entry_re = r'''(?xs)data-video-username=".*?".*?
                       href="/watch\?v=([0-9A-Za-z_-]{11})&amp;[^"]*?list=%s''' % re.escape(playlist_id)

        collected_ids = []
        seed_id = playlist_id[-11:]
        page_number = 0
        while True:
            page_number += 1
            mix_url = 'https://youtube.com/watch?v=%s&list=%s' % (seed_id, playlist_id)
            webpage = self._download_webpage(
                mix_url, playlist_id,
                'Downloading page {0} of Youtube mix'.format(page_number))
            # Stop as soon as a page contributes nothing new.
            fresh_ids = [
                candidate
                for candidate in orderedSet(re.findall(entry_re, webpage))
                if candidate not in collected_ids]
            if not fresh_ids:
                break
            collected_ids.extend(fresh_ids)
            seed_id = collected_ids[-1]

        url_results = self._ids_to_results(collected_ids)

        # Title is taken from the last downloaded page; first truthy hit wins,
        # otherwise the result of the final lookup is kept.
        title_span = None
        for class_name in ('playlist-title', 'title long-title', 'title'):
            title_span = get_element_by_attribute('class', class_name, webpage)
            if title_span:
                break

        return self.playlist_result(
            url_results, playlist_id, clean_html(title_span))

    def _extract_playlist(self, playlist_id):
        """Download the playlist page and build its playlist result.

        Returns a ``(has_videos, playlist)`` tuple. ``has_videos`` is False
        only when no title was found and ``self._entries`` yields nothing.

        Raises ExtractorError (expected) when an alert on the page says the
        playlist does not exist or is private, or reports invalid parameters.
        """
        url = self._TEMPLATE_URL % playlist_id
        page = self._download_webpage(url, playlist_id)

        # the yt-alert-message now has tabindex attribute (see https://github.com/ytdl-org/youtube-dl/issues/11604)
        for alert in re.findall(r'<div class="yt-alert-message"[^>]*>([^<]+)</div>', page):
            alert = alert.strip()
            # Check if the playlist exists or is private
            unavailable = re.match(
                r'[^<]*(?:The|This) playlist (?P<reason>does not exist|is private)[^<]*', alert)
            if unavailable:
                reason = unavailable.group('reason')
                hint = ', use --username or --netrc to access it' if 'private' in reason else ''
                raise ExtractorError(
                    'This playlist %s' % reason + hint + '.', expected=True)
            if re.match(r'[^<]*Invalid parameters[^<]*', alert):
                raise ExtractorError(
                    'Invalid parameters. Maybe URL is incorrect.',
                    expected=True)
            if re.match(r'[^<]*Choose your language[^<]*', alert):
                continue
            self.report_warning('Youtube gives an alert message: ' + alert)

        playlist_title = self._html_search_regex(
            r'(?s)<h1 class="pl-header-title[^"]*"[^>]*>\s*(.*?)\s*</h1>',
            page, 'title', default=None)

        uploader_prefix = r'class=["\']pl-header-details[^>]+>\s*<li>\s*<a[^>]+\bhref='
        uploader = self._html_search_regex(
            uploader_prefix + r'["\']/(?:user|channel)/[^>]+>([^<]+)',
            page, 'uploader', default=None)
        uploader_match = re.search(
            uploader_prefix + r'(["\'])(?P<path>/(?:user|channel)/(?P<uploader_id>.+?))\1',
            page)
        uploader_id = uploader_url = None
        if uploader_match:
            uploader_id = uploader_match.group('uploader_id')
            uploader_url = compat_urlparse.urljoin(url, uploader_match.group('path'))

        has_videos = True
        if not playlist_title:
            # Some playlist URLs don't actually serve a playlist (e.g.
            # https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4)
            probe = self._entries(page, playlist_id)
            try:
                next(probe)
            except StopIteration:
                has_videos = False

        playlist = self.playlist_result(
            self._entries(page, playlist_id), playlist_id, playlist_title)
        playlist.update(
            uploader=uploader,
            uploader_id=uploader_id,
            uploader_url=uploader_url)

        return has_videos, playlist

    def _check_download_just_video(self, url, playlist_id):
        """Detect a video id in *url* and honour the ``noplaylist`` param.

        Returns ``(video_id, result)``: ``(None, None)`` when the URL names
        no video, ``(video_id, url_result)`` when ``noplaylist`` is set, and
        ``(video_id, None)`` otherwise.
        """
        # Check if it's a video-specific URL
        query = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
        video_id = query.get('v', [None])[0] or self._search_regex(
            r'(?:(?:^|//)youtu\.be/|youtube\.com/embed/(?!videoseries))([0-9A-Za-z_-]{11})', url,
            'video id', default=None)
        if not video_id:
            return None, None

        if self._downloader.params.get('noplaylist'):
            self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
            return video_id, self.url_result(video_id, 'Youtube', video_id=video_id)

        self.to_screen(
            'Downloading playlist %s - add --no-playlist to just download video %s'
            % (playlist_id, video_id))
        return video_id, None

    def _real_extract(self, url):
        """Resolve *url* to a single video, a mix, or a regular playlist.

        Raises ExtractorError when *url* does not match ``_VALID_URL``.
        """
        # Extract playlist id
        url_match = re.match(self._VALID_URL, url)
        if url_match is None:
            raise ExtractorError('Invalid URL: %s' % url)
        playlist_id = url_match.group(1) or url_match.group(2)

        video_id, single_video = self._check_download_just_video(url, playlist_id)
        if single_video:
            return single_video

        # Mixes require a custom extraction process
        if playlist_id.startswith(('RD', 'UL', 'PU')):
            return self._extract_mix(playlist_id)

        has_videos, playlist = self._extract_playlist(playlist_id)
        if video_id and not has_videos:
            # Some playlist URLs don't actually serve a playlist (see
            # https://github.com/ytdl-org/youtube-dl/issues/10537).
            # Fallback to plain video extraction if there is a video id
            # along with playlist id.
            return self.url_result(video_id, 'Youtube', video_id=video_id)
        return playlist
448830ce 2817
c5e8d7af 2818
class YoutubeChannelIE(YoutubePlaylistBaseInfoExtractor):
    """Extractor for ``/channel/<id>`` URLs.

    When the channel page exposes a ``UC``-prefixed id, extraction is
    delegated to the matching ``UU`` uploads playlist; otherwise the
    channel's video listing pages are used directly.
    """

    IE_NAME = 'youtube:channel'
    IE_DESC = 'YouTube.com channels'
    _VALID_URL = r'https?://(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie)?\.com|(?:www\.)?invidio\.us)/channel/(?P<id>[0-9A-Za-z_-]+)'
    _TEMPLATE_URL = 'https://www.youtube.com/channel/%s/videos'
    _VIDEO_RE = r'(?:title="(?P<title>[^"]+)"[^>]+)?href="/watch\?v=(?P<id>[0-9A-Za-z_-]+)&?'
    _TESTS = [{
        'note': 'paginated channel',
        'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
        'playlist_mincount': 91,
        'info_dict': {
            'id': 'UUKfVa3S1e4PHvxWcwyMMg8w',
            'title': 'Uploads from lex will',
            'uploader': 'lex will',
            'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
        }
    }, {
        'note': 'Age restricted channel',
        # from https://www.youtube.com/user/DeusExOfficial
        'url': 'https://www.youtube.com/channel/UCs0ifCMCm1icqRbqhUINa0w',
        'playlist_mincount': 64,
        'info_dict': {
            'id': 'UUs0ifCMCm1icqRbqhUINa0w',
            'title': 'Uploads from Deus Ex',
            'uploader': 'Deus Ex',
            'uploader_id': 'DeusExOfficial',
        },
    }, {
        'url': 'https://invidio.us/channel/UC23qupoDRn9YOAVzeoxjOQA',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Decline URLs that the playlists or live extractors accept."""
        if YoutubePlaylistsIE.suitable(url) or YoutubeLiveIE.suitable(url):
            return False
        return super(YoutubeChannelIE, cls).suitable(url)

    def _build_template_url(self, url, channel_id):
        """Return ``_TEMPLATE_URL`` filled in with *channel_id* (*url* is unused here)."""
        return self._TEMPLATE_URL % channel_id

    def _real_extract(self, url):
        """Extract the channel's videos, preferring its uploads playlist.

        Raises ExtractorError (expected) when the listing is empty and the
        page carries a ``yt-alert-message`` text.
        """
        channel_id = self._match_id(url)
        url = self._build_template_url(url, channel_id)

        # Channel by page listing is restricted to 35 pages of 30 items, i.e. 1050 videos total (see #5778)
        # Workaround by extracting as a playlist if managed to obtain channel playlist URL
        # otherwise fallback on channel by page extraction
        channel_page = self._download_webpage(
            url + '?view=57', channel_id,
            'Downloading channel page', fatal=False)

        channel_playlist_id = None
        if channel_page is not False:
            channel_playlist_id = self._html_search_meta(
                'channelId', channel_page, 'channel id', default=None)
            if not channel_playlist_id:
                app_url = self._html_search_meta(
                    ('al:ios:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad'),
                    channel_page, 'channel url', default=None)
                if app_url:
                    channel_playlist_id = self._search_regex(
                        r'vnd\.youtube://user/([0-9A-Za-z_-]+)',
                        app_url, 'channel id', default=None)

        if channel_playlist_id and channel_playlist_id.startswith('UC'):
            # Swap the "UC" prefix for "UU" and hand over to the playlist extractor.
            uploads_url = compat_urlparse.urljoin(
                url, '/playlist?list=%s' % ('UU' + channel_playlist_id[2:]))
            return self.url_result(uploads_url, 'YoutubePlaylist')

        channel_page = self._download_webpage(url, channel_id, 'Downloading page #1')
        autogenerated_marker = re.search(r'''(?x)
                class="[^"]*?(?:
                    channel-header-autogenerated-label|
                    yt-channel-title-autogenerated
                )[^"]*"''', channel_page)

        if autogenerated_marker is not None:
            # The videos are contained in a single page
            # the ajax pages can't be used, they are empty
            entries = []
            for video_id, video_title in self.extract_videos_from_page(channel_page):
                entries.append(self.url_result(
                    video_id, 'Youtube', video_id=video_id,
                    video_title=video_title))
            return self.playlist_result(entries, channel_id)

        # Probe for at least one entry; an empty listing may be explained by
        # an alert shown on the page.
        probe = self._entries(channel_page, channel_id)
        try:
            next(probe)
        except StopIteration:
            alert_message = self._html_search_regex(
                r'(?s)<div[^>]+class=(["\']).*?\byt-alert-message\b.*?\1[^>]*>(?P<alert>[^<]+)</div>',
                channel_page, 'alert', default=None, group='alert')
            if alert_message:
                raise ExtractorError('Youtube said: %s' % alert_message, expected=True)

        return self.playlist_result(self._entries(channel_page, channel_id), channel_id)
c5e8d7af
PH
2915
2916
class YoutubeUserIE(YoutubeChannelIE):
    """Extractor for ``/user/<name>``, ``/c/<name>``, bare ``/<name>`` and ``ytuser:`` URLs."""

    IE_NAME = 'youtube:user'
    IE_DESC = 'YouTube.com user videos (URL or "ytuser" keyword)'
    _VALID_URL = r'(?:(?:https?://(?:\w+\.)?youtube\.com/(?:(?P<user>user|c)/)?(?!(?:attribution_link|watch|results|shared)(?:$|[^a-z_A-Z0-9-])))|ytuser:)(?!feed/)(?P<id>[A-Za-z0-9_-]+)'
    _TEMPLATE_URL = 'https://www.youtube.com/%s/%s/videos'

    _TESTS = [{
        'url': 'https://www.youtube.com/user/TheLinuxFoundation',
        'playlist_mincount': 320,
        'info_dict': {
            'id': 'UUfX55Sx5hEFjoC3cNs6mCUQ',
            'title': 'Uploads from The Linux Foundation',
            'uploader': 'The Linux Foundation',
            'uploader_id': 'TheLinuxFoundation',
        }
    }, {
        # Only available via https://www.youtube.com/c/12minuteathlete/videos
        # but not https://www.youtube.com/user/12minuteathlete/videos
        'url': 'https://www.youtube.com/c/12minuteathlete/videos',
        'playlist_mincount': 249,
        'info_dict': {
            'id': 'UUVjM-zV6_opMDx7WYxnjZiQ',
            'title': 'Uploads from 12 Minute Athlete',
            'uploader': '12 Minute Athlete',
            'uploader_id': 'the12minuteathlete',
        }
    }, {
        'url': 'ytuser:phihag',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/c/gametrailers',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/gametrailers',
        'only_matching': True,
    }, {
        # This channel is not available, geo restricted to JP
        'url': 'https://www.youtube.com/user/kananishinoSMEJ/videos',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Accept *url* only if no other ``Youtube*IE`` module global does.

        This class's own pattern is very permissive, so every other global
        whose name starts with ``Youtube`` and ends with ``IE`` gets the
        first say.
        """
        for name, klass in globals().items():
            if not (name.startswith('Youtube') and name.endswith('IE')):
                continue
            if klass is cls:
                continue
            if klass.suitable(url):
                return False
        return super(YoutubeUserIE, cls).suitable(url)

    def _build_template_url(self, url, channel_id):
        """Fill ``_TEMPLATE_URL`` from the ``user`` and ``id`` groups of *url*.

        The path kind falls back to ``'user'`` when the ``user`` group did
        not match; *channel_id* is not used.
        """
        url_match = re.match(self._VALID_URL, url)
        path_kind = url_match.group('user') or 'user'
        return self._TEMPLATE_URL % (path_kind, url_match.group('id'))
2971
b05654f0 2972
f07e276a
S
class YoutubeLiveIE(YoutubeBaseInfoExtractor):
    """Extractor for ``.../live`` URLs of a user, channel or custom name."""

    IE_NAME = 'youtube:live'
    IE_DESC = 'YouTube.com live streams'
    _VALID_URL = r'(?P<base_url>https?://(?:\w+\.)?youtube\.com/(?:(?:user|channel|c)/)?(?P<id>[^/]+))/live'

    _TESTS = [{
        'url': 'https://www.youtube.com/user/TheYoungTurks/live',
        'info_dict': {
            'id': 'a48o2S1cPoo',
            'ext': 'mp4',
            'title': 'The Young Turks - Live Main Show',
            'uploader': 'The Young Turks',
            'uploader_id': 'TheYoungTurks',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
            'upload_date': '20150715',
            'license': 'Standard YouTube License',
            'description': 'md5:438179573adcdff3c97ebb1ee632b891',
            'categories': ['News & Politics'],
            'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/TheYoungTurks/live',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Resolve a ``/live`` URL to a video, else to the base URL.

        A video result is returned only when the page's og ``type`` starts
        with ``video`` and its ``videoId`` meta is an 11-character id; in
        every other case (including a failed download) the URL without the
        trailing ``/live`` is returned as a url result.
        """
        channel_id, base_url = re.match(self._VALID_URL, url).group('id', 'base_url')

        webpage = self._download_webpage(url, channel_id, fatal=False)
        if not webpage:
            return self.url_result(base_url)

        page_type = self._og_search_property(
            'type', webpage, 'page type', default='')
        video_id = self._html_search_meta(
            'videoId', webpage, 'video id', default=None)

        is_video_page = page_type.startswith('video')
        if is_video_page and video_id and re.match(r'^[0-9A-Za-z_-]{11}$', video_id):
            return self.url_result(video_id, YoutubeIE.ie_key())
        return self.url_result(base_url)
3023
3024
e462474e
S
class YoutubePlaylistsIE(YoutubePlaylistsBaseInfoExtractor):
    """Extractor for ``/user/<id>/playlists`` and ``/channel/<id>/playlists`` URLs.

    Only class-level configuration is declared here; no methods are
    defined in this class.
    """

    IE_NAME = 'youtube:playlists'
    IE_DESC = 'YouTube.com user/channel playlists'
    _VALID_URL = r'https?://(?:\w+\.)?youtube\.com/(?:user|channel)/(?P<id>[^/]+)/playlists'

    _TESTS = [{
        'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
        'playlist_mincount': 4,
        'info_dict': {
            'id': 'ThirstForScience',
            'title': 'ThirstForScience',
        },
    }, {
        # with "Load more" button
        'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
        'playlist_mincount': 70,
        'info_dict': {
            'id': 'igorkle1',
            'title': 'Игорь Клейнер',
        },
    }, {
        'url': 'https://www.youtube.com/channel/UCiU1dHvZObB2iP6xkJ__Icw/playlists',
        'playlist_mincount': 17,
        'info_dict': {
            'id': 'UCiU1dHvZObB2iP6xkJ__Icw',
            'title': 'Chem Player',
        },
        'skip': 'Blocked',
    }]
0c148415
S
3054
3055
870f3bfc
S
class YoutubeSearchBaseInfoExtractor(YoutubePlaylistBaseInfoExtractor):
    """Shared base for the search extractors; overrides only ``_VIDEO_RE``."""

    # Matches an href to /watch?v=<11-character id> (group ``id``) and, when a
    # title="..." attribute follows inside the same tag, captures it as group
    # ``title``.
    _VIDEO_RE = r'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})(?:[^"]*"[^>]+\btitle="(?P<title>[^"]+))?'
3058
3059
class YoutubeSearchIE(SearchInfoExtractor, YoutubeSearchBaseInfoExtractor):
    """Search extractor driven by the ``ytsearch`` key.

    Subclasses can tweak the request through ``_EXTRA_QUERY_ARGS``, which is
    merged into the query string of the first results page.
    """

    IE_DESC = 'YouTube.com searches'
    # there doesn't appear to be a real limit, for example if you search for
    # 'python' you get more than 8.000.000 results
    _MAX_RESULTS = float('inf')
    IE_NAME = 'youtube:search'
    _SEARCH_KEY = 'ytsearch'
    _EXTRA_QUERY_ARGS = {}
    _TESTS = []

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query.

        Result pages are fetched one after another, following the "Next"
        link of each page, until ``n`` videos have been collected, a page
        yields no videos, or no "Next" link is present.

        :param query: the search terms.
        :param n: maximum number of entries to return.
        :returns: a playlist result titled ``query`` with at most ``n``
            entries.
        :raises ExtractorError: (expected) when a downloaded page contains a
            ``search-message`` element.
        """
        videos = []

        url_query = {
            # The query is encoded to UTF-8 bytes before being url-encoded.
            'search_query': query.encode('utf-8'),
        }
        url_query.update(self._EXTRA_QUERY_ARGS)
        result_url = 'https://www.youtube.com/results?' + compat_urllib_parse_urlencode(url_query)

        for pagenum in itertools.count(1):
            data = self._download_json(
                result_url, video_id='query "%s"' % query,
                note='Downloading page %s' % pagenum,
                errnote='Unable to download API page',
                query={'spf': 'navigate'})
            html_content = data[1]['body']['content']

            if 'class="search-message' in html_content:
                raise ExtractorError(
                    '[youtube] No video results', expected=True)

            new_videos = list(self._process_page(html_content))
            videos += new_videos
            # Stop as soon as enough videos are collected.  Using ">=" (rather
            # than ">") avoids downloading one more page whose results would
            # be thrown away when exactly n videos have been gathered.
            if not new_videos or len(videos) >= n:
                break
            next_link = self._html_search_regex(
                r'href="(/results\?[^"]*\bsp=[^"]+)"[^>]*>\s*<span[^>]+class="[^"]*\byt-uix-button-content\b[^"]*"[^>]*>Next',
                html_content, 'next link', default=None)
            if next_link is None:
                break
            result_url = compat_urlparse.urljoin('https://www.youtube.com/', next_link)

        # Guard the slice: n is presumably allowed to be as large as
        # _MAX_RESULTS (infinity), which is not a valid slice bound.
        if len(videos) > n:
            videos = videos[:n]
        return self.playlist_result(videos, query)
75dff0ee 3108
c9ae7b95 3109
class YoutubeSearchDateIE(YoutubeSearchIE):
    """Variant of YoutubeSearchIE that adds a date sort argument to the query."""

    IE_NAME = '%s:date' % YoutubeSearchIE.IE_NAME
    IE_DESC = 'YouTube.com searches, newest videos first'
    _SEARCH_KEY = 'ytsearchdate'
    # Merged into the results-page query string by the parent class.
    _EXTRA_QUERY_ARGS = {'search_sort': 'video_date_uploaded'}
75dff0ee 3115
c9ae7b95 3116
class YoutubeSearchURLIE(YoutubeSearchBaseInfoExtractor):
    """Extractor for YouTube results-page URLs (``/results?search_query=...``)."""

    IE_NAME = 'youtube:search_url'
    IE_DESC = 'YouTube.com search URLs'
    # The raw (still url-encoded) search terms are captured as ``query``.
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?P<query>[^&]+)(?:[&]|$)'
    _TESTS = [
        {
            'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
            'playlist_mincount': 5,
            'info_dict': {
                'title': 'youtube-dl test video',
            }
        },
        {
            'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
            'only_matching': True,
        },
    ]

    def _real_extract(self, url):
        """Download the results page and return its videos as a playlist.

        The playlist title is the decoded search query taken from the URL.
        """
        encoded_query = re.match(self._VALID_URL, url).group('query')
        query = compat_urllib_parse_unquote_plus(encoded_query)
        results_page = self._download_webpage(url, query)
        entries = self._process_page(results_page)
        return self.playlist_result(entries, playlist_title=query)
c9ae7b95
PH
3137
3138
class YoutubeShowIE(YoutubePlaylistsBaseInfoExtractor):
    """Extractor for ``youtube.com/show/<id>`` pages."""

    IE_NAME = 'youtube:show'
    IE_DESC = 'YouTube.com (multi-season) shows'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/show/(?P<id>[^?#]*)'
    _TESTS = [
        {
            'url': 'https://www.youtube.com/show/airdisasters',
            'playlist_mincount': 5,
            'info_dict': {
                'id': 'airdisasters',
                'title': 'Air Disasters',
            }
        },
    ]

    def _real_extract(self, url):
        """Delegate to the parent extractor using the show's /playlists page."""
        playlists_url = 'https://www.youtube.com/show/%s/playlists' % self._match_id(url)
        return super(YoutubeShowIE, self)._real_extract(playlists_url)
04cc9617
JMF
3156
3157
class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
    """
    Common implementation for the feed extractors.

    Concrete subclasses have to provide the _FEED_NAME and _PLAYLIST_TITLE
    attributes.
    """
    _LOGIN_REQUIRED = True

    @property
    def IE_NAME(self):
        """Extractor name derived from the subclass' ``_FEED_NAME``."""
        return 'youtube:%s' % self._FEED_NAME

    def _real_initialize(self):
        """Log in before any extraction takes place."""
        self._login()

    def _entries(self, page):
        """Yield a result for every not-yet-seen video id found in the feed.

        Starts from the already downloaded ``page`` and keeps following the
        "load more" link until a portion brings no new ids or no further
        link is present.
        """
        # Same procedure as for playlists, except that the video id regex
        # carries no index.
        seen_ids = set()
        content_html = page
        more_widget_html = page
        for page_num in itertools.count(1):
            found = re.findall(r'href="\s*/watch\?v=([0-9A-Za-z_-]{11})', content_html)

            # The 'recommended' feed offers an endless 'load more' where each
            # portion repeats the same videos (sometimes slightly reordered),
            # so only unseen ids count and an all-duplicate portion ends the
            # iteration.
            fresh_ids = [
                video_id for video_id in orderedSet(found)
                if video_id not in seen_ids]
            if not fresh_ids:
                break
            seen_ids.update(fresh_ids)

            for entry in self._ids_to_results(fresh_ids):
                yield entry

            load_more = re.search(
                r'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html)
            if not load_more:
                break

            more = self._download_json(
                'https://youtube.com/%s' % load_more.group('more'),
                self._PLAYLIST_TITLE,
                'Downloading page #%s' % page_num,
                transform_source=uppercase_escape)
            content_html = more['content_html']
            more_widget_html = more['load_more_widget_html']

    def _real_extract(self, url):
        """Download the feed page and wrap its entries in a playlist result."""
        feed_url = 'https://www.youtube.com/feed/%s' % self._FEED_NAME
        page = self._download_webpage(feed_url, self._PLAYLIST_TITLE)
        return self.playlist_result(
            self._entries(page), playlist_title=self._PLAYLIST_TITLE)
25f14e9f
S
3209
3210
class YoutubeWatchLaterIE(YoutubePlaylistIE):
    """Extractor for the "Watch later" list (playlist id ``WL``)."""

    IE_NAME = 'youtube:watchlater'
    IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/(?:feed/watch_later|(?:playlist|watch)\?(?:.+&)?list=WL)|:ytwatchlater'

    _TESTS = [
        {
            'url': 'https://www.youtube.com/playlist?list=WL',
            'only_matching': True,
        },
        {
            'url': 'https://www.youtube.com/watch?v=bCNU9TrbiRk&index=1&list=WL',
            'only_matching': True,
        },
    ]

    def _real_extract(self, url):
        """Return the single video when one is selected, else the WL playlist."""
        _, single_video = self._check_download_just_video(url, 'WL')
        if not single_video:
            _, whole_playlist = self._extract_playlist('WL')
            return whole_playlist
        return single_video
f459d170 3230
5f6a1245 3231
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    """Extractor for the favourites list, found via the my_favorites page."""

    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube.com favourite videos, ":ytfav" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/my_favorites|:ytfav(?:ou?rites)?'
    _LOGIN_REQUIRED = True

    def _real_extract(self, url):
        """Find the favourites playlist id and delegate to YoutubePlaylist."""
        favourites_page = self._download_webpage(
            'https://www.youtube.com/my_favorites', 'Youtube Favourites videos')
        # The id is the first "list=" value on the page, up to a quote or '&'.
        favourites_list_id = self._search_regex(
            r'list=(.+?)["&]', favourites_page, 'favourites playlist id')
        return self.url_result(favourites_list_id, 'YoutubePlaylist')
15870e90
PH
3242
3243
25f14e9f
S
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    """Feed extractor bound to the ``recommended`` feed."""

    _FEED_NAME = 'recommended'
    _PLAYLIST_TITLE = 'Youtube Recommended videos'
    IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/recommended|:ytrec(?:ommended)?'
1ed5b5c9 3249
1ed5b5c9 3250
25f14e9f
S
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    """Feed extractor bound to the ``subscriptions`` feed."""

    _FEED_NAME = 'subscriptions'
    _PLAYLIST_TITLE = 'Youtube Subscriptions'
    IE_DESC = 'YouTube.com subscriptions feed, "ytsubs" keyword (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/subscriptions|:ytsubs(?:criptions)?'
1ed5b5c9 3256
1ed5b5c9 3257
25f14e9f
S
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    """Feed extractor bound to the ``history`` feed."""

    _FEED_NAME = 'history'
    _PLAYLIST_TITLE = 'Youtube History'
    IE_DESC = 'Youtube watch history, ":ythistory" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/history|:ythistory'
1ed5b5c9
JMF
3263
3264
15870e90
PH
class YoutubeTruncatedURLIE(InfoExtractor):
    """Catch YouTube URLs that lost their video id and explain why.

    The pattern accepts a watch URL that ends after at most one ancillary
    parameter (or right after ``watch?``), and an attribution link that ends
    after its ``a`` parameter.  Extraction always fails with a hint about
    quoting the URL.
    """

    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [
        {
            'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
            'only_matching': True,
        },
        {
            'url': 'https://www.youtube.com/watch?',
            'only_matching': True,
        },
        {
            'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
            'only_matching': True,
        },
        {
            'url': 'https://www.youtube.com/watch?feature=foo',
            'only_matching': True,
        },
        {
            'url': 'https://www.youtube.com/watch?hl=en-GB',
            'only_matching': True,
        },
        {
            'url': 'https://www.youtube.com/watch?t=2372',
            'only_matching': True,
        },
    ]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError describing the likely cause."""
        hint = (
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like youtube-dl '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply youtube-dl BaW_jenozKc .')
        raise ExtractorError(hint, expected=True)
772fd5cc
PH
3312
3313
class YoutubeTruncatedIDIE(InfoExtractor):
    """Catch watch URLs whose video id is shorter than 11 characters."""

    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    # Accepts ids of 1 to 10 characters only.
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [
        {
            'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
            'only_matching': True,
        },
    ]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError naming the incomplete id."""
        truncated_id = self._match_id(url)
        message = 'Incomplete YouTube ID %s. URL %s looks truncated.' % (truncated_id, url)
        raise ExtractorError(message, expected=True)