]> jfr.im git - yt-dlp.git/blame - youtube_dl/extractor/youtube.py
[youtube] Use redirected video id if any (closes #25063)
[yt-dlp.git] / youtube_dl / extractor / youtube.py
CommitLineData
c5e8d7af 1# coding: utf-8
c5e8d7af 2
78caa52a
PH
3from __future__ import unicode_literals
4
5
0ca96d48 6import itertools
c5e8d7af 7import json
c4417ddb 8import os.path
d77ab8e2 9import random
c5e8d7af 10import re
42939b61 11import time
e0df6211 12import traceback
c5e8d7af 13
b05654f0 14from .common import InfoExtractor, SearchInfoExtractor
2b25cb5d 15from ..jsinterp import JSInterpreter
54256267 16from ..swfinterp import SWFInterpreter
4bb4a188 17from ..compat import (
edf3e38e 18 compat_chr,
f8c55c66 19 compat_HTTPError,
8d81f3e3 20 compat_kwargs,
c5e8d7af 21 compat_parse_qs,
7fd002c0
S
22 compat_urllib_parse_unquote,
23 compat_urllib_parse_unquote_plus,
15707c7e 24 compat_urllib_parse_urlencode,
7c80519c 25 compat_urllib_parse_urlparse,
7c61bd36 26 compat_urlparse,
c5e8d7af 27 compat_str,
4bb4a188
PH
28)
29from ..utils import (
27019dbb 30 bool_or_none,
c5e8d7af 31 clean_html,
9b9c5355 32 error_to_compat_str,
351f37c0 33 extract_attributes,
c5e8d7af 34 ExtractorError,
2d30521a 35 float_or_none,
4bb4a188
PH
36 get_element_by_attribute,
37 get_element_by_id,
dd27fd17 38 int_or_none,
94278f72 39 mimetype2ext,
4bb4a188 40 orderedSet,
6310acf5 41 parse_codecs,
7c80519c 42 parse_duration,
0cb58b02 43 remove_quotes,
3995d37d 44 remove_start,
cf7e015f 45 smuggle_url,
dbdaaa23 46 str_or_none,
c93d53f5 47 str_to_int,
556dbe7f 48 try_get,
c5e8d7af
PH
49 unescapeHTML,
50 unified_strdate,
cf7e015f 51 unsmuggle_url,
81c2f20b 52 uppercase_escape,
21c340b8 53 url_or_none,
6e6bc8da 54 urlencode_postdata,
c5e8d7af
PH
55)
56
5f6a1245 57
class YoutubeBaseInfoExtractor(InfoExtractor):
    """Provide base functions for Youtube extractors"""
    _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
    _TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge'

    _LOOKUP_URL = 'https://accounts.google.com/_/signin/sl/lookup'
    _CHALLENGE_URL = 'https://accounts.google.com/_/signin/sl/challenge'
    # {0} is filled with the "TL" token taken from the challenge response
    _TFA_URL = 'https://accounts.google.com/_/signin/challenge?hl=en&TL={0}'

    _NETRC_MACHINE = 'youtube'
    # If True it will raise an error if no login info is provided
    _LOGIN_REQUIRED = False

    _PLAYLIST_ID_RE = r'(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}'

    def _set_language(self):
        """Set the PREF cookie on .youtube.com (includes hl=en)."""
        self._set_cookie(
            '.youtube.com', 'PREF', 'f1=50000000&hl=en',
            # YouTube sets the expire time to about two months
            expire_time=time.time() + 2 * 30 * 24 * 3600)

    def _ids_to_results(self, ids):
        """Return a list with one 'Youtube' url_result per video id in ids."""
        return [
            self.url_result(vid_id, 'Youtube', video_id=vid_id)
            for vid_id in ids]

    def _login(self):
        """
        Attempt to log in to YouTube.
        True is returned if successful or skipped.
        False is returned if login failed.

        If _LOGIN_REQUIRED is set and no authentication was provided
        (no username and no 'cookiefile' param), an ExtractorError is raised.
        """
        username, password = self._get_login_info()
        # No authentication to be performed
        if username is None:
            if self._LOGIN_REQUIRED and self._downloader.params.get('cookiefile') is None:
                raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
            return True

        login_page = self._download_webpage(
            self._LOGIN_URL, None,
            note='Downloading login page',
            errnote='unable to fetch login page', fatal=False)
        if login_page is False:
            # Explicit False (not a bare return) to match the documented contract
            return False

        login_form = self._hidden_inputs(login_page)

        def req(url, f_req, note, errnote):
            """POST the login form fields plus the JSON-encoded f_req to url.

            The result is compared against False by the callers below to
            detect a failed (non-fatal) request.
            """
            data = login_form.copy()
            data.update({
                'pstMsg': 1,
                'checkConnection': 'youtube',
                'checkedDomains': 'youtube',
                'hl': 'en',
                'deviceinfo': '[null,null,null,[],null,"US",null,null,[],"GlifWebSignIn",null,[null,null,[]]]',
                'f.req': json.dumps(f_req),
                'flowName': 'GlifWebSignIn',
                'flowEntry': 'ServiceLogin',
                # TODO: reverse actual botguard identifier generation algo
                'bgRequest': '["identifier",""]',
            })
            return self._download_json(
                url, None, note=note, errnote=errnote,
                # Drop everything preceding the first '[' before JSON parsing
                transform_source=lambda s: re.sub(r'^[^[]*', '', s),
                fatal=False,
                data=urlencode_postdata(data), headers={
                    'Content-Type': 'application/x-www-form-urlencoded;charset=utf-8',
                    'Google-Accounts-XSRF': 1,
                })

        def warn(message):
            self._downloader.report_warning(message)

        lookup_req = [
            username,
            None, [], None, 'US', None, None, 2, False, True,
            [
                None, None,
                [2, 1, None, 1,
                 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn',
                 None, [], 4],
                1, [None, None, []], None, None, None, True
            ],
            username,
        ]

        lookup_results = req(
            self._LOOKUP_URL, lookup_req,
            'Looking up account info', 'Unable to look up account info')

        if lookup_results is False:
            return False

        user_hash = try_get(lookup_results, lambda x: x[0][2], compat_str)
        if not user_hash:
            warn('Unable to extract user hash')
            return False

        challenge_req = [
            user_hash,
            None, 1, None, [1, None, None, None, [password, None, True]],
            [
                None, None, [2, 1, None, 1, 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn', None, [], 4],
                1, [None, None, []], None, None, None, True
            ]]

        challenge_results = req(
            self._CHALLENGE_URL, challenge_req,
            'Logging in', 'Unable to log in')

        if challenge_results is False:
            # Explicit False (not a bare return) to match the documented contract
            return False

        login_res = try_get(challenge_results, lambda x: x[0][5], list)
        if login_res:
            login_msg = try_get(login_res, lambda x: x[5], compat_str)
            # Parenthesized: '%' binds tighter than the conditional expression,
            # so without the parentheses the prefix was lost for every message
            # other than INCORRECT_ANSWER_ENTERED.
            warn('Unable to login: %s' % (
                'Invalid password'
                if login_msg == 'INCORRECT_ANSWER_ENTERED' else login_msg))
            return False

        res = try_get(challenge_results, lambda x: x[0][-1], list)
        if not res:
            warn('Unable to extract result entry')
            return False

        login_challenge = try_get(res, lambda x: x[0][0], list)
        if login_challenge:
            challenge_str = try_get(login_challenge, lambda x: x[2], compat_str)
            if challenge_str == 'TWO_STEP_VERIFICATION':
                # SEND_SUCCESS - TFA code has been successfully sent to phone
                # QUOTA_EXCEEDED - reached the limit of TFA codes
                status = try_get(login_challenge, lambda x: x[5], compat_str)
                if status == 'QUOTA_EXCEEDED':
                    warn('Exceeded the limit of TFA codes, try later')
                    return False

                tl = try_get(challenge_results, lambda x: x[1][2], compat_str)
                if not tl:
                    warn('Unable to extract TL')
                    return False

                tfa_code = self._get_tfa_info('2-step verification code')

                if not tfa_code:
                    warn(
                        'Two-factor authentication required. Provide it either interactively or with --twofactor <code> '
                        '(Note that only TOTP (Google Authenticator App) codes work at this time.)')
                    return False

                tfa_code = remove_start(tfa_code, 'G-')

                tfa_req = [
                    user_hash, None, 2, None,
                    [
                        9, None, None, None, None, None, None, None,
                        [None, tfa_code, True, 2]
                    ]]

                tfa_results = req(
                    self._TFA_URL.format(tl), tfa_req,
                    'Submitting TFA code', 'Unable to submit TFA code')

                if tfa_results is False:
                    return False

                tfa_res = try_get(tfa_results, lambda x: x[0][5], list)
                if tfa_res:
                    tfa_msg = try_get(tfa_res, lambda x: x[5], compat_str)
                    # Parenthesized for the same precedence reason as above
                    warn('Unable to finish TFA: %s' % (
                        'Invalid TFA code'
                        if tfa_msg == 'INCORRECT_ANSWER_ENTERED' else tfa_msg))
                    return False

                check_cookie_url = try_get(
                    tfa_results, lambda x: x[0][-1][2], compat_str)
            else:
                CHALLENGES = {
                    'LOGIN_CHALLENGE': "This device isn't recognized. For your security, Google wants to make sure it's really you.",
                    'USERNAME_RECOVERY': 'Please provide additional information to aid in the recovery process.',
                    'REAUTH': "There is something unusual about your activity. For your security, Google wants to make sure it's really you.",
                }
                challenge = CHALLENGES.get(
                    challenge_str,
                    '%s returned error %s.' % (self.IE_NAME, challenge_str))
                warn('%s\nGo to https://accounts.google.com/, login and solve a challenge.' % challenge)
                return False
        else:
            check_cookie_url = try_get(res, lambda x: x[2], compat_str)

        if not check_cookie_url:
            warn('Unable to extract CheckCookie URL')
            return False

        check_cookie_results = self._download_webpage(
            check_cookie_url, None, 'Checking cookie', fatal=False)

        if check_cookie_results is False:
            return False

        # The login is treated as successful only if this URL appears in the page
        if 'https://myaccount.google.com/' not in check_cookie_results:
            warn('Unable to log in')
            return False

        return True

    def _download_webpage_handle(self, *args, **kwargs):
        """Call the parent implementation with disable_polymer=true added to the query."""
        # Work on a copy so the caller's query dict is not mutated
        query = kwargs.get('query', {}).copy()
        query['disable_polymer'] = 'true'
        kwargs['query'] = query
        return super(YoutubeBaseInfoExtractor, self)._download_webpage_handle(
            *args, **compat_kwargs(kwargs))

    def _real_initialize(self):
        """Set the language cookie and attempt to log in; no-op without a downloader."""
        if self._downloader is None:
            return
        self._set_language()
        if not self._login():
            return
c5e8d7af 280
8377574c 281
class YoutubeEntryListBaseInfoExtractor(YoutubeBaseInfoExtractor):
    """Extract entries from a page with a "Load more" button.

    Relies on a ``_process_page(html)`` method (not defined in this class)
    that yields the entries found in one chunk of HTML.
    """

    def _entries(self, page, playlist_id):
        """Yield entries from page, then from each "Load more" continuation.

        Iteration stops when no load-more link is found or when a
        continuation returns blank content.
        """
        widget_html = chunk_html = page
        for page_num in itertools.count(1):
            for entry in self._process_page(chunk_html):
                yield entry

            load_more = re.search(
                r'data-uix-load-more-href="/?(?P<more>[^"]+)"', widget_html)
            if load_more is None:
                break

            max_retries = 3
            for attempt in range(max_retries + 1):
                retry_note = ' (retry #%d)' % attempt if attempt else ''
                try:
                    # Downloading page may result in intermittent 5xx HTTP error
                    # that is usually worked around with a retry
                    more = self._download_json(
                        'https://youtube.com/%s' % load_more.group('more'), playlist_id,
                        'Downloading page #%s%s' % (page_num, retry_note),
                        transform_source=uppercase_escape)
                    break
                except ExtractorError as e:
                    retryable = (
                        isinstance(e.cause, compat_HTTPError)
                        and e.cause.code in (500, 503))
                    if retryable and attempt < max_retries:
                        continue
                    raise

            chunk_html = more['content_html']
            if not chunk_html.strip():
                # Some webpages show a "Load more" button but they don't
                # have more videos
                break
            widget_html = more['load_more_widget_html']
319
061a75ed
S
320
class YoutubePlaylistBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
    """Entry-list extractor whose pages contain video links matched by ``self._VIDEO_RE``."""

    def _process_page(self, content):
        """Yield a 'Youtube' url_result for every (id, title) pair found in content."""
        for vid, title in self.extract_videos_from_page(content):
            yield self.url_result(vid, 'Youtube', vid, title)

    def extract_videos_from_page_impl(self, video_re, page, ids_in_page, titles_in_page):
        """Collect video ids and titles matched by video_re into the two given lists.

        ids_in_page and titles_in_page are extended in place and kept parallel.
        A repeated id is not added again; its stored title is filled in only
        if it was empty and the new match has one.
        """
        for match in re.finditer(video_re, page):
            named = match.groupdict()
            video_id = match.group('id')
            # The link with index 0 is not the first video of the playlist (not sure if still actual)
            # NOTE(review): this compares the 'id' group (not 'index') with '0' - confirm intended
            if 'index' in named and video_id == '0':
                continue

            title = unescapeHTML(match.group('title')) if 'title' in named else None
            if title:
                title = title.strip()
            if title == '► Play all':
                title = None

            if video_id in ids_in_page:
                pos = ids_in_page.index(video_id)
                if title and not titles_in_page[pos]:
                    titles_in_page[pos] = title
            else:
                ids_in_page.append(video_id)
                titles_in_page.append(title)

    def extract_videos_from_page(self, page):
        """Return zipped (video_id, title) pairs found in page using self._VIDEO_RE."""
        found_ids, found_titles = [], []
        self.extract_videos_from_page_impl(
            self._VIDEO_RE, page, found_ids, found_titles)
        return zip(found_ids, found_titles)
352
353
061a75ed
S
class YoutubePlaylistsBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
    """Entry-list extractor for pages that list playlists rather than videos."""

    def _process_page(self, content):
        """Yield a 'YoutubePlaylist' url_result for each distinct playlist id linked in content."""
        found_ids = re.findall(
            r'<h3[^>]+class="[^"]*yt-lockup-title[^"]*"[^>]*><a[^>]+href="/?playlist\?list=([0-9A-Za-z-_]{10,})"',
            content)
        # orderedSet is applied so that each id is yielded only once
        for list_id in orderedSet(found_ids):
            yield self.url_result(
                'https://www.youtube.com/playlist?list=%s' % list_id, 'YoutubePlaylist')

    def _real_extract(self, url):
        """Download url and return a playlist result built from its paged entries."""
        playlist_id = self._match_id(url)
        webpage = self._download_webpage(url, playlist_id)
        # The og title is optional (fatal=False)
        title = self._og_search_title(webpage, fatal=False)
        entries = self._entries(webpage, playlist_id)
        return self.playlist_result(entries, playlist_id, title)
0c148415
S
367
368
360e1ca5 369class YoutubeIE(YoutubeBaseInfoExtractor):
78caa52a 370 IE_DESC = 'YouTube.com'
cb7dfeea 371 _VALID_URL = r"""(?x)^
c5e8d7af 372 (
edb53e2d 373 (?:https?://|//) # http(s):// or protocol-independent URL
66b48727 374 (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com/|
484aaeb2 375 (?:www\.)?deturl\.com/www\.youtube\.com/|
e70dc1d1 376 (?:www\.)?pwnyoutube\.com/|
8b561bfc 377 (?:www\.)?hooktube\.com/|
f7000f3a 378 (?:www\.)?yourepeat\.com/|
e69ae5b9 379 tube\.majestyc\.net/|
ba036333 380 # Invidious instances taken from https://github.com/omarroth/invidious/wiki/Invidious-Instances
77d95677 381 (?:(?:www|dev)\.)?invidio\.us/|
ba036333 382 (?:(?:www|no)\.)?invidiou\.sh/|
383 (?:(?:www|fi|de)\.)?invidious\.snopyta\.org/|
8ae113ca 384 (?:www\.)?invidious\.kabi\.tk/|
ba036333 385 (?:www\.)?invidious\.13ad\.de/|
791d2e81 386 (?:www\.)?invidious\.mastodon\.host/|
494d664e 387 (?:www\.)?invidious\.nixnet\.xyz/|
666d808e 388 (?:www\.)?invidious\.drycat\.fr/|
ba036333 389 (?:www\.)?tube\.poal\.co/|
8ae113ca 390 (?:www\.)?vid\.wxzm\.sx/|
494d664e 391 (?:www\.)?yt\.elukerio\.org/|
894b3826 392 (?:www\.)?yt\.lelux\.fi/|
bff90fc5 393 (?:www\.)?kgg2m7yk5aybusll\.onion/|
394 (?:www\.)?qklhadlycap4cnod\.onion/|
395 (?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion/|
396 (?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion/|
397 (?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion/|
398 (?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion/|
33c1c7d8 399 (?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p/|
e69ae5b9 400 youtube\.googleapis\.com/) # the various hostnames, with wildcard subdomains
c5e8d7af
PH
401 (?:.*?\#/)? # handle anchor (#/) redirect urls
402 (?: # the various things that can precede the ID:
ac7553d0 403 (?:(?:v|embed|e)/(?!videoseries)) # v/ or embed/ or e/
c5e8d7af 404 |(?: # or the v= param in all its forms
f7000f3a 405 (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
c5e8d7af 406 (?:\?|\#!?) # the params delimiter ? or # or #!
040ac686 407 (?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
c5e8d7af
PH
408 v=
409 )
f4b05232 410 ))
cbaed4bb
S
411 |(?:
412 youtu\.be| # just youtu.be/xxxx
6d4fc66b
S
413 vid\.plus| # or vid.plus/xxxx
414 zwearz\.com/watch| # or zwearz.com/watch/xxxx
cbaed4bb 415 )/
edb53e2d 416 |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
f4b05232 417 )
c5e8d7af 418 )? # all until now is optional -> you can pass the naked ID
8963d9c2 419 ([0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
d0ba5587
S
420 (?!.*?\blist=
421 (?:
422 %(playlist_id)s| # combined list/video URLs are handled by the playlist IE
423 WL # WL are handled by the watch later IE
424 )
425 )
c5e8d7af 426 (?(1).+)? # if we found the ID, everything can follow
d0ba5587 427 $""" % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
c5e8d7af 428 _NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
2c62dc26 429 _formats = {
c2d3cb4c 430 '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
431 '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
432 '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
433 '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
434 '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
435 '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
436 '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
437 '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
3834d3e3 438 # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
c2d3cb4c 439 '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
440 '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
441 '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
442 '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
443 '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
444 '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
e1a0bfdf 445 '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
c2d3cb4c 446 '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
447 '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
e1a0bfdf 448
449
450 # 3D videos
c2d3cb4c 451 '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
452 '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
453 '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
454 '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
e1a0bfdf 455 '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
456 '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
457 '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
836a086c 458
96fb5605 459 # Apple HTTP Live Streaming
11f12195 460 '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
c2d3cb4c 461 '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
462 '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
463 '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
464 '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
465 '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
e1a0bfdf 466 '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
467 '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
2c62dc26
PH
468
469 # DASH mp4 video
d23028a8
S
470 '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
471 '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
472 '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
473 '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
474 '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
067aa17e 475 '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
d23028a8
S
476 '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
477 '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
478 '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
479 '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
480 '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
481 '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},
836a086c 482
f6f1fc92 483 # Dash mp4 audio
d23028a8
S
484 '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
485 '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
486 '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
487 '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
488 '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
489 '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
490 '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},
836a086c
AZ
491
492 # Dash webm
d23028a8
S
493 '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
494 '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
495 '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
496 '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
497 '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
498 '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
499 '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
500 '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
501 '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
502 '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
503 '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
504 '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
505 '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
506 '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
507 '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
4c6b4764 508 # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
d23028a8
S
509 '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
510 '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
511 '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
512 '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
513 '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
514 '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
2c62dc26
PH
515
516 # Dash webm audio
d23028a8
S
517 '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
518 '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},
ce6b9a2d 519
0857baad 520 # Dash webm audio with opus inside
d23028a8
S
521 '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
522 '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
523 '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},
0857baad 524
ce6b9a2d
PH
525 # RTMP (unnamed)
526 '_rtmp': {'protocol': 'rtmp'},
b85eae0f
S
527
528 # av01 video only formats sometimes served with "unknown" codecs
529 '394': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
530 '395': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
531 '396': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
532 '397': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
c5e8d7af 533 }
19041a38 534 _SUBTITLE_FORMATS = ('srv1', 'srv2', 'srv3', 'ttml', 'vtt')
836a086c 535
fd5c4aab
S
536 _GEO_BYPASS = False
537
78caa52a 538 IE_NAME = 'youtube'
2eb88d95
PH
539 _TESTS = [
540 {
2d3d2997 541 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
4bc3a23e
PH
542 'info_dict': {
543 'id': 'BaW_jenozKc',
544 'ext': 'mp4',
545 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
546 'uploader': 'Philipp Hagemeister',
547 'uploader_id': 'phihag',
ec85ded8 548 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
dd4c4492
S
549 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
550 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
4bc3a23e
PH
551 'upload_date': '20121002',
552 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
553 'categories': ['Science & Technology'],
000b6b5a 554 'tags': ['youtube-dl'],
556dbe7f 555 'duration': 10,
dbdaaa23 556 'view_count': int,
3e7c1224
PH
557 'like_count': int,
558 'dislike_count': int,
7c80519c 559 'start_time': 1,
297a564b 560 'end_time': 9,
2eb88d95 561 }
0e853ca4 562 },
0e853ca4 563 {
2d3d2997 564 'url': 'https://www.youtube.com/watch?v=UxxajLWwzqY',
4bc3a23e
PH
565 'note': 'Test generic use_cipher_signature video (#897)',
566 'info_dict': {
567 'id': 'UxxajLWwzqY',
568 'ext': 'mp4',
569 'upload_date': '20120506',
570 'title': 'Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]',
0cb58b02 571 'alt_title': 'I Love It (feat. Charli XCX)',
5429d6a9 572 'description': 'md5:19a2f98d9032b9311e686ed039564f63',
000b6b5a
S
573 'tags': ['Icona Pop i love it', 'sweden', 'pop music', 'big beat records', 'big beat', 'charli',
574 'xcx', 'charli xcx', 'girls', 'hbo', 'i love it', "i don't care", 'icona', 'pop',
575 'iconic ep', 'iconic', 'love', 'it'],
556dbe7f 576 'duration': 180,
4bc3a23e
PH
577 'uploader': 'Icona Pop',
578 'uploader_id': 'IconaPop',
ec85ded8 579 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IconaPop',
0cb58b02 580 'creator': 'Icona Pop',
936784b2
S
581 'track': 'I Love It (feat. Charli XCX)',
582 'artist': 'Icona Pop',
2eb88d95 583 }
c108eb73
JMF
584 },
585 {
4bc3a23e
PH
586 'url': 'https://www.youtube.com/watch?v=07FYdnEawAQ',
587 'note': 'Test VEVO video with age protection (#956)',
588 'info_dict': {
589 'id': '07FYdnEawAQ',
590 'ext': 'mp4',
591 'upload_date': '20130703',
4fe54c12 592 'title': 'Justin Timberlake - Tunnel Vision (Official Music Video) (Explicit)',
0cb58b02 593 'alt_title': 'Tunnel Vision',
4fe54c12 594 'description': 'md5:07dab3356cde4199048e4c7cd93471e1',
556dbe7f 595 'duration': 419,
4bc3a23e
PH
596 'uploader': 'justintimberlakeVEVO',
597 'uploader_id': 'justintimberlakeVEVO',
ec85ded8 598 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/justintimberlakeVEVO',
0cb58b02 599 'creator': 'Justin Timberlake',
7e72694b 600 'track': 'Tunnel Vision',
936784b2 601 'artist': 'Justin Timberlake',
34952f09 602 'age_limit': 18,
c108eb73
JMF
603 }
604 },
fccd3771 605 {
4bc3a23e
PH
606 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
607 'note': 'Embed-only video (#1746)',
608 'info_dict': {
609 'id': 'yZIXLfi8CZQ',
610 'ext': 'mp4',
611 'upload_date': '20120608',
612 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
613 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
614 'uploader': 'SET India',
94bfcd23 615 'uploader_id': 'setindia',
ec85ded8 616 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
94bfcd23 617 'age_limit': 18,
fccd3771
PH
618 }
619 },
11b56058 620 {
2d3d2997 621 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=UxxajLWwzqY',
11b56058
PM
622 'note': 'Use the first video ID in the URL',
623 'info_dict': {
624 'id': 'BaW_jenozKc',
625 'ext': 'mp4',
626 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
627 'uploader': 'Philipp Hagemeister',
628 'uploader_id': 'phihag',
ec85ded8 629 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
11b56058
PM
630 'upload_date': '20121002',
631 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
632 'categories': ['Science & Technology'],
633 'tags': ['youtube-dl'],
556dbe7f 634 'duration': 10,
dbdaaa23 635 'view_count': int,
11b56058
PM
636 'like_count': int,
637 'dislike_count': int,
34a7de29
S
638 },
639 'params': {
640 'skip_download': True,
641 },
11b56058 642 },
dd27fd17 643 {
2d3d2997 644 'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
4bc3a23e
PH
645 'note': '256k DASH audio (format 141) via DASH manifest',
646 'info_dict': {
647 'id': 'a9LDPn-MO4I',
648 'ext': 'm4a',
649 'upload_date': '20121002',
650 'uploader_id': '8KVIDEO',
ec85ded8 651 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
4bc3a23e
PH
652 'description': '',
653 'uploader': '8KVIDEO',
654 'title': 'UHDTV TEST 8K VIDEO.mp4'
4919603f 655 },
4bc3a23e
PH
656 'params': {
657 'youtube_include_dash_manifest': True,
658 'format': '141',
4919603f 659 },
de3c7fe0 660 'skip': 'format 141 not served anymore',
dd27fd17 661 },
3489b7d2
JMF
662 # DASH manifest with encrypted signature
663 {
78caa52a
PH
664 'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
665 'info_dict': {
666 'id': 'IB3lcPjvWLA',
667 'ext': 'm4a',
4fe54c12
S
668 'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
669 'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
556dbe7f 670 'duration': 244,
78caa52a
PH
671 'uploader': 'AfrojackVEVO',
672 'uploader_id': 'AfrojackVEVO',
673 'upload_date': '20131011',
3489b7d2 674 },
4bc3a23e 675 'params': {
78caa52a 676 'youtube_include_dash_manifest': True,
de3c7fe0 677 'format': '141/bestaudio[ext=m4a]',
3489b7d2
JMF
678 },
679 },
aaeb86f6
S
680 # JS player signature function name containing $
681 {
682 'url': 'https://www.youtube.com/watch?v=nfWlot6h_JM',
683 'info_dict': {
684 'id': 'nfWlot6h_JM',
685 'ext': 'm4a',
686 'title': 'Taylor Swift - Shake It Off',
5429d6a9 687 'description': 'md5:307195cd21ff7fa352270fe884570ef0',
556dbe7f 688 'duration': 242,
aaeb86f6
S
689 'uploader': 'TaylorSwiftVEVO',
690 'uploader_id': 'TaylorSwiftVEVO',
691 'upload_date': '20140818',
692 },
693 'params': {
694 'youtube_include_dash_manifest': True,
de3c7fe0 695 'format': '141/bestaudio[ext=m4a]',
aaeb86f6
S
696 },
697 },
aa79ac0c
PH
698 # Controversy video
699 {
700 'url': 'https://www.youtube.com/watch?v=T4XJQO3qol8',
701 'info_dict': {
702 'id': 'T4XJQO3qol8',
703 'ext': 'mp4',
556dbe7f 704 'duration': 219,
aa79ac0c 705 'upload_date': '20100909',
4fe54c12 706 'uploader': 'Amazing Atheist',
aa79ac0c 707 'uploader_id': 'TheAmazingAtheist',
ec85ded8 708 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheAmazingAtheist',
aa79ac0c
PH
709 'title': 'Burning Everyone\'s Koran',
710 'description': 'SUBSCRIBE: http://www.youtube.com/saturninefilms\n\nEven Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html',
711 }
c522adb1
JMF
712 },
713 # Normal age-gate video (No vevo, embed allowed)
714 {
2d3d2997 715 'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
c522adb1
JMF
716 'info_dict': {
717 'id': 'HtVdAasjOgU',
718 'ext': 'mp4',
719 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
ec85ded8 720 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
556dbe7f 721 'duration': 142,
c522adb1
JMF
722 'uploader': 'The Witcher',
723 'uploader_id': 'WitcherGame',
ec85ded8 724 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
c522adb1 725 'upload_date': '20140605',
34952f09 726 'age_limit': 18,
c522adb1
JMF
727 },
728 },
fccae2b9
S
729 # Age-gate video with encrypted signature
730 {
2d3d2997 731 'url': 'https://www.youtube.com/watch?v=6kLq3WMV1nU',
fccae2b9
S
732 'info_dict': {
733 'id': '6kLq3WMV1nU',
4fe54c12 734 'ext': 'mp4',
fccae2b9
S
735 'title': 'Dedication To My Ex (Miss That) (Lyric Video)',
736 'description': 'md5:33765bb339e1b47e7e72b5490139bb41',
eb6793ba 737 'duration': 246,
fccae2b9
S
738 'uploader': 'LloydVEVO',
739 'uploader_id': 'LloydVEVO',
ec85ded8 740 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/LloydVEVO',
fccae2b9 741 'upload_date': '20110629',
34952f09 742 'age_limit': 18,
fccae2b9
S
743 },
744 },
067aa17e 745 # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
7d02dcfa 746 # YouTube Red ad is not captured for creator
774e208f
PH
747 {
748 'url': '__2ABJjxzNo',
749 'info_dict': {
750 'id': '__2ABJjxzNo',
751 'ext': 'mp4',
556dbe7f 752 'duration': 266,
774e208f
PH
753 'upload_date': '20100430',
754 'uploader_id': 'deadmau5',
ec85ded8 755 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
5429d6a9 756 'creator': 'Dada Life, deadmau5',
774e208f
PH
757 'description': 'md5:12c56784b8032162bb936a5f76d55360',
758 'uploader': 'deadmau5',
759 'title': 'Deadmau5 - Some Chords (HD)',
5429d6a9 760 'alt_title': 'This Machine Kills Some Chords',
774e208f
PH
761 },
762 'expected_warnings': [
763 'DASH manifest missing',
764 ]
e52a40ab 765 },
067aa17e 766 # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
e52a40ab
PH
767 {
768 'url': 'lqQg6PlCWgI',
769 'info_dict': {
770 'id': 'lqQg6PlCWgI',
771 'ext': 'mp4',
556dbe7f 772 'duration': 6085,
90227264 773 'upload_date': '20150827',
cbe2bd91 774 'uploader_id': 'olympic',
ec85ded8 775 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
cbe2bd91 776 'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
be49068d 777 'uploader': 'Olympic',
cbe2bd91
PH
778 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
779 },
780 'params': {
781 'skip_download': 'requires avconv',
e52a40ab 782 }
cbe2bd91 783 },
6271f1ca
PH
784 # Non-square pixels
785 {
786 'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
787 'info_dict': {
788 'id': '_b-2C3KPAM0',
789 'ext': 'mp4',
790 'stretched_ratio': 16 / 9.,
556dbe7f 791 'duration': 85,
6271f1ca
PH
792 'upload_date': '20110310',
793 'uploader_id': 'AllenMeow',
ec85ded8 794 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
6271f1ca 795 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
eb6793ba 796 'uploader': '孫ᄋᄅ',
6271f1ca
PH
797 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
798 },
06b491eb
S
799 },
800 # url_encoded_fmt_stream_map is empty string
801 {
802 'url': 'qEJwOuvDf7I',
803 'info_dict': {
804 'id': 'qEJwOuvDf7I',
f57b7835 805 'ext': 'webm',
06b491eb
S
806 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
807 'description': '',
808 'upload_date': '20150404',
809 'uploader_id': 'spbelect',
810 'uploader': 'Наблюдатели Петербурга',
811 },
812 'params': {
813 'skip_download': 'requires avconv',
e323cf3f
S
814 },
815 'skip': 'This live event has ended.',
06b491eb 816 },
067aa17e 817 # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
da77d856
S
818 {
819 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
820 'info_dict': {
821 'id': 'FIl7x6_3R5Y',
eb6793ba 822 'ext': 'webm',
da77d856
S
823 'title': 'md5:7b81415841e02ecd4313668cde88737a',
824 'description': 'md5:116377fd2963b81ec4ce64b542173306',
556dbe7f 825 'duration': 220,
da77d856
S
826 'upload_date': '20150625',
827 'uploader_id': 'dorappi2000',
ec85ded8 828 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
da77d856 829 'uploader': 'dorappi2000',
eb6793ba 830 'formats': 'mincount:31',
da77d856 831 },
eb6793ba 832 'skip': 'not actual anymore',
2ee8f5d8 833 },
8a1a26ce
YCH
834 # DASH manifest with segment_list
835 {
836 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
837 'md5': '8ce563a1d667b599d21064e982ab9e31',
838 'info_dict': {
839 'id': 'CsmdDsKjzN8',
840 'ext': 'mp4',
17ee98e1 841 'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510
8a1a26ce
YCH
842 'uploader': 'Airtek',
843 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
844 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
845 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
846 },
847 'params': {
848 'youtube_include_dash_manifest': True,
849 'format': '135', # bestvideo
be49068d
S
850 },
851 'skip': 'This live event has ended.',
2ee8f5d8 852 },
cf7e015f
S
853 {
854 # Multifeed videos (multiple cameras), URL is for Main Camera
855 'url': 'https://www.youtube.com/watch?v=jqWvoWXjCVs',
856 'info_dict': {
857 'id': 'jqWvoWXjCVs',
858 'title': 'teamPGP: Rocket League Noob Stream',
859 'description': 'md5:dc7872fb300e143831327f1bae3af010',
860 },
861 'playlist': [{
862 'info_dict': {
863 'id': 'jqWvoWXjCVs',
864 'ext': 'mp4',
865 'title': 'teamPGP: Rocket League Noob Stream (Main Camera)',
866 'description': 'md5:dc7872fb300e143831327f1bae3af010',
556dbe7f 867 'duration': 7335,
cf7e015f
S
868 'upload_date': '20150721',
869 'uploader': 'Beer Games Beer',
870 'uploader_id': 'beergamesbeer',
ec85ded8 871 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
7caf9830 872 'license': 'Standard YouTube License',
cf7e015f
S
873 },
874 }, {
875 'info_dict': {
876 'id': '6h8e8xoXJzg',
877 'ext': 'mp4',
878 'title': 'teamPGP: Rocket League Noob Stream (kreestuh)',
879 'description': 'md5:dc7872fb300e143831327f1bae3af010',
556dbe7f 880 'duration': 7337,
cf7e015f
S
881 'upload_date': '20150721',
882 'uploader': 'Beer Games Beer',
883 'uploader_id': 'beergamesbeer',
ec85ded8 884 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
7caf9830 885 'license': 'Standard YouTube License',
cf7e015f
S
886 },
887 }, {
888 'info_dict': {
889 'id': 'PUOgX5z9xZw',
890 'ext': 'mp4',
891 'title': 'teamPGP: Rocket League Noob Stream (grizzle)',
892 'description': 'md5:dc7872fb300e143831327f1bae3af010',
556dbe7f 893 'duration': 7337,
cf7e015f
S
894 'upload_date': '20150721',
895 'uploader': 'Beer Games Beer',
896 'uploader_id': 'beergamesbeer',
ec85ded8 897 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
7caf9830 898 'license': 'Standard YouTube License',
cf7e015f
S
899 },
900 }, {
901 'info_dict': {
902 'id': 'teuwxikvS5k',
903 'ext': 'mp4',
904 'title': 'teamPGP: Rocket League Noob Stream (zim)',
905 'description': 'md5:dc7872fb300e143831327f1bae3af010',
556dbe7f 906 'duration': 7334,
cf7e015f
S
907 'upload_date': '20150721',
908 'uploader': 'Beer Games Beer',
909 'uploader_id': 'beergamesbeer',
ec85ded8 910 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
7caf9830 911 'license': 'Standard YouTube License',
cf7e015f
S
912 },
913 }],
914 'params': {
915 'skip_download': True,
916 },
4fe54c12 917 'skip': 'This video is not available.',
cbaed4bb 918 },
f9f49d87 919 {
067aa17e 920 # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
f9f49d87
S
921 'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
922 'info_dict': {
923 'id': 'gVfLd0zydlo',
924 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
925 },
926 'playlist_count': 2,
be49068d 927 'skip': 'Not multifeed anymore',
f9f49d87 928 },
cbaed4bb 929 {
2d3d2997 930 'url': 'https://vid.plus/FlRa-iH7PGw',
cbaed4bb 931 'only_matching': True,
0e49d9a6 932 },
6d4fc66b 933 {
2d3d2997 934 'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
6d4fc66b
S
935 'only_matching': True,
936 },
0e49d9a6 937 {
067aa17e 938 # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
a8776b10 939 # Also tests cut-off URL expansion in video description (see
067aa17e
S
940 # https://github.com/ytdl-org/youtube-dl/issues/1892,
941 # https://github.com/ytdl-org/youtube-dl/issues/8164)
0e49d9a6
LL
942 'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
943 'info_dict': {
944 'id': 'lsguqyKfVQg',
945 'ext': 'mp4',
946 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
eb6793ba 947 'alt_title': 'Dark Walk - Position Music',
0e49d9a6 948 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
556dbe7f 949 'duration': 133,
0e49d9a6
LL
950 'upload_date': '20151119',
951 'uploader_id': 'IronSoulElf',
ec85ded8 952 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
0e49d9a6 953 'uploader': 'IronSoulElf',
eb6793ba
S
954 'creator': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
955 'track': 'Dark Walk - Position Music',
956 'artist': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
92bc97d3 957 'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
0e49d9a6
LL
958 },
959 'params': {
960 'skip_download': True,
961 },
962 },
61f92af1 963 {
067aa17e 964 # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
61f92af1
S
965 'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
966 'only_matching': True,
967 },
313dfc45
LL
968 {
969 # Video with yt:stretch=17:0
970 'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
971 'info_dict': {
972 'id': 'Q39EVAstoRM',
973 'ext': 'mp4',
974 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
975 'description': 'md5:ee18a25c350637c8faff806845bddee9',
976 'upload_date': '20151107',
977 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
978 'uploader': 'CH GAMER DROID',
979 },
980 'params': {
981 'skip_download': True,
982 },
be49068d 983 'skip': 'This video does not exist.',
313dfc45 984 },
7caf9830
S
985 {
986 # Video licensed under Creative Commons
987 'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
988 'info_dict': {
989 'id': 'M4gD1WSo5mA',
990 'ext': 'mp4',
991 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
992 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
556dbe7f 993 'duration': 721,
7caf9830
S
994 'upload_date': '20150127',
995 'uploader_id': 'BerkmanCenter',
ec85ded8 996 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
556dbe7f 997 'uploader': 'The Berkman Klein Center for Internet & Society',
7caf9830
S
998 'license': 'Creative Commons Attribution license (reuse allowed)',
999 },
1000 'params': {
1001 'skip_download': True,
1002 },
1003 },
fd050249
S
1004 {
1005 # Channel-like uploader_url
1006 'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
1007 'info_dict': {
1008 'id': 'eQcmzGIKrzg',
1009 'ext': 'mp4',
1010 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
1011 'description': 'md5:dda0d780d5a6e120758d1711d062a867',
556dbe7f 1012 'duration': 4060,
fd050249 1013 'upload_date': '20151119',
eb6793ba 1014 'uploader': 'Bernie Sanders',
fd050249 1015 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
ec85ded8 1016 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
fd050249
S
1017 'license': 'Creative Commons Attribution license (reuse allowed)',
1018 },
1019 'params': {
1020 'skip_download': True,
1021 },
1022 },
040ac686
S
1023 {
1024 'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
1025 'only_matching': True,
7f29cf54
S
1026 },
1027 {
067aa17e 1028 # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
7f29cf54
S
1029 'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
1030 'only_matching': True,
6496ccb4
S
1031 },
1032 {
1033 # Rental video preview
1034 'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
1035 'info_dict': {
1036 'id': 'uGpuVWrhIzE',
1037 'ext': 'mp4',
1038 'title': 'Piku - Trailer',
1039 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
1040 'upload_date': '20150811',
1041 'uploader': 'FlixMatrix',
1042 'uploader_id': 'FlixMatrixKaravan',
ec85ded8 1043 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
6496ccb4
S
1044 'license': 'Standard YouTube License',
1045 },
1046 'params': {
1047 'skip_download': True,
1048 },
eb6793ba 1049 'skip': 'This video is not available.',
022a5d66 1050 },
12afdc2a
S
1051 {
1052 # YouTube Red video with episode data
1053 'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
1054 'info_dict': {
1055 'id': 'iqKdEhx-dD4',
1056 'ext': 'mp4',
1057 'title': 'Isolation - Mind Field (Ep 1)',
4fe54c12 1058 'description': 'md5:46a29be4ceffa65b92d277b93f463c0f',
556dbe7f 1059 'duration': 2085,
12afdc2a
S
1060 'upload_date': '20170118',
1061 'uploader': 'Vsauce',
1062 'uploader_id': 'Vsauce',
1063 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
12afdc2a
S
1064 'series': 'Mind Field',
1065 'season_number': 1,
1066 'episode_number': 1,
1067 },
1068 'params': {
1069 'skip_download': True,
1070 },
1071 'expected_warnings': [
1072 'Skipping DASH manifest',
1073 ],
1074 },
c7121fa7
S
1075 {
1076 # The following content has been identified by the YouTube community
1077 # as inappropriate or offensive to some audiences.
1078 'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
1079 'info_dict': {
1080 'id': '6SJNVb0GnPI',
1081 'ext': 'mp4',
1082 'title': 'Race Differences in Intelligence',
1083 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
1084 'duration': 965,
1085 'upload_date': '20140124',
1086 'uploader': 'New Century Foundation',
1087 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
1088 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
c7121fa7
S
1089 },
1090 'params': {
1091 'skip_download': True,
1092 },
1093 },
022a5d66
S
1094 {
1095 # itag 212
1096 'url': '1t24XAntNCY',
1097 'only_matching': True,
fd5c4aab
S
1098 },
1099 {
1100 # geo restricted to JP
1101 'url': 'sJL6WA-aGkQ',
1102 'only_matching': True,
1103 },
d0ba5587
S
1104 {
1105 'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
1106 'only_matching': True,
1107 },
cd5a74a2
S
1108 {
1109 'url': 'https://invidio.us/watch?v=BaW_jenozKc',
1110 'only_matching': True,
1111 },
825cd268
RA
1112 {
1113 # DRM protected
1114 'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
1115 'only_matching': True,
4fe54c12
S
1116 },
1117 {
1118 # Video with unsupported adaptive stream type formats
1119 'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
1120 'info_dict': {
1121 'id': 'Z4Vy8R84T1U',
1122 'ext': 'mp4',
1123 'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
1124 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
1125 'duration': 433,
1126 'upload_date': '20130923',
1127 'uploader': 'Amelia Putri Harwita',
1128 'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',
1129 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',
1130 'formats': 'maxcount:10',
1131 },
1132 'params': {
1133 'skip_download': True,
1134 'youtube_include_dash_manifest': False,
1135 },
5429d6a9 1136 'skip': 'not actual anymore',
5caabd3c 1137 },
1138 {
822b9d9c 1139 # Youtube Music Auto-generated description
5caabd3c 1140 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1141 'info_dict': {
1142 'id': 'MgNrAu2pzNs',
1143 'ext': 'mp4',
1144 'title': 'Voyeur Girl',
1145 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
1146 'upload_date': '20190312',
5429d6a9
S
1147 'uploader': 'Stephen - Topic',
1148 'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',
5caabd3c 1149 'artist': 'Stephen',
1150 'track': 'Voyeur Girl',
1151 'album': 'it\'s too much love to know my dear',
1152 'release_date': '20190313',
1153 'release_year': 2019,
1154 },
1155 'params': {
1156 'skip_download': True,
1157 },
1158 },
1159 {
822b9d9c 1160 # Youtube Music Auto-generated description
5caabd3c 1161 # Retrieve 'artist' field from 'Artist:' in video description
1162 # when it is present on youtube music video
5caabd3c 1163 'url': 'https://www.youtube.com/watch?v=k0jLE7tTwjY',
1164 'info_dict': {
1165 'id': 'k0jLE7tTwjY',
1166 'ext': 'mp4',
1167 'title': 'Latch Feat. Sam Smith',
1168 'description': 'md5:3cb1e8101a7c85fcba9b4fb41b951335',
1169 'upload_date': '20150110',
1170 'uploader': 'Various Artists - Topic',
1171 'uploader_id': 'UCNkEcmYdjrH4RqtNgh7BZ9w',
1172 'artist': 'Disclosure',
1173 'track': 'Latch Feat. Sam Smith',
1174 'album': 'Latch Featuring Sam Smith',
1175 'release_date': '20121008',
1176 'release_year': 2012,
1177 },
1178 'params': {
1179 'skip_download': True,
1180 },
1181 },
1182 {
822b9d9c 1183 # Youtube Music Auto-generated description
5caabd3c 1184 # handle multiple artists on youtube music video
1185 'url': 'https://www.youtube.com/watch?v=74qn0eJSjpA',
1186 'info_dict': {
1187 'id': '74qn0eJSjpA',
1188 'ext': 'mp4',
1189 'title': 'Eastside',
1190 'description': 'md5:290516bb73dcbfab0dcc4efe6c3de5f2',
1191 'upload_date': '20180710',
1192 'uploader': 'Benny Blanco - Topic',
1193 'uploader_id': 'UCzqz_ksRu_WkIzmivMdIS7A',
1194 'artist': 'benny blanco, Halsey, Khalid',
1195 'track': 'Eastside',
1196 'album': 'Eastside',
1197 'release_date': '20180713',
1198 'release_year': 2018,
1199 },
1200 'params': {
1201 'skip_download': True,
1202 },
1203 },
1204 {
822b9d9c 1205 # Youtube Music Auto-generated description
5caabd3c 1206 # handle youtube music video with release_year and no release_date
1207 'url': 'https://www.youtube.com/watch?v=-hcAI0g-f5M',
1208 'info_dict': {
1209 'id': '-hcAI0g-f5M',
1210 'ext': 'mp4',
1211 'title': 'Put It On Me',
5429d6a9 1212 'description': 'md5:f6422397c07c4c907c6638e1fee380a5',
5caabd3c 1213 'upload_date': '20180426',
1214 'uploader': 'Matt Maeson - Topic',
1215 'uploader_id': 'UCnEkIGqtGcQMLk73Kp-Q5LQ',
1216 'artist': 'Matt Maeson',
1217 'track': 'Put It On Me',
1218 'album': 'The Hearse',
1219 'release_date': None,
1220 'release_year': 2018,
1221 },
1222 'params': {
1223 'skip_download': True,
1224 },
1225 },
66b48727
RA
1226 {
1227 'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',
1228 'only_matching': True,
1229 },
011e75e6
S
1230 {
1231 # invalid -> valid video id redirection
1232 'url': 'DJztXj2GPfl',
1233 'info_dict': {
1234 'id': 'DJztXj2GPfk',
1235 'ext': 'mp4',
1236 'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',
1237 'description': 'md5:bf577a41da97918e94fa9798d9228825',
1238 'upload_date': '20090125',
1239 'uploader': 'Prochorowka',
1240 'uploader_id': 'Prochorowka',
1241 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',
1242 'artist': 'Panjabi MC',
1243 'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',
1244 'album': 'Beware of the Boys (Mundian To Bach Ke)',
1245 },
1246 'params': {
1247 'skip_download': True,
1248 },
1249 }
2eb88d95
PH
1250 ]
1251
e0df6211
PH
def __init__(self, *args, **kwargs):
    """Initialise the extractor and create its empty player cache.

    All positional and keyword arguments are forwarded unchanged to the
    parent class initialiser.
    """
    super(YoutubeIE, self).__init__(*args, **kwargs)
    # Filled by _decrypt_signature with signature functions keyed by
    # (player URL, signature cache id).
    self._player_cache = dict()
e0df6211 1255
c5e8d7af
PH
def report_video_info_webpage_download(self, video_id):
    """Announce that the video info webpage is about to be downloaded."""
    message = '%s: Downloading video info webpage' % video_id
    self.to_screen(message)
c5e8d7af 1259
c5e8d7af
PH
def report_information_extraction(self, video_id):
    """Announce that video information is about to be extracted."""
    message = '%s: Extracting video information' % video_id
    self.to_screen(message)
c5e8d7af
PH
1263
def report_unavailable_format(self, video_id, format):
    """Announce that the given format is not available for the video."""
    details = (video_id, format)
    self.to_screen('%s: Format %s not available' % details)
c5e8d7af
PH
1267
def report_rtmp_download(self):
    """Announce that the download will use the RTMP protocol."""
    notice = 'RTMP download detected'
    self.to_screen(notice)
c5e8d7af 1271
60064c53
PH
1272 def _signature_cache_id(self, example_sig):
1273 """ Return a string representation of a signature """
78caa52a 1274 return '.'.join(compat_str(len(part)) for part in example_sig.split('.'))
60064c53
PH
1275
def _extract_signature_function(self, video_id, player_url, example_sig):
    """Return a callable that deciphers signatures for the given player.

    The player id and the player type (the file extension) are parsed out
    of player_url. If the 'youtube-sigfuncs' cache section already holds
    an index list for this player and signature shape, a function applying
    that list is returned. Otherwise the player is downloaded and parsed,
    the resulting function is probed to derive the index list, and the
    list is stored in the cache before the function is returned.

    Raises ExtractorError when the player cannot be identified from the
    URL, when the derived cache id is not a plain file name, or when the
    player type is neither 'js' nor 'swf'.
    """
    id_m = re.match(
        r'.*?[-.](?P<id>[a-zA-Z0-9_-]+)(?:/watch_as3|/html5player(?:-new)?|(?:/[a-z]{2,3}_[A-Z]{2})?/base)?\.(?P<ext>[a-z]+)$',
        player_url)
    if not id_m:
        raise ExtractorError('Cannot identify player %r' % player_url)
    player_type = id_m.group('ext')
    player_id = id_m.group('id')

    # Read from filesystem cache
    func_id = '%s_%s_%s' % (
        player_type, player_id, self._signature_cache_id(example_sig))
    # Checked with an explicit raise instead of an assert so the guard is
    # still in place under "python -O", which strips assert statements.
    if os.path.basename(func_id) != func_id:
        raise ExtractorError('Invalid signature cache id %r' % func_id)

    cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
    if cache_spec is not None:
        # The cached spec lists, per output position, the input index to use.
        return lambda s: ''.join(s[i] for i in cache_spec)

    download_note = (
        'Downloading player %s' % player_url
        if self._downloader.params.get('verbose') else
        'Downloading %s player %s' % (player_type, player_id)
    )
    if player_type == 'js':
        code = self._download_webpage(
            player_url, video_id,
            note=download_note,
            errnote='Download of %s failed' % player_url)
        res = self._parse_sig_js(code)
    elif player_type == 'swf':
        urlh = self._request_webpage(
            player_url, video_id,
            note=download_note,
            errnote='Download of %s failed' % player_url)
        code = urlh.read()
        res = self._parse_sig_swf(code)
    else:
        # Previously "assert False"; with asserts stripped the code fell
        # through to an unbound ``res`` below.
        raise ExtractorError('Invalid player type %r' % player_type)

    # Probe with a string whose k-th character has code point k, so the
    # output reveals which input index ends up at each position.
    test_string = ''.join(map(compat_chr, range(len(example_sig))))
    cache_res = res(test_string)
    cache_spec = [ord(c) for c in cache_res]

    self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec)
    return res
1321
def _print_sig_code(self, func, example_sig):
    """Print Python source equivalent to the signature function func.

    func is applied to a probe string whose character at position k has
    code point k, so its output spells out which input index lands at
    each output position. That index list is rendered as a sum of
    ``s[...]`` index and slice expressions, wrapped in a check on the
    lengths of the dot-separated parts of example_sig, and passed to
    to_screen.
    """
    def gen_sig_code(idxs):
        # Yield one 's[...]' expression per single index or +/-1 run.
        def _genslice(start, end, step):
            # Render the inclusive run start..end as a slice of s.
            starts = '' if start == 0 else str(start)
            # The slice stop is exclusive, hence end + step; a negative
            # stop would count from the end of s, so it is left open.
            ends = (':%d' % (end + step)) if end + step >= 0 else ':'
            steps = '' if step == 1 else (':%d' % step)
            return 's[%s%s%s]' % (starts, ends, steps)

        if not idxs:
            # An empty index list has nothing to render.
            return
        # Pre-bind i so a single-index list is rendered by the trailing
        # yield; the pairwise loop below does not run in that case and
        # would otherwise leave i unbound.
        i = idxs[0]
        step = None
        # Squelch pyflakes warnings - start will be set when step is set
        start = '(Never used)'
        for i, prev in zip(idxs[1:], idxs[:-1]):
            if step is not None:
                if i - prev == step:
                    continue
                yield _genslice(start, prev, step)
                step = None
                continue
            if i - prev in [-1, 1]:
                step = i - prev
                start = prev
                continue
            else:
                yield 's[%d]' % prev
        if step is None:
            yield 's[%d]' % i
        else:
            yield _genslice(start, i, step)

    test_string = ''.join(map(compat_chr, range(len(example_sig))))
    cache_res = func(test_string)
    cache_spec = [ord(c) for c in cache_res]
    expr_code = ' + '.join(gen_sig_code(cache_spec))
    signature_id_tuple = '(%s)' % (
        ', '.join(compat_str(len(p)) for p in example_sig.split('.')))
    code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
            ' return %s\n') % (signature_id_tuple, expr_code)
    self.to_screen('Extracted signature function:\n' + code)
edf3e38e 1360
e0df6211
PH
def _parse_sig_js(self, jscode):
    """Build a signature-deciphering callable from JS player source.

    The name of the initial signature function is looked up in jscode
    via self._search_regex (capture group 'sig') using the patterns
    below. That function is then extracted with JSInterpreter, and a
    wrapper is returned that calls it with the signature as its single
    argument.
    """
    funcname = self._search_regex(
        (r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\b(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
         r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
         # Obsolete patterns
         r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
         r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         # This pattern used to be listed twice in a row; the identical
         # repeat could never match anything the first did not.
         r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\('),
        jscode, 'Initial JS player signature function name', group='sig')

    jsi = JSInterpreter(jscode)
    initial_function = jsi.extract_function(funcname)
    # The extracted function takes its arguments as a list.
    return lambda s: initial_function([s])
1381
def _parse_sig_swf(self, file_contents):
    """Build a signature-deciphering callable from SWF player contents.

    The 'decipher' function of the 'SignatureDecipher' class is extracted
    with SWFInterpreter; the returned wrapper calls it with the signature
    as its single argument.
    """
    interpreter = SWFInterpreter(file_contents)
    decipher_class = interpreter.extract_class('SignatureDecipher')
    decipher = interpreter.extract_function(decipher_class, 'decipher')

    def run_decipher(s):
        # The extracted function takes its arguments as a list.
        return decipher([s])

    return run_decipher
1388
def _decrypt_signature(self, s, video_id, player_url, age_gate=False):
    """Turn the encrypted signature s into a working signature.

    player_url is made absolute (protocol-relative URLs get 'https:',
    other non-http(s) URLs are joined onto https://www.youtube.com). The
    signature function for that player and signature shape is taken from
    self._player_cache, or extracted and cached on first use. When the
    'youtube_print_sig_code' downloader param is set, the function is
    also printed as Python code. age_gate is accepted but not used here.

    Raises ExtractorError if player_url is None, or, with the formatted
    traceback in the message, if anything fails while obtaining or
    running the signature function.
    """
    if player_url is None:
        raise ExtractorError('Cannot decrypt signature without player_url')

    is_protocol_relative = player_url.startswith('//')
    if is_protocol_relative:
        player_url = 'https:' + player_url
    elif not re.match(r'https?://', player_url):
        player_url = compat_urlparse.urljoin(
            'https://www.youtube.com', player_url)

    try:
        cache_key = (player_url, self._signature_cache_id(s))
        if cache_key in self._player_cache:
            decipher = self._player_cache[cache_key]
        else:
            decipher = self._extract_signature_function(
                video_id, player_url, s)
            self._player_cache[cache_key] = decipher
        if self._downloader.params.get('youtube_print_sig_code'):
            self._print_sig_code(decipher, s)
        return decipher(s)
    except Exception as e:
        # Any failure is reported as an extraction error carrying the
        # full traceback text.
        raise ExtractorError(
            'Signature extraction failed: ' + traceback.format_exc(),
            cause=e)
e0df6211 1415
def _get_subtitles(self, video_id, webpage):
    """Return the available subtitles as {language code: [format dicts]}.

    The track list is downloaded as XML from video.google.com; for the
    first track of each language, one {'url', 'ext'} entry is built per
    extension in self._SUBTITLE_FORMATS. An empty dict is returned, after
    a warning, when the list cannot be downloaded or contains no tracks.
    webpage is not used by this method.
    """
    list_url = 'https://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id
    try:
        track_list = self._download_xml(list_url, video_id, note=False)
    except ExtractorError as err:
        self._downloader.report_warning('unable to download video subtitles: %s' % error_to_compat_str(err))
        return {}

    subtitles = {}
    for track in track_list.findall('track'):
        lang = track.attrib['lang_code']
        if lang not in subtitles:
            # Only the first track seen for a language is used.
            subtitles[lang] = [
                {
                    'url': 'https://www.youtube.com/api/timedtext?' + compat_urllib_parse_urlencode({
                        'lang': lang,
                        'v': video_id,
                        'fmt': ext,
                        'name': track.attrib['name'].encode('utf-8'),
                    }),
                    'ext': ext,
                }
                for ext in self._SUBTITLE_FORMATS
            ]

    if subtitles:
        return subtitles
    self._downloader.report_warning('video doesn\'t have subtitles')
    return {}
1447
a72778d3
S
def _get_ytplayer_config(self, video_id, webpage):
    """Extract and parse the ytplayer.config object found in webpage.

    Returns the result of self._parse_json (called with fatal=False) on
    the matched text, or None when no pattern matches.
    """
    # User data may contain arbitrary character sequences that may affect
    # JSON extraction with regex, e.g. when '};' is contained the second
    # regex won't capture the whole JSON. This is worked around by trying
    # the more concrete regex first, until proper quoted string handling
    # is implemented to replace this workaround (see
    # https://github.com/ytdl-org/youtube-dl/issues/7468,
    # https://github.com/ytdl-org/youtube-dl/pull/7599)
    config_patterns = (
        r';ytplayer\.config\s*=\s*({.+?});ytplayer',
        r';ytplayer\.config\s*=\s*({.+?});',
    )
    raw_config = self._search_regex(
        config_patterns, webpage, 'ytplayer.config', default=None)
    if not raw_config:
        return None
    return self._parse_json(
        uppercase_escape(raw_config), video_id, fatal=False)
0e49d9a6 1465
360e1ca5 1466 def _get_automatic_captions(self, video_id, webpage):
de7f3446
JMF
1467 """We need the webpage for getting the captions url, pass it as an
1468 argument to speed up the process."""
69ea8ca4 1469 self.to_screen('%s: Looking for automatic captions' % video_id)
a72778d3 1470 player_config = self._get_ytplayer_config(video_id, webpage)
78caa52a 1471 err_msg = 'Couldn\'t find automatic captions for %s' % video_id
a72778d3 1472 if not player_config:
de7f3446
JMF
1473 self._downloader.report_warning(err_msg)
1474 return {}
de7f3446 1475 try:
0792d563 1476 args = player_config['args']
b78b292f
S
1477 caption_url = args.get('ttsurl')
1478 if caption_url:
1479 timestamp = args['timestamp']
1480 # We get the available subtitles
15707c7e 1481 list_params = compat_urllib_parse_urlencode({
b78b292f
S
1482 'type': 'list',
1483 'tlangs': 1,
1484 'asrs': 1,
1485 })
1486 list_url = caption_url + '&' + list_params
1487 caption_list = self._download_xml(list_url, video_id)
1488 original_lang_node = caption_list.find('track')
1489 if original_lang_node is None:
1490 self._downloader.report_warning('Video doesn\'t have automatic captions')
1491 return {}
1492 original_lang = original_lang_node.attrib['lang_code']
1493 caption_kind = original_lang_node.attrib.get('kind', '')
1494
1495 sub_lang_list = {}
1496 for lang_node in caption_list.findall('target'):
1497 sub_lang = lang_node.attrib['lang_code']
1498 sub_formats = []
1499 for ext in self._SUBTITLE_FORMATS:
15707c7e 1500 params = compat_urllib_parse_urlencode({
b78b292f
S
1501 'lang': original_lang,
1502 'tlang': sub_lang,
1503 'fmt': ext,
1504 'ts': timestamp,
1505 'kind': caption_kind,
1506 })
1507 sub_formats.append({
1508 'url': caption_url + '&' + params,
1509 'ext': ext,
1510 })
1511 sub_lang_list[sub_lang] = sub_formats
1512 return sub_lang_list
1513
ddbb4c5c
S
1514 def make_captions(sub_url, sub_langs):
1515 parsed_sub_url = compat_urllib_parse_urlparse(sub_url)
1516 caption_qs = compat_parse_qs(parsed_sub_url.query)
1517 captions = {}
1518 for sub_lang in sub_langs:
1519 sub_formats = []
1520 for ext in self._SUBTITLE_FORMATS:
1521 caption_qs.update({
1522 'tlang': [sub_lang],
1523 'fmt': [ext],
1524 })
1525 sub_url = compat_urlparse.urlunparse(parsed_sub_url._replace(
1526 query=compat_urllib_parse_urlencode(caption_qs, True)))
1527 sub_formats.append({
1528 'url': sub_url,
1529 'ext': ext,
1530 })
1531 captions[sub_lang] = sub_formats
1532 return captions
1533
1534 # New captions format as of 22.06.2017
1535 player_response = args.get('player_response')
1536 if player_response and isinstance(player_response, compat_str):
1537 player_response = self._parse_json(
1538 player_response, video_id, fatal=False)
1539 if player_response:
1540 renderer = player_response['captions']['playerCaptionsTracklistRenderer']
1541 base_url = renderer['captionTracks'][0]['baseUrl']
1542 sub_lang_list = []
1543 for lang in renderer['translationLanguages']:
1544 lang_code = lang.get('languageCode')
1545 if lang_code:
1546 sub_lang_list.append(lang_code)
1547 return make_captions(base_url, sub_lang_list)
1548
b78b292f
S
1549 # Some videos don't provide ttsurl but rather caption_tracks and
1550 # caption_translation_languages (e.g. 20LmZk1hakA)
ddbb4c5c 1551 # Does not used anymore as of 22.06.2017
b78b292f
S
1552 caption_tracks = args['caption_tracks']
1553 caption_translation_languages = args['caption_translation_languages']
1554 caption_url = compat_parse_qs(caption_tracks.split(',')[0])['u'][0]
ddbb4c5c 1555 sub_lang_list = []
b78b292f
S
1556 for lang in caption_translation_languages.split(','):
1557 lang_qs = compat_parse_qs(compat_urllib_parse_unquote_plus(lang))
1558 sub_lang = lang_qs.get('lc', [None])[0]
ddbb4c5c
S
1559 if sub_lang:
1560 sub_lang_list.append(sub_lang)
1561 return make_captions(caption_url, sub_lang_list)
de7f3446
JMF
1562 # An extractor error can be raise by the download process if there are
1563 # no automatic captions but there are subtitles
ddbb4c5c 1564 except (KeyError, IndexError, ExtractorError):
de7f3446
JMF
1565 self._downloader.report_warning(err_msg)
1566 return {}
1567
21c340b8
S
def _mark_watched(self, video_id, video_info, player_response):
    """Request the playback tracking URL of a video.

    The URL is looked up in player_response first and in video_info as
    a fallback. Nothing is requested when neither yields a valid URL.
    The request itself is issued with fatal=False.
    """
    tracking_url = try_get(
        player_response,
        lambda x: x['playbackTracking']['videostatsPlaybackUrl']['baseUrl'])
    if not tracking_url:
        tracking_url = try_get(
            video_info, lambda x: x['videostats_playback_base_url'][0])
    playback_url = url_or_none(tracking_url)
    if not playback_url:
        return

    url_parts = compat_urlparse.urlparse(playback_url)
    query = compat_urlparse.parse_qs(url_parts.query)

    # cpn generation algorithm is reverse engineered from base.js.
    # In fact it works even with dummy cpn.
    CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
    cpn_chars = [
        CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(16)]

    query['ver'] = ['2']
    query['cpn'] = [''.join(cpn_chars)]

    watched_url = compat_urlparse.urlunparse(url_parts._replace(
        query=compat_urllib_parse_urlencode(query, True)))
    self._download_webpage(
        watched_url, video_id, 'Marking watched',
        'Unable to mark watched', fatal=False)
1593
66c9fa36
S
@staticmethod
def _extract_urls(webpage):
    """Collect YouTube embeds found in the HTML of a page.

    Returns a list containing, in this order:
      * the URLs of embedded players (iframe/embed/object tags,
        data-video-url attributes, embedSWF(...) and new SWFObject(...)
        calls),
      * the values of lazyYT "data-youtube-id" attributes,
      * the "data-video_id" values of the Wordpress "YouTube Video
        Importer" plugin.
    The list is empty when nothing matches.
    """
    # Embedded YouTube player.
    # The embedSWF alternative used to read `embedSWF\(?:\s*`, which
    # demands a literal ':' after the name and therefore never matched
    # a real `embedSWF("...")` call; it now matches the opening
    # parenthesis followed by optional whitespace.
    entries = [
        unescapeHTML(mobj.group('url'))
        for mobj in re.finditer(r'''(?x)
        (?:
            <iframe[^>]+?src=|
            data-video-url=|
            <embed[^>]+?src=|
            embedSWF\(\s*|
            <object[^>]+data=|
            new\s+SWFObject\(
        )
        (["\'])
            (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
            (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
        \1''', webpage)]

    # lazyYT YouTube embed
    entries.extend(
        unescapeHTML(youtube_id) for youtube_id in re.findall(
            r'class="lazyYT" data-youtube-id="([^"]+)"', webpage))

    # Wordpress "YouTube Video Importer" plugin
    matches = re.findall(r'''(?x)<div[^>]+
        class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
        data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
    # findall yields (q1, q2, id) tuples; the id is the last group
    entries.extend(m[-1] for m in matches)

    return entries
1625
@staticmethod
def _extract_url(webpage):
    """Return the first entry found by YoutubeIE._extract_urls, or None."""
    found = YoutubeIE._extract_urls(webpage)
    if not found:
        return None
    return found[0]
1630
97665381
PH
@classmethod
def extract_id(cls, url):
    """Return the second group of cls._VALID_URL matched against url.

    Raises ExtractorError when url does not match cls._VALID_URL (the
    pattern is compiled in verbose mode).
    """
    match = re.match(cls._VALID_URL, url, re.VERBOSE)
    if match is not None:
        return match.group(2)
    raise ExtractorError('Invalid URL: %s' % url)
1638
9cafc3fd
S
1639 @staticmethod
1640 def _extract_chapters(description, duration):
1641 if not description:
1642 return None
1643 chapter_lines = re.findall(
1644 r'(?:^|<br\s*/>)([^<]*<a[^>]+onclick=["\']yt\.www\.watch\.player\.seekTo[^>]+>(\d{1,2}:\d{1,2}(?::\d{1,2})?)</a>[^>]*)(?=$|<br\s*/>)',
1645 description)
1646 if not chapter_lines:
1647 return None
1648 chapters = []
1649 for next_num, (chapter_line, time_point) in enumerate(
1650 chapter_lines, start=1):
1651 start_time = parse_duration(time_point)
1652 if start_time is None:
1653 continue
39d4c1be
S
1654 if start_time > duration:
1655 break
9cafc3fd
S
1656 end_time = (duration if next_num == len(chapter_lines)
1657 else parse_duration(chapter_lines[next_num][1]))
1658 if end_time is None:
1659 continue
39d4c1be
S
1660 if end_time > duration:
1661 end_time = duration
1662 if start_time > end_time:
1663 break
9cafc3fd
S
1664 chapter_title = re.sub(
1665 r'<a[^>]+>[^<]+</a>', '', chapter_line).strip(' \t-')
1666 chapter_title = re.sub(r'\s+', ' ', chapter_title)
1667 chapters.append({
1668 'start_time': start_time,
1669 'end_time': end_time,
1670 'title': chapter_title,
1671 })
1672 return chapters
1673
c5e8d7af 1674 def _real_extract(self, url):
cf7e015f
S
1675 url, smuggled_data = unsmuggle_url(url, {})
1676
7e8c0af0 1677 proto = (
78caa52a
PH
1678 'http' if self._downloader.params.get('prefer_insecure', False)
1679 else 'https')
7e8c0af0 1680
7c80519c 1681 start_time = None
297a564b 1682 end_time = None
7c80519c
JMF
1683 parsed_url = compat_urllib_parse_urlparse(url)
1684 for component in [parsed_url.fragment, parsed_url.query]:
1685 query = compat_parse_qs(component)
297a564b 1686 if start_time is None and 't' in query:
7c80519c 1687 start_time = parse_duration(query['t'][0])
2929fa0e
JMF
1688 if start_time is None and 'start' in query:
1689 start_time = parse_duration(query['start'][0])
297a564b
JMF
1690 if end_time is None and 'end' in query:
1691 end_time = parse_duration(query['end'][0])
7c80519c 1692
c5e8d7af
PH
1693 # Extract original video URL from URL with redirection, like age verification, using next_url parameter
1694 mobj = re.search(self._NEXT_URL_RE, url)
1695 if mobj:
7fd002c0 1696 url = proto + '://www.youtube.com/' + compat_urllib_parse_unquote(mobj.group(1)).lstrip('/')
97665381 1697 video_id = self.extract_id(url)
c5e8d7af
PH
1698
1699 # Get video webpage
aa79ac0c 1700 url = proto + '://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1&bpctr=9999999999' % video_id
011e75e6
S
1701 video_webpage, urlh = self._download_webpage_handle(url, video_id)
1702
1703 qs = compat_parse_qs(compat_urllib_parse_urlparse(urlh.geturl()).query)
1704 video_id = qs.get('v', [None])[0] or video_id
c5e8d7af
PH
1705
1706 # Attempt to extract SWF player URL
e0df6211 1707 mobj = re.search(r'swfConfig.*?"(https?:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage)
c5e8d7af
PH
1708 if mobj is not None:
1709 player_url = re.sub(r'\\(.)', r'\1', mobj.group(1))
1710 else:
1711 player_url = None
1712
d8d24a92
S
1713 dash_mpds = []
1714
1715 def add_dash_mpd(video_info):
1716 dash_mpd = video_info.get('dashmpd')
1717 if dash_mpd and dash_mpd[0] not in dash_mpds:
1718 dash_mpds.append(dash_mpd[0])
1719
561b456e
S
1720 def add_dash_mpd_pr(pl_response):
1721 dash_mpd = url_or_none(try_get(
1722 pl_response, lambda x: x['streamingData']['dashManifestUrl'],
1723 compat_str))
1724 if dash_mpd and dash_mpd not in dash_mpds:
1725 dash_mpds.append(dash_mpd)
1726
c7121fa7
S
1727 is_live = None
1728 view_count = None
1729
1730 def extract_view_count(v_info):
1731 return int_or_none(try_get(v_info, lambda x: x['view_count'][0]))
1732
c2d125d9
S
1733 def extract_player_response(player_response, video_id):
1734 pl_response = str_or_none(player_response)
1735 if not pl_response:
1736 return
1737 pl_response = self._parse_json(pl_response, video_id, fatal=False)
1738 if isinstance(pl_response, dict):
1739 add_dash_mpd_pr(pl_response)
1740 return pl_response
1741
dbdaaa23
S
1742 player_response = {}
1743
c5e8d7af 1744 # Get video info
43ebf77d 1745 video_info = {}
6449cd80 1746 embed_webpage = None
c108eb73 1747 if re.search(r'player-age-gate-content">', video_webpage) is not None:
c108eb73
JMF
1748 age_gate = True
1749 # We simulate the access to the video from www.youtube.com/v/{video_id}
1750 # this can be viewed without login into Youtube
beb95e77
CL
1751 url = proto + '://www.youtube.com/embed/%s' % video_id
1752 embed_webpage = self._download_webpage(url, video_id, 'Downloading embed webpage')
15707c7e 1753 data = compat_urllib_parse_urlencode({
2c57c7fa
JMF
1754 'video_id': video_id,
1755 'eurl': 'https://youtube.googleapis.com/v/' + video_id,
c084c934 1756 'sts': self._search_regex(
beb95e77 1757 r'"sts"\s*:\s*(\d+)', embed_webpage, 'sts', default=''),
2c57c7fa 1758 })
7e8c0af0 1759 video_info_url = proto + '://www.youtube.com/get_video_info?' + data
d332ec72
S
1760 try:
1761 video_info_webpage = self._download_webpage(
1762 video_info_url, video_id,
1763 note='Refetching age-gated info webpage',
1764 errnote='unable to download video info webpage')
1765 except ExtractorError:
1766 video_info_webpage = None
1767 if video_info_webpage:
1768 video_info = compat_parse_qs(video_info_webpage)
1769 pl_response = video_info.get('player_response', [None])[0]
1770 player_response = extract_player_response(pl_response, video_id)
1771 add_dash_mpd(video_info)
1772 view_count = extract_view_count(video_info)
c108eb73
JMF
1773 else:
1774 age_gate = False
d8d24a92 1775 # Try looking directly into the video webpage
a72778d3
S
1776 ytplayer_config = self._get_ytplayer_config(video_id, video_webpage)
1777 if ytplayer_config:
4e62ebe2 1778 args = ytplayer_config['args']
4c76aa06 1779 if args.get('url_encoded_fmt_stream_map') or args.get('hlsvp'):
d8d24a92
S
1780 # Convert to the same format returned by compat_parse_qs
1781 video_info = dict((k, [v]) for k, v in args.items())
1782 add_dash_mpd(video_info)
6496ccb4
S
1783 # Rental video is not rented but preview is available (e.g.
1784 # https://www.youtube.com/watch?v=yYr8q0y5Jfg,
067aa17e 1785 # https://github.com/ytdl-org/youtube-dl/issues/10532)
6496ccb4
S
1786 if not video_info and args.get('ypc_vid'):
1787 return self.url_result(
1788 args['ypc_vid'], YoutubeIE.ie_key(), video_id=args['ypc_vid'])
2fe1ff85
JMF
1789 if args.get('livestream') == '1' or args.get('live_playback') == 1:
1790 is_live = True
dbdaaa23 1791 if not player_response:
c2d125d9 1792 player_response = extract_player_response(args.get('player_response'), video_id)
0a3cf9ad 1793 if not video_info or self._downloader.params.get('youtube_include_dash_manifest', True):
561b456e 1794 add_dash_mpd_pr(player_response)
bbb7c3f7
YCH
1795
1796 def extract_unavailable_message():
0add33ab
S
1797 messages = []
1798 for tag, kind in (('h1', 'message'), ('div', 'submessage')):
1799 msg = self._html_search_regex(
1800 r'(?s)<{tag}[^>]+id=["\']unavailable-{kind}["\'][^>]*>(.+?)</{tag}>'.format(tag=tag, kind=kind),
1801 video_webpage, 'unavailable %s' % kind, default=None)
1802 if msg:
1803 messages.append(msg)
1804 if messages:
1805 return '\n'.join(messages)
bbb7c3f7 1806
f93abcf1 1807 if not video_info and not player_response:
15be3eb5
RA
1808 unavailable_message = extract_unavailable_message()
1809 if not unavailable_message:
1810 unavailable_message = 'Unable to extract video data'
1811 raise ExtractorError(
1812 'YouTube said: %s' % unavailable_message, expected=True, video_id=video_id)
1813
f93abcf1
S
1814 if not isinstance(video_info, dict):
1815 video_info = {}
1816
dbdaaa23
S
1817 video_details = try_get(
1818 player_response, lambda x: x['videoDetails'], dict) or {}
1819
8dbf751a
RA
1820 video_title = video_info.get('title', [None])[0] or video_details.get('title')
1821 if not video_title:
cf7e015f
S
1822 self._downloader.report_warning('Unable to extract video title')
1823 video_title = '_'
1824
9cafc3fd 1825 description_original = video_description = get_element_by_id("eow-description", video_webpage)
cf7e015f 1826 if video_description:
fa4bc6e7
RA
1827
1828 def replace_url(m):
1829 redir_url = compat_urlparse.urljoin(url, m.group(1))
1830 parsed_redir_url = compat_urllib_parse_urlparse(redir_url)
1831 if re.search(r'^(?:www\.)?(?:youtube(?:-nocookie)?\.com|youtu\.be)$', parsed_redir_url.netloc) and parsed_redir_url.path == '/redirect':
1832 qs = compat_parse_qs(parsed_redir_url.query)
1833 q = qs.get('q')
1834 if q and q[0]:
1835 return q[0]
1836 return redir_url
1837
9cafc3fd 1838 description_original = video_description = re.sub(r'''(?x)
cf7e015f 1839 <a\s+
25cb7a0e 1840 (?:[a-zA-Z-]+="[^"]*"\s+)*?
23f13e97 1841 (?:title|href)="([^"]+)"\s+
25cb7a0e 1842 (?:[a-zA-Z-]+="[^"]*"\s+)*?
525cedb9 1843 class="[^"]*"[^>]*>
23f13e97 1844 [^<]+\.{3}\s*
cf7e015f 1845 </a>
fa4bc6e7 1846 ''', replace_url, video_description)
cf7e015f
S
1847 video_description = clean_html(video_description)
1848 else:
8dbf751a 1849 video_description = self._html_search_meta('description', video_webpage) or video_details.get('shortDescription')
cf7e015f 1850
8fe10494 1851 if not smuggled_data.get('force_singlefeed', False):
5e1eddb9 1852 if not self._downloader.params.get('noplaylist'):
8fe10494
S
1853 multifeed_metadata_list = try_get(
1854 player_response,
1855 lambda x: x['multicamera']['playerLegacyMulticameraRenderer']['metadataList'],
1856 compat_str) or try_get(
1857 video_info, lambda x: x['multifeed_metadata_list'][0], compat_str)
1858 if multifeed_metadata_list:
1859 entries = []
1860 feed_ids = []
1861 for feed in multifeed_metadata_list.split(','):
1862 # Unquote should take place before split on comma (,) since textual
1863 # fields may contain comma as well (see
067aa17e 1864 # https://github.com/ytdl-org/youtube-dl/issues/8536)
8fe10494 1865 feed_data = compat_parse_qs(compat_urllib_parse_unquote_plus(feed))
6b09401b
S
1866
1867 def feed_entry(name):
1868 return try_get(feed_data, lambda x: x[name][0], compat_str)
1869
1870 feed_id = feed_entry('id')
1871 if not feed_id:
1872 continue
1873 feed_title = feed_entry('title')
1874 title = video_title
1875 if feed_title:
1876 title += ' (%s)' % feed_title
8fe10494
S
1877 entries.append({
1878 '_type': 'url_transparent',
1879 'ie_key': 'Youtube',
1880 'url': smuggle_url(
1881 '%s://www.youtube.com/watch?v=%s' % (proto, feed_data['id'][0]),
1882 {'force_singlefeed': True}),
6b09401b 1883 'title': title,
8fe10494 1884 })
6b09401b 1885 feed_ids.append(feed_id)
8fe10494
S
1886 self.to_screen(
1887 'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
1888 % (', '.join(feed_ids), video_id))
1889 return self.playlist_result(entries, video_id, video_title, video_description)
1890 else:
1891 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
cf7e015f 1892
c7121fa7 1893 if view_count is None:
1c9c8de2 1894 view_count = extract_view_count(video_info)
dbdaaa23
S
1895 if view_count is None and video_details:
1896 view_count = int_or_none(video_details.get('viewCount'))
1d699755 1897
27019dbb 1898 if is_live is None:
898238e9 1899 is_live = bool_or_none(video_details.get('isLive'))
27019dbb 1900
c5e8d7af
PH
1901 # Check for "rental" videos
1902 if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info:
067aa17e 1903 raise ExtractorError('"rental" videos not supported. See https://github.com/ytdl-org/youtube-dl/issues/359 for more information.', expected=True)
c5e8d7af 1904
c63ca0ee
S
1905 def _extract_filesize(media_url):
1906 return int_or_none(self._search_regex(
1907 r'\bclen[=/](\d+)', media_url, 'filesize', default=None))
1908
bf1317d2
S
1909 streaming_formats = try_get(player_response, lambda x: x['streamingData']['formats'], list) or []
1910 streaming_formats.extend(try_get(player_response, lambda x: x['streamingData']['adaptiveFormats'], list) or [])
1911
c5e8d7af
PH
1912 if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
1913 self.report_rtmp_download()
dd27fd17
PH
1914 formats = [{
1915 'format_id': '_rtmp',
1916 'protocol': 'rtmp',
1917 'url': video_info['conn'][0],
1918 'player_url': player_url,
1919 }]
bf1317d2 1920 elif not is_live and (streaming_formats or len(video_info.get('url_encoded_fmt_stream_map', [''])[0]) >= 1 or len(video_info.get('adaptive_fmts', [''])[0]) >= 1):
5f6a1245 1921 encoded_url_map = video_info.get('url_encoded_fmt_stream_map', [''])[0] + ',' + video_info.get('adaptive_fmts', [''])[0]
00fe14fc 1922 if 'rtmpe%3Dyes' in encoded_url_map:
067aa17e 1923 raise ExtractorError('rtmpe downloads are not supported, see https://github.com/ytdl-org/youtube-dl/issues/343 for more information.', expected=True)
bf1317d2 1924 formats = []
3318832e 1925 formats_spec = {}
82156fdb 1926 fmt_list = video_info.get('fmt_list', [''])[0]
1927 if fmt_list:
1928 for fmt in fmt_list.split(','):
1929 spec = fmt.split('/')
3318832e 1930 if len(spec) > 1:
1931 width_height = spec[1].split('x')
1932 if len(width_height) == 2:
1933 formats_spec[spec[0]] = {
1934 'resolution': spec[1],
1935 'width': int_or_none(width_height[0]),
1936 'height': int_or_none(width_height[1]),
1937 }
bf1317d2
S
1938 for fmt in streaming_formats:
1939 itag = str_or_none(fmt.get('itag'))
1940 if not itag:
201e9eaa 1941 continue
bf1317d2
S
1942 quality = fmt.get('quality')
1943 quality_label = fmt.get('qualityLabel') or quality
1944 formats_spec[itag] = {
1945 'asr': int_or_none(fmt.get('audioSampleRate')),
1946 'filesize': int_or_none(fmt.get('contentLength')),
1947 'format_note': quality_label,
1948 'fps': int_or_none(fmt.get('fps')),
1949 'height': int_or_none(fmt.get('height')),
bf1317d2
S
1950 # bitrate for itag 43 is always 2147483647
1951 'tbr': float_or_none(fmt.get('averageBitrate') or fmt.get('bitrate'), 1000) if itag != '43' else None,
1952 'width': int_or_none(fmt.get('width')),
1953 }
1954
1955 for fmt in streaming_formats:
00eb865b 1956 if fmt.get('drmFamilies') or fmt.get('drm_families'):
bf1317d2
S
1957 continue
1958 url = url_or_none(fmt.get('url'))
1959
1960 if not url:
1961 cipher = fmt.get('cipher')
1962 if not cipher:
1963 continue
1964 url_data = compat_parse_qs(cipher)
1965 url = url_or_none(try_get(url_data, lambda x: x['url'][0], compat_str))
1966 if not url:
1967 continue
1968 else:
1969 cipher = None
1970 url_data = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
1971
2f483bc1
S
1972 stream_type = int_or_none(try_get(url_data, lambda x: x['stream_type'][0]))
1973 # Unsupported FORMAT_STREAM_TYPE_OTF
1974 if stream_type == 3:
1975 continue
6449cd80 1976
bf1317d2
S
1977 format_id = fmt.get('itag') or url_data['itag'][0]
1978 if not format_id:
1979 continue
1980 format_id = compat_str(format_id)
a49eccdf 1981
bf1317d2
S
1982 if cipher:
1983 if 's' in url_data or self._downloader.params.get('youtube_include_dash_manifest', True):
1984 ASSETS_RE = r'"assets":.+?"js":\s*("[^"]+")'
1985 jsplayer_url_json = self._search_regex(
1986 ASSETS_RE,
1987 embed_webpage if age_gate else video_webpage,
1988 'JS player URL (1)', default=None)
1989 if not jsplayer_url_json and not age_gate:
1990 # We need the embed website after all
1991 if embed_webpage is None:
1992 embed_url = proto + '://www.youtube.com/embed/%s' % video_id
1993 embed_webpage = self._download_webpage(
1994 embed_url, video_id, 'Downloading embed webpage')
1995 jsplayer_url_json = self._search_regex(
1996 ASSETS_RE, embed_webpage, 'JS player URL')
1997
1998 player_url = json.loads(jsplayer_url_json)
cf010131 1999 if player_url is None:
bf1317d2
S
2000 player_url_json = self._search_regex(
2001 r'ytplayer\.config.*?"url"\s*:\s*("[^"]+")',
2002 video_webpage, 'age gate player URL')
2003 player_url = json.loads(player_url_json)
2004
2005 if 'sig' in url_data:
2006 url += '&signature=' + url_data['sig'][0]
2007 elif 's' in url_data:
2008 encrypted_sig = url_data['s'][0]
2009
2010 if self._downloader.params.get('verbose'):
2011 if player_url is None:
2012 player_version = 'unknown'
2013 player_desc = 'unknown'
cf010131 2014 else:
bf1317d2
S
2015 if player_url.endswith('swf'):
2016 player_version = self._search_regex(
2017 r'-(.+?)(?:/watch_as3)?\.swf$', player_url,
2018 'flash player', fatal=False)
2019 player_desc = 'flash player %s' % player_version
2020 else:
2021 player_version = self._search_regex(
2022 [r'html5player-([^/]+?)(?:/html5player(?:-new)?)?\.js',
dc879c5a 2023 r'(?:www|player(?:_ias)?)[-.]([^/]+)(?:/[a-z]{2,3}_[A-Z]{2})?/base\.js'],
bf1317d2
S
2024 player_url,
2025 'html5 player', fatal=False)
2026 player_desc = 'html5 player %s' % player_version
2027
2028 parts_sizes = self._signature_cache_id(encrypted_sig)
2029 self.to_screen('{%s} signature length %s, %s' %
2030 (format_id, parts_sizes, player_desc))
2031
2032 signature = self._decrypt_signature(
2033 encrypted_sig, video_id, player_url, age_gate)
2034 sp = try_get(url_data, lambda x: x['sp'][0], compat_str) or 'signature'
2035 url += '&%s=%s' % (sp, signature)
201e9eaa
PH
2036 if 'ratebypass' not in url:
2037 url += '&ratebypass=yes'
c9afb51c 2038
94278f72
YCH
2039 dct = {
2040 'format_id': format_id,
2041 'url': url,
2042 'player_url': player_url,
2043 }
2044 if format_id in self._formats:
2045 dct.update(self._formats[format_id])
3318832e 2046 if format_id in formats_spec:
2047 dct.update(formats_spec[format_id])
94278f72 2048
aabc2be6 2049 # Some itags are not included in DASH manifest thus corresponding formats will
067aa17e 2050 # lack metadata (see https://github.com/ytdl-org/youtube-dl/pull/5993).
aabc2be6
S
2051 # Trying to extract metadata from url_encoded_fmt_stream_map entry.
2052 mobj = re.search(r'^(?P<width>\d+)[xX](?P<height>\d+)$', url_data.get('size', [''])[0])
2053 width, height = (int(mobj.group('width')), int(mobj.group('height'))) if mobj else (None, None)
94278f72 2054
bf1317d2
S
2055 if width is None:
2056 width = int_or_none(fmt.get('width'))
2057 if height is None:
2058 height = int_or_none(fmt.get('height'))
2059
c63ca0ee
S
2060 filesize = int_or_none(url_data.get(
2061 'clen', [None])[0]) or _extract_filesize(url)
2062
bf1317d2
S
2063 quality = url_data.get('quality', [None])[0] or fmt.get('quality')
2064 quality_label = url_data.get('quality_label', [None])[0] or fmt.get('qualityLabel')
2065
4878759f
S
2066 tbr = (float_or_none(url_data.get('bitrate', [None])[0], 1000)
2067 or float_or_none(fmt.get('bitrate'), 1000)) if format_id != '43' else None
bf1317d2 2068 fps = int_or_none(url_data.get('fps', [None])[0]) or int_or_none(fmt.get('fps'))
54fc90aa 2069
94278f72 2070 more_fields = {
c63ca0ee 2071 'filesize': filesize,
bf1317d2 2072 'tbr': tbr,
c9afb51c
AH
2073 'width': width,
2074 'height': height,
bf1317d2
S
2075 'fps': fps,
2076 'format_note': quality_label or quality,
c9afb51c 2077 }
94278f72
YCH
2078 for key, value in more_fields.items():
2079 if value:
2080 dct[key] = value
bf1317d2 2081 type_ = url_data.get('type', [None])[0] or fmt.get('mimeType')
aabc2be6
S
2082 if type_:
2083 type_split = type_.split(';')
2084 kind_ext = type_split[0].split('/')
2085 if len(kind_ext) == 2:
94278f72
YCH
2086 kind, _ = kind_ext
2087 dct['ext'] = mimetype2ext(type_split[0])
aabc2be6
S
2088 if kind in ('audio', 'video'):
2089 codecs = None
2090 for mobj in re.finditer(
2091 r'(?P<key>[a-zA-Z_-]+)=(?P<quote>["\']?)(?P<val>.+?)(?P=quote)(?:;|$)', type_):
2092 if mobj.group('key') == 'codecs':
2093 codecs = mobj.group('val')
2094 break
2095 if codecs:
6310acf5 2096 dct.update(parse_codecs(codecs))
e4a60912
S
2097 if dct.get('acodec') == 'none' or dct.get('vcodec') == 'none':
2098 dct['downloader_options'] = {
2099 # Youtube throttles chunks >~10M
2100 'http_chunk_size': 10485760,
2101 }
aabc2be6 2102 formats.append(dct)
c5e8d7af 2103 else:
c3e54389
S
2104 manifest_url = (
2105 url_or_none(try_get(
2106 player_response,
2107 lambda x: x['streamingData']['hlsManifestUrl'],
3089bc74
S
2108 compat_str))
2109 or url_or_none(try_get(
c3e54389
S
2110 video_info, lambda x: x['hlsvp'][0], compat_str)))
2111 if manifest_url:
2112 formats = []
2113 m3u8_formats = self._extract_m3u8_formats(
2114 manifest_url, video_id, 'mp4', fatal=False)
2115 for a_format in m3u8_formats:
2116 itag = self._search_regex(
2117 r'/itag/(\d+)/', a_format['url'], 'itag', default=None)
2118 if itag:
2119 a_format['format_id'] = itag
2120 if itag in self._formats:
2121 dct = self._formats[itag].copy()
2122 dct.update(a_format)
2123 a_format = dct
2124 a_format['player_url'] = player_url
2125 # Accept-Encoding header causes failures in live streams on Youtube and Youtube Gaming
2126 a_format.setdefault('http_headers', {})['Youtubedl-no-compression'] = 'True'
2127 formats.append(a_format)
2128 else:
13577349 2129 error_message = extract_unavailable_message()
c3e54389 2130 if not error_message:
13577349
S
2131 error_message = clean_html(try_get(
2132 player_response, lambda x: x['playabilityStatus']['reason'],
2133 compat_str))
2134 if not error_message:
2135 error_message = clean_html(
2136 try_get(video_info, lambda x: x['reason'][0], compat_str))
c3e54389
S
2137 if error_message:
2138 raise ExtractorError(error_message, expected=True)
2139 raise ExtractorError('no conn, hlsvp, hlsManifestUrl or url_encoded_fmt_stream_map information found in video info')
c5e8d7af 2140
7e72694b 2141 # uploader
dbdaaa23
S
2142 video_uploader = try_get(
2143 video_info, lambda x: x['author'][0],
2144 compat_str) or str_or_none(video_details.get('author'))
7e72694b
S
2145 if video_uploader:
2146 video_uploader = compat_urllib_parse_unquote_plus(video_uploader)
2147 else:
2148 self._downloader.report_warning('unable to extract uploader name')
2149
2150 # uploader_id
2151 video_uploader_id = None
2152 video_uploader_url = None
2153 mobj = re.search(
2154 r'<link itemprop="url" href="(?P<uploader_url>https?://www\.youtube\.com/(?:user|channel)/(?P<uploader_id>[^"]+))">',
2155 video_webpage)
2156 if mobj is not None:
2157 video_uploader_id = mobj.group('uploader_id')
2158 video_uploader_url = mobj.group('uploader_url')
2159 else:
2160 self._downloader.report_warning('unable to extract uploader nickname')
2161
b45a9e69 2162 channel_id = (
3089bc74
S
2163 str_or_none(video_details.get('channelId'))
2164 or self._html_search_meta(
2165 'channelId', video_webpage, 'channel id', default=None)
2166 or self._search_regex(
b45a9e69 2167 r'data-channel-external-id=(["\'])(?P<id>(?:(?!\1).)+)\1',
2168 video_webpage, 'channel id', default=None, group='id'))
dd4c4492
S
2169 channel_url = 'http://www.youtube.com/channel/%s' % channel_id if channel_id else None
2170
7e72694b
S
2171 # thumbnail image
2172 # We try first to get a high quality image:
2173 m_thumb = re.search(r'<span itemprop="thumbnail".*?href="(.*?)">',
2174 video_webpage, re.DOTALL)
2175 if m_thumb is not None:
2176 video_thumbnail = m_thumb.group(1)
2177 elif 'thumbnail_url' not in video_info:
2178 self._downloader.report_warning('unable to extract video thumbnail')
2179 video_thumbnail = None
2180 else: # don't panic if we can't find it
2181 video_thumbnail = compat_urllib_parse_unquote_plus(video_info['thumbnail_url'][0])
2182
2183 # upload date
2184 upload_date = self._html_search_meta(
2185 'datePublished', video_webpage, 'upload date', default=None)
2186 if not upload_date:
2187 upload_date = self._search_regex(
2188 [r'(?s)id="eow-date.*?>(.*?)</span>',
2189 r'(?:id="watch-uploader-info".*?>.*?|["\']simpleText["\']\s*:\s*["\'])(?:Published|Uploaded|Streamed live|Started) on (.+?)[<"\']'],
2190 video_webpage, 'upload date', default=None)
2191 upload_date = unified_strdate(upload_date)
2192
2193 video_license = self._html_search_regex(
2194 r'<h4[^>]+class="title"[^>]*>\s*License\s*</h4>\s*<ul[^>]*>\s*<li>(.+?)</li',
2195 video_webpage, 'license', default=None)
2196
2197 m_music = re.search(
2198 r'''(?x)
2199 <h4[^>]+class="title"[^>]*>\s*Music\s*</h4>\s*
2200 <ul[^>]*>\s*
2201 <li>(?P<title>.+?)
2202 by (?P<creator>.+?)
2203 (?:
2204 \(.+?\)|
2205 <a[^>]*
2206 (?:
2207 \bhref=["\']/red[^>]*>| # drop possible
2208 >\s*Listen ad-free with YouTube Red # YouTube Red ad
2209 )
2210 .*?
2211 )?</li
2212 ''',
2213 video_webpage)
2214 if m_music:
2215 video_alt_title = remove_quotes(unescapeHTML(m_music.group('title')))
2216 video_creator = clean_html(m_music.group('creator'))
2217 else:
2218 video_alt_title = video_creator = None
2219
2220 def extract_meta(field):
2221 return self._html_search_regex(
2222 r'<h4[^>]+class="title"[^>]*>\s*%s\s*</h4>\s*<ul[^>]*>\s*<li>(.+?)</li>\s*' % field,
2223 video_webpage, field, default=None)
2224
2225 track = extract_meta('Song')
2226 artist = extract_meta('Artist')
92bc97d3 2227 album = extract_meta('Album')
822b9d9c
RA
2228
2229 # Youtube Music Auto-generated description
92bc97d3 2230 release_date = release_year = None
822b9d9c
RA
2231 if video_description:
2232 mobj = re.search(r'(?s)Provided to YouTube by [^\n]+\n+(?P<track>[^·]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?', video_description)
2233 if mobj:
2234 if not track:
2235 track = mobj.group('track').strip()
2236 if not artist:
2237 artist = mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·'))
92bc97d3
RA
2238 if not album:
2239 album = mobj.group('album'.strip())
822b9d9c
RA
2240 release_year = mobj.group('release_year')
2241 release_date = mobj.group('release_date')
2242 if release_date:
2243 release_date = release_date.replace('-', '')
2244 if not release_year:
2245 release_year = int(release_date[:4])
2246 if release_year:
2247 release_year = int(release_year)
7e72694b
S
2248
2249 m_episode = re.search(
2250 r'<div[^>]+id="watch7-headline"[^>]*>\s*<span[^>]*>.*?>(?P<series>[^<]+)</a></b>\s*S(?P<season>\d+)\s*•\s*E(?P<episode>\d+)</span>',
2251 video_webpage)
2252 if m_episode:
c2dd2dc0 2253 series = unescapeHTML(m_episode.group('series'))
7e72694b
S
2254 season_number = int(m_episode.group('season'))
2255 episode_number = int(m_episode.group('episode'))
2256 else:
2257 series = season_number = episode_number = None
2258
2259 m_cat_container = self._search_regex(
2260 r'(?s)<h4[^>]*>\s*Category\s*</h4>\s*<ul[^>]*>(.*?)</ul>',
2261 video_webpage, 'categories', default=None)
2262 if m_cat_container:
2263 category = self._html_search_regex(
2264 r'(?s)<a[^<]+>(.*?)</a>', m_cat_container, 'category',
2265 default=None)
2266 video_categories = None if category is None else [category]
2267 else:
2268 video_categories = None
2269
2270 video_tags = [
2271 unescapeHTML(m.group('content'))
2272 for m in re.finditer(self._meta_regex('og:video:tag'), video_webpage)]
2273
2274 def _extract_count(count_name):
2275 return str_to_int(self._search_regex(
2276 r'-%s-button[^>]+><span[^>]+class="yt-uix-button-content"[^>]*>([\d,]+)</span>'
2277 % re.escape(count_name),
2278 video_webpage, count_name, default=None))
2279
2280 like_count = _extract_count('like')
2281 dislike_count = _extract_count('dislike')
2282
dbdaaa23
S
2283 if view_count is None:
2284 view_count = str_to_int(self._search_regex(
2285 r'<[^>]+class=["\']watch-view-count[^>]+>\s*([\d,\s]+)', video_webpage,
2286 'view count', default=None))
2287
bf3c9326
S
2288 average_rating = (
2289 float_or_none(video_details.get('averageRating'))
2290 or try_get(video_info, lambda x: float_or_none(x['avg_rating'][0])))
2291
7e72694b
S
2292 # subtitles
2293 video_subtitles = self.extract_subtitles(video_id, video_webpage)
2294 automatic_captions = self.extract_automatic_captions(video_id, video_webpage)
2295
2296 video_duration = try_get(
2297 video_info, lambda x: int_or_none(x['length_seconds'][0]))
dbdaaa23
S
2298 if not video_duration:
2299 video_duration = int_or_none(video_details.get('lengthSeconds'))
7e72694b
S
2300 if not video_duration:
2301 video_duration = parse_duration(self._html_search_meta(
2302 'duration', video_webpage, 'video duration'))
2303
2304 # annotations
2305 video_annotations = None
2306 if self._downloader.params.get('writeannotations', False):
64b6a4e9
RA
2307 xsrf_token = self._search_regex(
2308 r'([\'"])XSRF_TOKEN\1\s*:\s*([\'"])(?P<xsrf_token>[A-Za-z0-9+/=]+)\2',
2309 video_webpage, 'xsrf token', group='xsrf_token', fatal=False)
2310 invideo_url = try_get(
2311 player_response, lambda x: x['annotations'][0]['playerAnnotationsUrlsRenderer']['invideoUrl'], compat_str)
2312 if xsrf_token and invideo_url:
2313 xsrf_field_name = self._search_regex(
2314 r'([\'"])XSRF_FIELD_NAME\1\s*:\s*([\'"])(?P<xsrf_field_name>\w+)\2',
2315 video_webpage, 'xsrf field name',
2316 group='xsrf_field_name', default='session_token')
2317 video_annotations = self._download_webpage(
2318 self._proto_relative_url(invideo_url),
2319 video_id, note='Downloading annotations',
2320 errnote='Unable to download video annotations', fatal=False,
2321 data=urlencode_postdata({xsrf_field_name: xsrf_token}))
7e72694b
S
2322
2323 chapters = self._extract_chapters(description_original, video_duration)
2324
dd27fd17 2325 # Look for the DASH manifest
203fb43f 2326 if self._downloader.params.get('youtube_include_dash_manifest', True):
77c6fb5b 2327 dash_mpd_fatal = True
8ff648e4 2328 for mpd_url in dash_mpds:
d8d24a92 2329 dash_formats = {}
774e208f 2330 try:
05d0d131
YCH
2331 def decrypt_sig(mobj):
2332 s = mobj.group(1)
2333 dec_s = self._decrypt_signature(s, video_id, player_url, age_gate)
2334 return '/signature/%s' % dec_s
2335
8ff648e4 2336 mpd_url = re.sub(r'/s/([a-fA-F0-9\.]+)', decrypt_sig, mpd_url)
2d2fa82d 2337
8ff648e4 2338 for df in self._extract_mpd_formats(
2339 mpd_url, video_id, fatal=dash_mpd_fatal,
2340 formats_dict=self._formats):
c63ca0ee
S
2341 if not df.get('filesize'):
2342 df['filesize'] = _extract_filesize(df['url'])
d8d24a92
S
2343 # Do not overwrite DASH format found in some previous DASH manifest
2344 if df['format_id'] not in dash_formats:
2345 dash_formats[df['format_id']] = df
77c6fb5b
S
2346 # Additional DASH manifests may end up in HTTP Error 403 therefore
2347 # allow them to fail without bug report message if we already have
2348 # some DASH manifest succeeded. This is temporary workaround to reduce
2349 # burst of bug reports until we figure out the reason and whether it
2350 # can be fixed at all.
2351 dash_mpd_fatal = False
774e208f
PH
2352 except (ExtractorError, KeyError) as e:
2353 self.report_warning(
2354 'Skipping DASH manifest: %r' % e, video_id)
d8d24a92 2355 if dash_formats:
04b3b3df
JMF
2356 # Remove the formats we found through non-DASH, they
2357 # contain less info and it can be wrong, because we use
2358 # fixed values (for example the resolution). See
067aa17e 2359 # https://github.com/ytdl-org/youtube-dl/issues/5774 for an
04b3b3df 2360 # example.
d80265cc 2361 formats = [f for f in formats if f['format_id'] not in dash_formats.keys()]
d8d24a92 2362 formats.extend(dash_formats.values())
d80044c2 2363
6271f1ca
PH
2364 # Check for malformed aspect ratio
2365 stretched_m = re.search(
2366 r'<meta\s+property="og:video:tag".*?content="yt:stretch=(?P<w>[0-9]+):(?P<h>[0-9]+)">',
2367 video_webpage)
2368 if stretched_m:
313dfc45
LL
2369 w = float(stretched_m.group('w'))
2370 h = float(stretched_m.group('h'))
5faf9fed
S
2371 # yt:stretch may hold invalid ratio data (e.g. for Q39EVAstoRM ratio is 17:0).
2372 # We will only process correct ratios.
313dfc45 2373 if w > 0 and h > 0:
41f24c32 2374 ratio = w / h
313dfc45
LL
2375 for f in formats:
2376 if f.get('vcodec') != 'none':
2377 f['stretched_ratio'] = ratio
6271f1ca 2378
026fbedc 2379 if not formats:
43ebf77d
S
2380 if 'reason' in video_info:
2381 if 'The uploader has not made this video available in your country.' in video_info['reason']:
2382 regions_allowed = self._html_search_meta(
2383 'regionsAllowed', video_webpage, default=None)
2384 countries = regions_allowed.split(',') if regions_allowed else None
2385 self.raise_geo_restricted(
2386 msg=video_info['reason'][0], countries=countries)
2387 reason = video_info['reason'][0]
2388 if 'Invalid parameters' in reason:
2389 unavailable_message = extract_unavailable_message()
2390 if unavailable_message:
2391 reason = unavailable_message
2392 raise ExtractorError(
2393 'YouTube said: %s' % reason,
2394 expected=True, video_id=video_id)
2395 if video_info.get('license_info') or try_get(player_response, lambda x: x['streamingData']['licenseInfos']):
2396 raise ExtractorError('This video is DRM protected.', expected=True)
0d297518 2397
4bcc7bd1 2398 self._sort_formats(formats)
4ea3be0a 2399
21c340b8 2400 self.mark_watched(video_id, video_info, player_response)
d77ab8e2 2401
4ea3be0a 2402 return {
8bcc8756
JW
2403 'id': video_id,
2404 'uploader': video_uploader,
2405 'uploader_id': video_uploader_id,
fd050249 2406 'uploader_url': video_uploader_url,
dd4c4492
S
2407 'channel_id': channel_id,
2408 'channel_url': channel_url,
8bcc8756 2409 'upload_date': upload_date,
7caf9830 2410 'license': video_license,
936784b2 2411 'creator': video_creator or artist,
8bcc8756 2412 'title': video_title,
936784b2 2413 'alt_title': video_alt_title or track,
8bcc8756
JW
2414 'thumbnail': video_thumbnail,
2415 'description': video_description,
2416 'categories': video_categories,
000b6b5a 2417 'tags': video_tags,
8bcc8756 2418 'subtitles': video_subtitles,
360e1ca5 2419 'automatic_captions': automatic_captions,
8bcc8756
JW
2420 'duration': video_duration,
2421 'age_limit': 18 if age_gate else 0,
2422 'annotations': video_annotations,
9cafc3fd 2423 'chapters': chapters,
7e8c0af0 2424 'webpage_url': proto + '://www.youtube.com/watch?v=%s' % video_id,
8bcc8756 2425 'view_count': view_count,
4ea3be0a 2426 'like_count': like_count,
2427 'dislike_count': dislike_count,
bf3c9326 2428 'average_rating': average_rating,
8bcc8756 2429 'formats': formats,
2fe1ff85 2430 'is_live': is_live,
7c80519c 2431 'start_time': start_time,
297a564b 2432 'end_time': end_time,
12afdc2a
S
2433 'series': series,
2434 'season_number': season_number,
2435 'episode_number': episode_number,
936784b2
S
2436 'track': track,
2437 'artist': artist,
5caabd3c 2438 'album': album,
2439 'release_date': release_date,
2440 'release_year': release_year,
4ea3be0a 2441 }
c5e8d7af 2442
5f6a1245 2443
class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
    """Extractor for YouTube playlists, mixes and playlist-flavoured video URLs.

    _VALID_URL has two capturing groups: group 1 is the playlist id taken
    from a full URL, group 2 is a bare playlist id (see _real_extract).
    """

    IE_DESC = 'YouTube.com playlists'
    _VALID_URL = r"""(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        (?:
                            (?:
                                youtube(?:kids)?\.com|
                                invidio\.us
                            )
                            /
                            (?:
                               (?:course|view_play_list|my_playlists|artist|playlist|watch|embed/(?:videoseries|[0-9A-Za-z_-]{11}))
                               \? (?:.*?[&;])*? (?:p|a|list)=
                            |  p/
                            )|
                            youtu\.be/[0-9A-Za-z_-]{11}\?.*?\blist=
                        )
                        (
                            (?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)?[0-9A-Za-z-_]{10,}
                            # Top tracks, they can also include dots
                            |(?:MC)[\w\.]*
                        )
                        .*
                     |
                        (%(playlist_id)s)
                     )""" % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s'
    _VIDEO_RE_TPL = r'href="\s*/watch\?v=%s(?:&amp;(?:[^"]*?index=(?P<index>\d+))?(?:[^>]+>(?P<title>[^<]+))?)?'
    _VIDEO_RE = _VIDEO_RE_TPL % r'(?P<id>[0-9A-Za-z_-]{11})'
    IE_NAME = 'youtube:playlist'
    _TESTS = [{
        'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
        'info_dict': {
            'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
            'uploader': 'Sergey M.',
            'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
            'title': 'youtube-dl public playlist',
        },
        'playlist_count': 1,
    }, {
        'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
        'info_dict': {
            'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
            'uploader': 'Sergey M.',
            'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
            'title': 'youtube-dl empty playlist',
        },
        'playlist_count': 0,
    }, {
        'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
        'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
        'info_dict': {
            'title': '29C3: Not my department',
            'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
            'uploader': 'Christiaan008',
            'uploader_id': 'ChRiStIaAn008',
        },
        'playlist_count': 96,
    }, {
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
            'uploader': 'Wickydoo',
            'uploader_id': 'Wickydoo',
        },
        'playlist_mincount': 26,
    }, {
        'note': 'Large playlist',
        'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
        'info_dict': {
            'title': 'Uploads from Cauchemar',
            'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
            'uploader': 'Cauchemar',
            'uploader_id': 'Cauchemar89',
        },
        'playlist_mincount': 799,
    }, {
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        },
        'playlist_count': 2,
        'skip': 'This playlist is private',
    }, {
        'note': 'embedded',
        'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA15',
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
            'uploader': 'milan',
            'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
        }
    }, {
        'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
        'playlist_mincount': 485,
        'info_dict': {
            'title': '2018 Chinese New Singles (11/6 updated)',
            'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
            'uploader': 'LBK',
            'uploader_id': 'sdragonfang',
        }
    }, {
        'note': 'Embedded SWF player',
        'url': 'https://www.youtube.com/p/YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ?hl=en_US&fs=1&rel=0',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA7',
            'id': 'YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ',
        },
        'skip': 'This playlist does not exist',
    }, {
        'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
        'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
        'info_dict': {
            'title': 'Uploads from Interstellar Movie',
            'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
            'uploader': 'Interstellar Movie',
            'uploader_id': 'InterstellarMovie1',
        },
        'playlist_mincount': 21,
    }, {
        # Playlist URL that does not actually serve a playlist
        'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
        'info_dict': {
            'id': 'FqZTN594JQw',
            'ext': 'webm',
            'title': "Smiley's People 01 detective, Adventure Series, Action",
            'uploader': 'STREEM',
            'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
            'upload_date': '20150526',
            'license': 'Standard YouTube License',
            'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
            'categories': ['People & Blogs'],
            'tags': list,
            'view_count': int,
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'skip_download': True,
        },
        'skip': 'This video is not available.',
        'add_ie': [YoutubeIE.ie_key()],
    }, {
        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
        'info_dict': {
            'id': 'yeWKywCrFtk',
            'ext': 'mp4',
            'title': 'Small Scale Baler and Braiding Rugs',
            'uploader': 'Backus-Page House Museum',
            'uploader_id': 'backuspagemuseum',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
            'upload_date': '20161008',
            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
            'categories': ['Nonprofits & Activism'],
            'tags': list,
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
    }, {
        # https://github.com/ytdl-org/youtube-dl/issues/21844
        'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
        'info_dict': {
            'title': 'Data Analysis with Dr Mike Pound',
            'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
            'uploader_id': 'Computerphile',
            'uploader': 'Computerphile',
        },
        'playlist_mincount': 11,
    }, {
        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
        'only_matching': True,
    }, {
        'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
        'only_matching': True,
    }, {
        # music album playlist
        'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
        'only_matching': True,
    }, {
        'url': 'https://invidio.us/playlist?list=PLDIoUOhQQPlXr63I_vwF9GD8sAKh77dWU',
        'only_matching': True,
    }, {
        'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',
        'only_matching': True,
    }]

    def _real_initialize(self):
        """Log in before any extraction takes place."""
        self._login()

    def extract_videos_from_page(self, page):
        """Return an iterable of (video id, title) pairs found in *page*.

        Tags carrying a ``data-video-id`` attribute are scanned first; the
        same two lists are then handed to extract_videos_from_page_impl for
        _VIDEO_RE and for two more permissive patterns.
        """
        found_ids = []
        found_titles = []

        tag_re = r'(<[^>]*\bdata-video-id\s*=\s*["\'][0-9A-Za-z_-]{11}[^>]+>)'
        for tag in re.findall(tag_re, page):
            tag_attrs = extract_attributes(tag)
            title = unescapeHTML(tag_attrs.get('data-title'))
            if title:
                title = title.strip()
            found_ids.append(tag_attrs['data-video-id'])
            found_titles.append(title)

        fallback_patterns = (
            # Fallback with old _VIDEO_RE
            self._VIDEO_RE,
            # Relaxed fallbacks
            r'href="\s*/watch\?v\s*=\s*(?P<id>[0-9A-Za-z_-]{11})',
            r'data-video-ids\s*=\s*["\'](?P<id>[0-9A-Za-z_-]{11})',
        )
        for pattern in fallback_patterns:
            self.extract_videos_from_page_impl(
                pattern, page, found_ids, found_titles)

        return zip(found_ids, found_titles)

    def _extract_mix(self, playlist_id):
        """Build a playlist result for a mix by walking its watch pages.

        Each watch page is requested with the last video id collected so far
        (initially the final 11 characters of *playlist_id*); paging stops as
        soon as a page contributes no video id that is not already known.
        """
        # The mixes are generated from a single video
        # the id of the playlist is just 'RD' + video_id
        collected_ids = []
        seed_id = playlist_id[-11:]
        for page_num in itertools.count(1):
            mix_url = 'https://youtube.com/watch?v=%s&list=%s' % (seed_id, playlist_id)
            webpage = self._download_webpage(
                mix_url, playlist_id,
                'Downloading page {0} of Youtube mix'.format(page_num))
            page_ids = orderedSet(re.findall(
                r'''(?xs)data-video-username=".*?".*?
                           href="/watch\?v=([0-9A-Za-z_-]{11})&amp;[^"]*?list=%s''' % re.escape(playlist_id),
                webpage))
            # Fetch new pages until all the videos are repeated, it seems that
            # there are always 51 unique videos.
            unseen_ids = [_id for _id in page_ids if _id not in collected_ids]
            if not unseen_ids:
                break
            collected_ids.extend(unseen_ids)
            seed_id = collected_ids[-1]

        url_results = self._ids_to_results(collected_ids)

        # Take the title from the last downloaded page: the first CSS class
        # yielding a non-empty element wins, otherwise the result of the
        # final lookup is used as is.
        title_span = None
        for class_name in ('playlist-title', 'title long-title', 'title'):
            title_span = get_element_by_attribute('class', class_name, webpage)
            if title_span:
                break
        title = clean_html(title_span)

        return self.playlist_result(url_results, playlist_id, title)

    def _extract_playlist(self, playlist_id):
        """Download the playlist page and return ``(has_videos, playlist)``.

        ``has_videos`` is False only when the page has no playlist title and
        self._entries yields nothing for it. Raises ExtractorError when an
        alert on the page says the playlist does not exist, is private, or
        that the parameters are invalid.
        """
        url = self._TEMPLATE_URL % playlist_id
        page = self._download_webpage(url, playlist_id)

        # the yt-alert-message now has tabindex attribute (see https://github.com/ytdl-org/youtube-dl/issues/11604)
        for alert in re.findall(r'<div class="yt-alert-message"[^>]*>([^<]+)</div>', page):
            alert = alert.strip()
            # Check if the playlist exists or is private
            mobj = re.match(r'[^<]*(?:The|This) playlist (?P<reason>does not exist|is private)[^<]*', alert)
            if mobj:
                reason = mobj.group('reason')
                message = 'This playlist %s' % reason
                if 'private' in reason:
                    message += ', use --username or --netrc to access it'
                message += '.'
                raise ExtractorError(message, expected=True)
            if re.match(r'[^<]*Invalid parameters[^<]*', alert):
                raise ExtractorError(
                    'Invalid parameters. Maybe URL is incorrect.',
                    expected=True)
            if re.match(r'[^<]*Choose your language[^<]*', alert):
                continue
            self.report_warning('Youtube gives an alert message: ' + alert)

        playlist_title = self._html_search_regex(
            r'(?s)<h1 class="pl-header-title[^"]*"[^>]*>\s*(.*?)\s*</h1>',
            page, 'title', default=None)

        uploader_base = r'class=["\']pl-header-details[^>]+>\s*<li>\s*<a[^>]+\bhref='
        uploader = self._html_search_regex(
            r'%s["\']/(?:user|channel)/[^>]+>([^<]+)' % uploader_base,
            page, 'uploader', default=None)

        uploader_id = uploader_url = None
        uploader_mobj = re.search(
            r'%s(["\'])(?P<path>/(?:user|channel)/(?P<uploader_id>.+?))\1' % uploader_base,
            page)
        if uploader_mobj:
            uploader_id = uploader_mobj.group('uploader_id')
            uploader_url = compat_urlparse.urljoin(url, uploader_mobj.group('path'))

        has_videos = True
        if not playlist_title:
            # Some playlist URLs don't actually serve a playlist (e.g.
            # https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4)
            try:
                next(self._entries(page, playlist_id))
            except StopIteration:
                has_videos = False

        playlist = self.playlist_result(
            self._entries(page, playlist_id), playlist_id, playlist_title)
        playlist.update({
            'uploader': uploader,
            'uploader_id': uploader_id,
            'uploader_url': uploader_url,
        })

        return has_videos, playlist

    def _check_download_just_video(self, url, playlist_id):
        """Return ``(video_id, result)`` for a URL that may also name a video.

        ``video_id`` is None when *url* carries no video id. ``result`` is a
        url_result for that video only when the ``noplaylist`` parameter is
        set, and None otherwise.
        """
        # Check if it's a video-specific URL
        query_dict = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
        video_id = query_dict.get('v', [None])[0] or self._search_regex(
            r'(?:(?:^|//)youtu\.be/|youtube\.com/embed/(?!videoseries))([0-9A-Za-z_-]{11})', url,
            'video id', default=None)
        if not video_id:
            return None, None

        if self._downloader.params.get('noplaylist'):
            self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
            return video_id, self.url_result(video_id, 'Youtube', video_id=video_id)

        self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))
        return video_id, None

    def _real_extract(self, url):
        """Dispatch *url* to single-video, mix or regular playlist extraction."""
        # Extract playlist id
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError('Invalid URL: %s' % url)
        playlist_id = mobj.group(1) or mobj.group(2)

        video_id, video = self._check_download_just_video(url, playlist_id)
        if video:
            return video

        if playlist_id.startswith(('RD', 'UL', 'PU')):
            # Mixes require a custom extraction process
            return self._extract_mix(playlist_id)

        has_videos, playlist = self._extract_playlist(playlist_id)
        if not has_videos and video_id:
            # Some playlist URLs don't actually serve a playlist (see
            # https://github.com/ytdl-org/youtube-dl/issues/10537).
            # Fallback to plain video extraction if there is a video id
            # along with playlist id.
            return self.url_result(video_id, 'Youtube', video_id=video_id)

        return playlist
448830ce 2805
c5e8d7af 2806
class YoutubeChannelIE(YoutubePlaylistBaseInfoExtractor):
    """Extractor for the video listing of a YouTube channel URL."""

    IE_DESC = 'YouTube.com channels'
    _VALID_URL = r'https?://(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie|kids)?\.com|(?:www\.)?invidio\.us)/channel/(?P<id>[0-9A-Za-z_-]+)'
    _TEMPLATE_URL = 'https://www.youtube.com/channel/%s/videos'
    _VIDEO_RE = r'(?:title="(?P<title>[^"]+)"[^>]+)?href="/watch\?v=(?P<id>[0-9A-Za-z_-]+)&?'
    IE_NAME = 'youtube:channel'
    _TESTS = [{
        'note': 'paginated channel',
        'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
        'playlist_mincount': 91,
        'info_dict': {
            'id': 'UUKfVa3S1e4PHvxWcwyMMg8w',
            'title': 'Uploads from lex will',
            'uploader': 'lex will',
            'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
        }
    }, {
        'note': 'Age restricted channel',
        # from https://www.youtube.com/user/DeusExOfficial
        'url': 'https://www.youtube.com/channel/UCs0ifCMCm1icqRbqhUINa0w',
        'playlist_mincount': 64,
        'info_dict': {
            'id': 'UUs0ifCMCm1icqRbqhUINa0w',
            'title': 'Uploads from Deus Ex',
            'uploader': 'Deus Ex',
            'uploader_id': 'DeusExOfficial',
        },
    }, {
        'url': 'https://invidio.us/channel/UC23qupoDRn9YOAVzeoxjOQA',
        'only_matching': True,
    }, {
        'url': 'https://www.youtubekids.com/channel/UCyu8StPfZWapR6rfW_JgqcA',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Reject URLs claimed by YoutubePlaylistsIE or YoutubeLiveIE."""
        if YoutubePlaylistsIE.suitable(url) or YoutubeLiveIE.suitable(url):
            return False
        return super(YoutubeChannelIE, cls).suitable(url)

    def _build_template_url(self, url, channel_id):
        """Return the videos-listing URL for *channel_id* (*url* is unused here)."""
        return self._TEMPLATE_URL % channel_id

    def _real_extract(self, url):
        """Extract a channel, preferring its uploads playlist when one can be derived.

        Falls back to scraping the channel's video listing pages when no
        ``UC...`` channel id can be obtained from the channel page.
        """
        channel_id = self._match_id(url)
        videos_url = self._build_template_url(url, channel_id)

        # Channel by page listing is restricted to 35 pages of 30 items, i.e. 1050 videos total (see #5778)
        # Workaround by extracting as a playlist if managed to obtain channel playlist URL
        # otherwise fallback on channel by page extraction
        channel_page = self._download_webpage(
            videos_url + '?view=57', channel_id,
            'Downloading channel page', fatal=False)

        channel_playlist_id = False
        if channel_page is not False:
            channel_playlist_id = self._html_search_meta(
                'channelId', channel_page, 'channel id', default=None)
            if not channel_playlist_id:
                # No channelId meta tag: try the app deep-link meta tags.
                channel_url = self._html_search_meta(
                    ('al:ios:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad'),
                    channel_page, 'channel url', default=None)
                if channel_url:
                    channel_playlist_id = self._search_regex(
                        r'vnd\.youtube://user/([0-9A-Za-z_-]+)',
                        channel_url, 'channel id', default=None)

        if channel_playlist_id and channel_playlist_id.startswith('UC'):
            # Swap the 'UC' prefix for 'UU' and hand over to the playlist extractor.
            playlist_id = 'UU' + channel_playlist_id[2:]
            playlist_url = compat_urlparse.urljoin(
                videos_url, '/playlist?list=%s' % playlist_id)
            return self.url_result(playlist_url, 'YoutubePlaylist')

        channel_page = self._download_webpage(videos_url, channel_id, 'Downloading page #1')
        autogenerated = re.search(r'''(?x)
                class="[^"]*?(?:
                    channel-header-autogenerated-label|
                    yt-channel-title-autogenerated
                )[^"]*"''', channel_page) is not None

        if autogenerated:
            # The videos are contained in a single page
            # the ajax pages can't be used, they are empty
            entries = [
                self.url_result(
                    vid, 'Youtube', video_id=vid,
                    video_title=vid_title)
                for vid, vid_title in self.extract_videos_from_page(channel_page)]
            return self.playlist_result(entries, channel_id)

        # Probe for at least one entry; if there is none and the page shows
        # an alert, report that alert instead of returning an empty result.
        try:
            next(self._entries(channel_page, channel_id))
        except StopIteration:
            alert_message = self._html_search_regex(
                r'(?s)<div[^>]+class=(["\']).*?\byt-alert-message\b.*?\1[^>]*>(?P<alert>[^<]+)</div>',
                channel_page, 'alert', default=None, group='alert')
            if alert_message:
                raise ExtractorError('Youtube said: %s' % alert_message, expected=True)

        return self.playlist_result(self._entries(channel_page, channel_id), channel_id)
c5e8d7af
PH
2906
2907
class YoutubeUserIE(YoutubeChannelIE):
    """Extractor for user / custom-URL video listings and ``ytuser:`` ids."""

    IE_DESC = 'YouTube.com user videos (URL or "ytuser" keyword)'
    _VALID_URL = r'(?:(?:https?://(?:\w+\.)?youtube\.com/(?:(?P<user>user|c)/)?(?!(?:attribution_link|watch|results|shared)(?:$|[^a-z_A-Z0-9-])))|ytuser:)(?!feed/)(?P<id>[A-Za-z0-9_-]+)'
    _TEMPLATE_URL = 'https://www.youtube.com/%s/%s/videos'
    IE_NAME = 'youtube:user'

    _TESTS = [{
        'url': 'https://www.youtube.com/user/TheLinuxFoundation',
        'playlist_mincount': 320,
        'info_dict': {
            'id': 'UUfX55Sx5hEFjoC3cNs6mCUQ',
            'title': 'Uploads from The Linux Foundation',
            'uploader': 'The Linux Foundation',
            'uploader_id': 'TheLinuxFoundation',
        }
    }, {
        # Only available via https://www.youtube.com/c/12minuteathlete/videos
        # but not https://www.youtube.com/user/12minuteathlete/videos
        'url': 'https://www.youtube.com/c/12minuteathlete/videos',
        'playlist_mincount': 249,
        'info_dict': {
            'id': 'UUVjM-zV6_opMDx7WYxnjZiQ',
            'title': 'Uploads from 12 Minute Athlete',
            'uploader': '12 Minute Athlete',
            'uploader_id': 'the12minuteathlete',
        }
    }, {
        'url': 'ytuser:phihag',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/c/gametrailers',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/gametrailers',
        'only_matching': True,
    }, {
        # This channel is not available, geo restricted to JP
        'url': 'https://www.youtube.com/user/kananishinoSMEJ/videos',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Accept *url* only if no other ``Youtube*IE`` in this module does."""
        # This extractor's regex is very permissive, so defer to any other
        # youtube extractor that is able to handle the URL.
        for name, klass in globals().items():
            if klass is cls:
                continue
            if not (name.startswith('Youtube') and name.endswith('IE')):
                continue
            if klass.suitable(url):
                return False
        return super(YoutubeUserIE, cls).suitable(url)

    def _build_template_url(self, url, channel_id):
        """Return the videos-listing URL, keeping the ``user``/``c`` path kind."""
        mobj = re.match(self._VALID_URL, url)
        # URLs without an explicit prefix (and ytuser: ids) are treated as /user/.
        path_kind = mobj.group('user') or 'user'
        return self._TEMPLATE_URL % (path_kind, mobj.group('id'))
2962
b05654f0 2963
f07e276a
S
class YoutubeLiveIE(YoutubeBaseInfoExtractor):
    """Extractor for ``.../live`` URLs of a user or channel."""

    IE_DESC = 'YouTube.com live streams'
    _VALID_URL = r'(?P<base_url>https?://(?:\w+\.)?youtube\.com/(?:(?:user|channel|c)/)?(?P<id>[^/]+))/live'
    IE_NAME = 'youtube:live'

    _TESTS = [{
        'url': 'https://www.youtube.com/user/TheYoungTurks/live',
        'info_dict': {
            'id': 'a48o2S1cPoo',
            'ext': 'mp4',
            'title': 'The Young Turks - Live Main Show',
            'uploader': 'The Young Turks',
            'uploader_id': 'TheYoungTurks',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
            'upload_date': '20150715',
            'license': 'Standard YouTube License',
            'description': 'md5:438179573adcdff3c97ebb1ee632b891',
            'categories': ['News & Politics'],
            'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/TheYoungTurks/live',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Resolve a ``/live`` URL to the video it shows, else to its base URL.

        The base URL (the part of *url* before ``/live``) is returned as a
        url_result whenever the page cannot be downloaded or does not describe
        a video page with a well-formed 11-character video id.
        """
        mobj = re.match(self._VALID_URL, url)
        channel_id, base_url = mobj.group('id', 'base_url')

        webpage = self._download_webpage(url, channel_id, fatal=False)
        if not webpage:
            return self.url_result(base_url)

        page_type = self._og_search_property(
            'type', webpage, 'page type', default='')
        video_id = self._html_search_meta(
            'videoId', webpage, 'video id', default=None)
        is_video_page = page_type.startswith('video')
        if is_video_page and video_id and re.match(
                r'^[0-9A-Za-z_-]{11}$', video_id):
            return self.url_result(video_id, YoutubeIE.ie_key())

        return self.url_result(base_url)
3014
3015
e462474e
S
class YoutubePlaylistsIE(YoutubePlaylistsBaseInfoExtractor):
    """Extractor for the ``/playlists`` page of a user or channel.

    Only declares matching and test data; the extraction logic comes from
    the base class.
    """

    IE_DESC = 'YouTube.com user/channel playlists'
    _VALID_URL = r'https?://(?:\w+\.)?youtube\.com/(?:user|channel)/(?P<id>[^/]+)/playlists'
    IE_NAME = 'youtube:playlists'

    _TESTS = [{
        'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
        'playlist_mincount': 4,
        'info_dict': {
            'id': 'ThirstForScience',
            'title': 'ThirstForScience',
        },
    }, {
        # with "Load more" button
        'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
        'playlist_mincount': 70,
        'info_dict': {
            'id': 'igorkle1',
            'title': 'Игорь Клейнер',
        },
    }, {
        'url': 'https://www.youtube.com/channel/UCiU1dHvZObB2iP6xkJ__Icw/playlists',
        'playlist_mincount': 17,
        'info_dict': {
            'id': 'UCiU1dHvZObB2iP6xkJ__Icw',
            'title': 'Chem Player',
        },
        'skip': 'Blocked',
    }]
0c148415
S
3045
3046
870f3bfc
S
class YoutubeSearchBaseInfoExtractor(YoutubePlaylistBaseInfoExtractor):
    """Base for search extractors; only overrides the video-link pattern."""

    # Captures the 11-character video id of a /watch link and, when the same
    # tag carries a title="..." attribute after the href, that title as well.
    _VIDEO_RE = r'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})(?:[^"]*"[^>]+\btitle="(?P<title>[^"]+))?'
3049
3050
class YoutubeSearchIE(SearchInfoExtractor, YoutubeSearchBaseInfoExtractor):
    """Search extractor behind the ``ytsearch`` search key.

    Result pages are requested from youtube.com/results with
    ``spf=navigate`` and are followed through their "Next" link until
    enough videos have been collected.
    """
    IE_DESC = 'YouTube.com searches'
    # there doesn't appear to be a real limit, for example if you search for
    # 'python' you get more than 8.000.000 results
    _MAX_RESULTS = float('inf')
    IE_NAME = 'youtube:search'
    _SEARCH_KEY = 'ytsearch'
    # Extra query-string arguments merged into the first results URL;
    # subclasses override this (e.g. to change the sort order).
    _EXTRA_QUERY_ARGS = {}
    _TESTS = []

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query

        Downloads result pages one by one and returns a playlist result
        titled with ``query`` that holds at most ``n`` entries.

        Raises ExtractorError (expected) when a page carries the
        "search-message" marker, i.e. there are no video results.
        """
        videos = []

        url_query = {
            'search_query': query.encode('utf-8'),
        }
        url_query.update(self._EXTRA_QUERY_ARGS)
        result_url = 'https://www.youtube.com/results?' + compat_urllib_parse_urlencode(url_query)

        for pagenum in itertools.count(1):
            data = self._download_json(
                result_url, video_id='query "%s"' % query,
                note='Downloading page %s' % pagenum,
                errnote='Unable to download API page',
                query={'spf': 'navigate'})
            html_content = data[1]['body']['content']

            if 'class="search-message' in html_content:
                raise ExtractorError(
                    '[youtube] No video results', expected=True)

            new_videos = list(self._process_page(html_content))
            videos += new_videos
            # Stop as soon as n results are available (>=, not >): fetching
            # one more page when we already hold exactly n videos would be
            # a wasted request whose results get sliced off below anyway.
            if not new_videos or len(videos) >= n:
                break
            next_link = self._html_search_regex(
                r'href="(/results\?[^"]*\bsp=[^"]+)"[^>]*>\s*<span[^>]+class="[^"]*\byt-uix-button-content\b[^"]*"[^>]*>Next',
                html_content, 'next link', default=None)
            if next_link is None:
                break
            result_url = compat_urlparse.urljoin('https://www.youtube.com/', next_link)

        # The last page may have pushed us past n; trim the surplus
        if len(videos) > n:
            videos = videos[:n]
        return self.playlist_result(videos, query)
75dff0ee 3099
c9ae7b95 3100
class YoutubeSearchDateIE(YoutubeSearchIE):
    """Variant of YoutubeSearchIE that adds a sort argument to the query."""
    _SEARCH_KEY = 'ytsearchdate'
    IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
    IE_DESC = 'YouTube.com searches, newest videos first'
    # Merged into the results URL by the inherited _get_n_results
    _EXTRA_QUERY_ARGS = {'search_sort': 'video_date_uploaded'}
75dff0ee 3106
c9ae7b95 3107
class YoutubeSearchURLIE(YoutubeSearchBaseInfoExtractor):
    """Extractor for plain youtube.com/results?... search URLs."""
    IE_NAME = 'youtube:search_url'
    IE_DESC = 'YouTube.com search URLs'
    # The search terms are taken from either the search_query= or q= parameter
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?P<query>[^&]+)(?:[&]|$)'
    _TESTS = [
        {
            'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
            'playlist_mincount': 5,
            'info_dict': {
                'title': 'youtube-dl test video',
            },
        },
        {
            'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
            'only_matching': True,
        },
    ]

    def _real_extract(self, url):
        """Download the results page and return its videos as a playlist.

        The playlist title is the URL-decoded search query.
        """
        raw_query = re.match(self._VALID_URL, url).group('query')
        query = compat_urllib_parse_unquote_plus(raw_query)
        results_page = self._download_webpage(url, query)
        entries = self._process_page(results_page)
        return self.playlist_result(entries, playlist_title=query)
c9ae7b95
PH
3128
3129
class YoutubeShowIE(YoutubePlaylistsBaseInfoExtractor):
    """Extractor for youtube.com/show/<id> pages."""
    IE_NAME = 'youtube:show'
    IE_DESC = 'YouTube.com (multi-season) shows'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/show/(?P<id>[^?#]*)'
    _TESTS = [
        {
            'url': 'https://www.youtube.com/show/airdisasters',
            'info_dict': {
                'id': 'airdisasters',
                'title': 'Air Disasters',
            },
            'playlist_mincount': 5,
        },
    ]

    def _real_extract(self, url):
        """Hand the show's ``/playlists`` URL to the parent implementation."""
        show_id = self._match_id(url)
        playlists_url = 'https://www.youtube.com/show/%s/playlists' % show_id
        return super(YoutubeShowIE, self)._real_extract(playlists_url)
04cc9617
JMF
3147
3148
class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
    """
    Base class for feed extractors
    Subclasses must define the _FEED_NAME and _PLAYLIST_TITLE properties.
    """
    _LOGIN_REQUIRED = True

    @property
    def IE_NAME(self):
        """Extractor name derived from the subclass's _FEED_NAME."""
        return 'youtube:%s' % self._FEED_NAME

    def _real_initialize(self):
        """Log in before any extraction takes place."""
        self._login()

    def _entries(self, page):
        """Yield one result per not-yet-seen video id found in the feed.

        Starts with the already downloaded ``page`` and keeps following
        the "load more" link (fetched as JSON) until a portion brings no
        new ids or no further link is present.
        """
        # The extraction process is the same as for playlists, but the regex
        # for the video ids doesn't contain an index
        seen_ids = set()  # set, not list: membership is tested for every id
        more_widget_html = content_html = page
        for page_num in itertools.count(1):
            matches = re.findall(r'href="\s*/watch\?v=([0-9A-Za-z_-]{11})', content_html)

            # 'recommended' feed has infinite 'load more' and each new portion spins
            # the same videos in (sometimes) slightly different order, so we'll check
            # for unicity and break when portion has no new videos
            new_ids = [
                video_id for video_id in orderedSet(matches)
                if video_id not in seen_ids]
            if not new_ids:
                break

            seen_ids.update(new_ids)

            for entry in self._ids_to_results(new_ids):
                yield entry

            mobj = re.search(r'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html)
            if not mobj:
                break

            more = self._download_json(
                'https://youtube.com/%s' % mobj.group('more'), self._PLAYLIST_TITLE,
                'Downloading page #%s' % page_num,
                transform_source=uppercase_escape)
            content_html = more['content_html']
            more_widget_html = more['load_more_widget_html']

    def _real_extract(self, url):
        """Download the feed page and wrap its entries in a playlist result.

        ``url`` itself is not used; the feed URL is built from _FEED_NAME.
        """
        page = self._download_webpage(
            'https://www.youtube.com/feed/%s' % self._FEED_NAME,
            self._PLAYLIST_TITLE)
        return self.playlist_result(
            self._entries(page), playlist_title=self._PLAYLIST_TITLE)
25f14e9f
S
3200
3201
class YoutubeWatchLaterIE(YoutubePlaylistIE):
    """Extractor for the "Watch later" list (playlist id ``WL``)."""
    IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
    IE_NAME = 'youtube:watchlater'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/(?:feed/watch_later|(?:playlist|watch)\?(?:.+&)?list=WL)|:ytwatchlater'

    _TESTS = [
        {
            'url': 'https://www.youtube.com/playlist?list=WL',
            'only_matching': True,
        },
        {
            'url': 'https://www.youtube.com/watch?v=bCNU9TrbiRk&index=1&list=WL',
            'only_matching': True,
        },
    ]

    def _real_extract(self, url):
        """Return the single-video result if one is produced, else the WL playlist."""
        _, video_result = self._check_download_just_video(url, 'WL')
        if not video_result:
            _, playlist_result = self._extract_playlist('WL')
            return playlist_result
        return video_result
f459d170 3221
5f6a1245 3222
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    """Extractor that resolves the favourites page to its playlist."""
    IE_DESC = 'YouTube.com favourite videos, ":ytfav" for short (requires authentication)'
    IE_NAME = 'youtube:favorites'
    _LOGIN_REQUIRED = True
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/my_favorites|:ytfav(?:ou?rites)?'

    def _real_extract(self, url):
        """Find the playlist id on the favourites page and delegate to it.

        ``url`` itself is not used; the fixed my_favorites page is fetched.
        """
        favourites_page = self._download_webpage(
            'https://www.youtube.com/my_favorites', 'Youtube Favourites videos')
        # The id is whatever follows "list=" up to the next quote or ampersand
        playlist_id = self._search_regex(
            r'list=(.+?)["&]', favourites_page, 'favourites playlist id')
        return self.url_result(playlist_id, 'YoutubePlaylist')
15870e90
PH
3233
3234
25f14e9f
S
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for /feed/recommended."""
    # _FEED_NAME and _PLAYLIST_TITLE are the two values the feed base class reads
    _FEED_NAME = 'recommended'
    _PLAYLIST_TITLE = 'Youtube Recommended videos'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/recommended|:ytrec(?:ommended)?'
    IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
1ed5b5c9 3240
1ed5b5c9 3241
25f14e9f
S
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for /feed/subscriptions."""
    # The shortcut is spelled with its leading colon, as required by
    # _VALID_URL below and as the sibling feed extractors describe theirs.
    IE_DESC = 'YouTube.com subscriptions feed, ":ytsubs" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/subscriptions|:ytsubs(?:criptions)?'
    # _FEED_NAME and _PLAYLIST_TITLE are the two values the feed base class reads
    _FEED_NAME = 'subscriptions'
    _PLAYLIST_TITLE = 'Youtube Subscriptions'
1ed5b5c9 3247
1ed5b5c9 3248
25f14e9f
S
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for /feed/history."""
    # _FEED_NAME and _PLAYLIST_TITLE are the two values the feed base class reads
    _FEED_NAME = 'history'
    _PLAYLIST_TITLE = 'Youtube History'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/history|:ythistory'
    IE_DESC = 'Youtube watch history, ":ythistory" for short (requires authentication)'
1ed5b5c9
JMF
3254
3255
15870e90
PH
class YoutubeTruncatedURLIE(InfoExtractor):
    """Catch watch/attribution URLs that carry no video id and explain why.

    Such URLs typically result from an unquoted ``&`` on the command
    line; extraction always fails with an explanatory, expected error.
    """
    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    # Verbose-mode pattern: the URL must END right after a lone, harmless
    # query parameter (or after an attribution_link a= value).
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [
        {
            'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
            'only_matching': True,
        },
        {
            'url': 'https://www.youtube.com/watch?',
            'only_matching': True,
        },
        {
            'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
            'only_matching': True,
        },
        {
            'url': 'https://www.youtube.com/watch?feature=foo',
            'only_matching': True,
        },
        {
            'url': 'https://www.youtube.com/watch?hl=en-GB',
            'only_matching': True,
        },
        {
            'url': 'https://www.youtube.com/watch?t=2372',
            'only_matching': True,
        },
    ]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError with a quoting hint."""
        hint = (
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like youtube-dl '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply youtube-dl BaW_jenozKc .')
        raise ExtractorError(hint, expected=True)
772fd5cc
PH
3303
3304
class YoutubeTruncatedIDIE(InfoExtractor):
    """Catch watch URLs whose ``v=`` value is shorter than 11 characters."""
    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    # Matches only ids of 1 to 10 characters that end the URL
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [
        {
            'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
            'only_matching': True,
        },
    ]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError naming the short id."""
        truncated_id = self._match_id(url)
        message = 'Incomplete YouTube ID %s. URL %s looks truncated.' % (truncated_id, url)
        raise ExtractorError(message, expected=True)