]> jfr.im git - yt-dlp.git/blame - youtube_dlc/extractor/youtube.py
Merge pull request #57 from insaneracist/youtube-mix-fix
[yt-dlp.git] / youtube_dlc / extractor / youtube.py
CommitLineData
c5e8d7af 1# coding: utf-8
c5e8d7af 2
78caa52a
PH
3from __future__ import unicode_literals
4
5
0ca96d48 6import itertools
c5e8d7af 7import json
c4417ddb 8import os.path
d77ab8e2 9import random
c5e8d7af 10import re
42939b61 11import time
e0df6211 12import traceback
c5e8d7af 13
b05654f0 14from .common import InfoExtractor, SearchInfoExtractor
2b25cb5d 15from ..jsinterp import JSInterpreter
54256267 16from ..swfinterp import SWFInterpreter
4bb4a188 17from ..compat import (
edf3e38e 18 compat_chr,
f8c55c66 19 compat_HTTPError,
8d81f3e3 20 compat_kwargs,
c5e8d7af 21 compat_parse_qs,
7fd002c0
S
22 compat_urllib_parse_unquote,
23 compat_urllib_parse_unquote_plus,
15707c7e 24 compat_urllib_parse_urlencode,
7c80519c 25 compat_urllib_parse_urlparse,
7c61bd36 26 compat_urlparse,
c5e8d7af 27 compat_str,
4bb4a188
PH
28)
29from ..utils import (
27019dbb 30 bool_or_none,
c5e8d7af 31 clean_html,
9b9c5355 32 error_to_compat_str,
351f37c0 33 extract_attributes,
c5e8d7af 34 ExtractorError,
2d30521a 35 float_or_none,
4bb4a188
PH
36 get_element_by_attribute,
37 get_element_by_id,
dd27fd17 38 int_or_none,
94278f72 39 mimetype2ext,
4bb4a188 40 orderedSet,
6310acf5 41 parse_codecs,
b84071c0 42 parse_count,
7c80519c 43 parse_duration,
0cb58b02 44 remove_quotes,
3995d37d 45 remove_start,
cf7e015f 46 smuggle_url,
dbdaaa23 47 str_or_none,
c93d53f5 48 str_to_int,
556dbe7f 49 try_get,
c5e8d7af
PH
50 unescapeHTML,
51 unified_strdate,
cf7e015f 52 unsmuggle_url,
81c2f20b 53 uppercase_escape,
21c340b8 54 url_or_none,
6e6bc8da 55 urlencode_postdata,
c5e8d7af
PH
56)
57
5f6a1245 58
class YoutubeBaseInfoExtractor(InfoExtractor):
    """Provide base functions for Youtube extractors"""
    _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
    # NOTE(review): not referenced anywhere inside this class.
    _TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge'

    # Endpoints used by the multi-step sign-in flow implemented in _login()
    _LOOKUP_URL = 'https://accounts.google.com/_/signin/sl/lookup'
    _CHALLENGE_URL = 'https://accounts.google.com/_/signin/sl/challenge'
    # {0} is filled with the "TL" token taken from the challenge response
    _TFA_URL = 'https://accounts.google.com/_/signin/challenge?hl=en&TL={0}'

    _NETRC_MACHINE = 'youtube'
    # If True it will raise an error if no login info is provided
    _LOGIN_REQUIRED = False

    _PLAYLIST_ID_RE = r'(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}'

    # Sent with the JSON "load more" requests issued by subclasses
    _YOUTUBE_CLIENT_HEADERS = {
        'x-youtube-client-name': '1',
        'x-youtube-client-version': '1.20200609.04.02',
    }

    def _set_language(self):
        """Set the PREF cookie on .youtube.com (includes hl=en)."""
        self._set_cookie(
            '.youtube.com', 'PREF', 'f1=50000000&f6=8&hl=en',
            # YouTube sets the expire time to about two months
            expire_time=time.time() + 2 * 30 * 24 * 3600)

    def _ids_to_results(self, ids):
        """Return a list with one 'Youtube' url_result per video id in ids."""
        return [
            self.url_result(vid_id, 'Youtube', video_id=vid_id)
            for vid_id in ids]

    def _login(self):
        """
        Attempt to log in to YouTube.
        True is returned if successful or skipped.
        False is returned if login failed.

        If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.
        """
        username, password = self._get_login_info()
        # No authentication to be performed
        if username is None:
            if self._LOGIN_REQUIRED and self._downloader.params.get('cookiefile') is None:
                raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
            if self._downloader.params.get('cookiefile') and False:  # TODO remove 'and False' later - too many people using outdated cookies and open issues, remind them.
                self.to_screen('[Cookies] Reminder - Make sure to always use up to date cookies!')
            return True

        login_page = self._download_webpage(
            self._LOGIN_URL, None,
            note='Downloading login page',
            errnote='unable to fetch login page', fatal=False)
        if login_page is False:
            # Explicit False (not None) so the result matches the docstring
            return False

        login_form = self._hidden_inputs(login_page)

        def req(url, f_req, note, errnote):
            # POST the hidden login-form fields plus the JSON-encoded f_req
            # payload to url; the checks below treat a False result as failure.
            data = login_form.copy()
            data.update({
                'pstMsg': 1,
                'checkConnection': 'youtube',
                'checkedDomains': 'youtube',
                'hl': 'en',
                'deviceinfo': '[null,null,null,[],null,"US",null,null,[],"GlifWebSignIn",null,[null,null,[]]]',
                'f.req': json.dumps(f_req),
                'flowName': 'GlifWebSignIn',
                'flowEntry': 'ServiceLogin',
                # TODO: reverse actual botguard identifier generation algo
                'bgRequest': '["identifier",""]',
            })
            return self._download_json(
                url, None, note=note, errnote=errnote,
                # Drop everything in front of the first '[' before JSON parsing
                transform_source=lambda s: re.sub(r'^[^[]*', '', s),
                fatal=False,
                data=urlencode_postdata(data), headers={
                    'Content-Type': 'application/x-www-form-urlencoded;charset=utf-8',
                    'Google-Accounts-XSRF': 1,
                })

        def warn(message):
            self._downloader.report_warning(message)

        # The same ServiceLogin URL is embedded in both the lookup and the
        # challenge payloads, so it is spelled out only once.
        service_login_url = 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn'

        # NOTE(review): the positional layout of the request/response arrays
        # below is not documented here; presumably reverse-engineered.
        lookup_req = [
            username,
            None, [], None, 'US', None, None, 2, False, True,
            [
                None, None,
                [2, 1, None, 1,
                 service_login_url,
                 None, [], 4],
                1, [None, None, []], None, None, None, True
            ],
            username,
        ]

        lookup_results = req(
            self._LOOKUP_URL, lookup_req,
            'Looking up account info', 'Unable to look up account info')

        if lookup_results is False:
            return False

        user_hash = try_get(lookup_results, lambda x: x[0][2], compat_str)
        if not user_hash:
            warn('Unable to extract user hash')
            return False

        challenge_req = [
            user_hash,
            None, 1, None, [1, None, None, None, [password, None, True]],
            [
                None, None, [2, 1, None, 1, service_login_url, None, [], 4],
                1, [None, None, []], None, None, None, True
            ]]

        challenge_results = req(
            self._CHALLENGE_URL, challenge_req,
            'Logging in', 'Unable to log in')

        if challenge_results is False:
            # Explicit False (not None) so the result matches the docstring
            return False

        login_res = try_get(challenge_results, lambda x: x[0][5], list)
        if login_res:
            login_msg = try_get(login_res, lambda x: x[5], compat_str)
            # Parenthesised on purpose: '%' binds tighter than the conditional
            # expression, so without the parentheses any message other than
            # INCORRECT_ANSWER_ENTERED was emitted without its prefix.
            warn(
                'Unable to login: %s' % (
                    'Invalid password'
                    if login_msg == 'INCORRECT_ANSWER_ENTERED' else login_msg))
            return False

        res = try_get(challenge_results, lambda x: x[0][-1], list)
        if not res:
            warn('Unable to extract result entry')
            return False

        login_challenge = try_get(res, lambda x: x[0][0], list)
        if login_challenge:
            challenge_str = try_get(login_challenge, lambda x: x[2], compat_str)
            if challenge_str == 'TWO_STEP_VERIFICATION':
                # SEND_SUCCESS - TFA code has been successfully sent to phone
                # QUOTA_EXCEEDED - reached the limit of TFA codes
                status = try_get(login_challenge, lambda x: x[5], compat_str)
                if status == 'QUOTA_EXCEEDED':
                    warn('Exceeded the limit of TFA codes, try later')
                    return False

                tl = try_get(challenge_results, lambda x: x[1][2], compat_str)
                if not tl:
                    warn('Unable to extract TL')
                    return False

                tfa_code = self._get_tfa_info('2-step verification code')

                if not tfa_code:
                    warn(
                        'Two-factor authentication required. Provide it either interactively or with --twofactor <code> '
                        '(Note that only TOTP (Google Authenticator App) codes work at this time.)')
                    return False

                # Strip an optional leading "G-" from the entered code
                tfa_code = remove_start(tfa_code, 'G-')

                tfa_req = [
                    user_hash, None, 2, None,
                    [
                        9, None, None, None, None, None, None, None,
                        [None, tfa_code, True, 2]
                    ]]

                tfa_results = req(
                    self._TFA_URL.format(tl), tfa_req,
                    'Submitting TFA code', 'Unable to submit TFA code')

                if tfa_results is False:
                    return False

                tfa_res = try_get(tfa_results, lambda x: x[0][5], list)
                if tfa_res:
                    tfa_msg = try_get(tfa_res, lambda x: x[5], compat_str)
                    # Same precedence fix as for the password warning above
                    warn(
                        'Unable to finish TFA: %s' % (
                            'Invalid TFA code'
                            if tfa_msg == 'INCORRECT_ANSWER_ENTERED' else tfa_msg))
                    return False

                check_cookie_url = try_get(
                    tfa_results, lambda x: x[0][-1][2], compat_str)
            else:
                # Any other challenge cannot be solved here; tell the user how
                # to resolve it in a browser.
                CHALLENGES = {
                    'LOGIN_CHALLENGE': "This device isn't recognized. For your security, Google wants to make sure it's really you.",
                    'USERNAME_RECOVERY': 'Please provide additional information to aid in the recovery process.',
                    'REAUTH': "There is something unusual about your activity. For your security, Google wants to make sure it's really you.",
                }
                challenge = CHALLENGES.get(
                    challenge_str,
                    '%s returned error %s.' % (self.IE_NAME, challenge_str))
                warn('%s\nGo to https://accounts.google.com/, login and solve a challenge.' % challenge)
                return False
        else:
            check_cookie_url = try_get(res, lambda x: x[2], compat_str)

        if not check_cookie_url:
            warn('Unable to extract CheckCookie URL')
            return False

        check_cookie_results = self._download_webpage(
            check_cookie_url, None, 'Checking cookie', fatal=False)

        if check_cookie_results is False:
            return False

        # The login is only considered successful when the CheckCookie page
        # mentions the account dashboard URL.
        if 'https://myaccount.google.com/' not in check_cookie_results:
            warn('Unable to log in')
            return False

        return True

    def _download_webpage_handle(self, *args, **kwargs):
        """Call the parent implementation with disable_polymer=true added to the query.

        The caller's query dict is copied, never mutated.
        """
        # "or {}" also covers an explicit query=None, which has no .copy()
        query = (kwargs.get('query') or {}).copy()
        query['disable_polymer'] = 'true'
        kwargs['query'] = query
        return super(YoutubeBaseInfoExtractor, self)._download_webpage_handle(
            *args, **compat_kwargs(kwargs))

    def _get_yt_initial_data(self, video_id, webpage):
        """Return the parsed ytInitialData object embedded in webpage.

        Returns None when neither assignment form is found; the JSON is
        parsed with fatal=False.
        """
        config = self._search_regex(
            (r'window\["ytInitialData"\]\s*=\s*(.*?)(?<=});',
             r'var\s+ytInitialData\s*=\s*(.*?)(?<=});'),
            webpage, 'ytInitialData', default=None)
        if config:
            return self._parse_json(
                uppercase_escape(config), video_id, fatal=False)
        return None

    def _real_initialize(self):
        """Set the language cookie and attempt a login, if a downloader is attached."""
        if self._downloader is None:
            return
        self._set_language()
        # A failed login is not fatal here: _login() has already warned.
        self._login()
c5e8d7af 297
8377574c 298
class YoutubeEntryListBaseInfoExtractor(YoutubeBaseInfoExtractor):
    # Extract entries from page with "Load more" button
    def _entries(self, page, playlist_id):
        """Yield the entries of page, then of every following "Load more" page.

        Each HTML fragment is handed to self._process_page(), which is not
        defined in this class. Paging stops when the current widget HTML has
        no load-more link or when a fetched fragment is blank.
        """
        max_retries = 3
        widget_html = fragment = page
        for page_num in itertools.count(1):
            for entry in self._process_page(fragment):
                yield entry

            next_link = re.search(
                r'data-uix-load-more-href="/?(?P<more>[^"]+)"', widget_html)
            if next_link is None:
                return

            # Downloading page may result in intermittent 5xx HTTP error
            # that is usually worked around with a retry
            for attempt in range(max_retries + 1):
                retry_note = ' (retry #%d)' % attempt if attempt else ''
                try:
                    more = self._download_json(
                        'https://www.youtube.com/%s' % next_link.group('more'), playlist_id,
                        'Downloading page #%s%s' % (page_num, retry_note),
                        transform_source=uppercase_escape,
                        headers=self._YOUTUBE_CLIENT_HEADERS)
                except ExtractorError as err:
                    transient = (
                        isinstance(err.cause, compat_HTTPError)
                        and err.cause.code in (500, 503))
                    # Give up on any other error, or once retries are used up
                    if not transient or attempt == max_retries:
                        raise
                else:
                    break

            fragment = more['content_html']
            if not fragment.strip():
                # Some webpages show a "Load more" button but they don't
                # have more videos
                return
            widget_html = more['load_more_widget_html']
337
061a75ed
S
338
class YoutubePlaylistBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
    def _process_page(self, content):
        """Yield one 'Youtube' url_result per (id, title) pair found in content."""
        for vid, title in self.extract_videos_from_page(content):
            yield self.url_result(vid, 'Youtube', vid, title)

    def extract_videos_from_page_impl(self, video_re, page, ids_in_page, titles_in_page):
        """Collect video ids and titles matched by video_re in page.

        ids_in_page and titles_in_page are parallel lists, updated in place.
        A new id is appended with its title (possibly None); a repeated id
        only fills in a title that is still empty.
        """
        for match in re.finditer(video_re, page):
            groups = match.groupdict()
            # The link with index 0 is not the first video of the playlist (not sure if still actual)
            # NOTE(review): this compares the 'id' group, not 'index', with '0' - confirm intent.
            if 'index' in groups and match.group('id') == '0':
                continue

            video_id = match.group('id')

            if 'title' in groups:
                video_title = unescapeHTML(match.group('title'))
            else:
                video_title = None
            if video_title:
                video_title = video_title.strip()
            # The "Play all" link is not a real title
            if video_title == '► Play all':
                video_title = None

            try:
                position = ids_in_page.index(video_id)
            except ValueError:
                # First time this id is seen
                ids_in_page.append(video_id)
                titles_in_page.append(video_title)
            else:
                if video_title and not titles_in_page[position]:
                    titles_in_page[position] = video_title

    def extract_videos_from_page(self, page):
        """Return zipped (video_id, title) pairs matched by self._VIDEO_RE in page."""
        found_ids, found_titles = [], []
        self.extract_videos_from_page_impl(
            self._VIDEO_RE, page, found_ids, found_titles)
        return zip(found_ids, found_titles)
370
371
061a75ed
S
class YoutubePlaylistsBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
    def _process_page(self, content):
        """Yield a 'YoutubePlaylist' url_result for each playlist linked from content."""
        linked_ids = re.findall(
            r'<h3[^>]+class="[^"]*yt-lockup-title[^"]*"[^>]*><a[^>]+href="/?playlist\?list=([0-9A-Za-z-_]{10,})"',
            content)
        # orderedSet() is applied so a playlist linked several times is
        # presumably yielded only once.
        for list_id in orderedSet(linked_ids):
            playlist_url = 'https://www.youtube.com/playlist?list=%s' % list_id
            yield self.url_result(playlist_url, 'YoutubePlaylist')

    def _real_extract(self, url):
        """Download url and return a playlist result built from its entries."""
        playlist_id = self._match_id(url)
        webpage = self._download_webpage(url, playlist_id)
        # Title is optional (fatal=False)
        title = self._og_search_title(webpage, fatal=False)
        entries = self._entries(webpage, playlist_id)
        return self.playlist_result(entries, playlist_id, title)
0c148415
S
385
386
360e1ca5 387class YoutubeIE(YoutubeBaseInfoExtractor):
78caa52a 388 IE_DESC = 'YouTube.com'
cb7dfeea 389 _VALID_URL = r"""(?x)^
c5e8d7af 390 (
edb53e2d 391 (?:https?://|//) # http(s):// or protocol-independent URL
66b48727 392 (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com/|
484aaeb2 393 (?:www\.)?deturl\.com/www\.youtube\.com/|
e70dc1d1 394 (?:www\.)?pwnyoutube\.com/|
8b561bfc 395 (?:www\.)?hooktube\.com/|
f7000f3a 396 (?:www\.)?yourepeat\.com/|
e69ae5b9 397 tube\.majestyc\.net/|
ba036333 398 # Invidious instances taken from https://github.com/omarroth/invidious/wiki/Invidious-Instances
77d95677 399 (?:(?:www|dev)\.)?invidio\.us/|
ba036333 400 (?:(?:www|no)\.)?invidiou\.sh/|
401 (?:(?:www|fi|de)\.)?invidious\.snopyta\.org/|
8ae113ca 402 (?:www\.)?invidious\.kabi\.tk/|
ba036333 403 (?:www\.)?invidious\.13ad\.de/|
791d2e81 404 (?:www\.)?invidious\.mastodon\.host/|
494d664e 405 (?:www\.)?invidious\.nixnet\.xyz/|
666d808e 406 (?:www\.)?invidious\.drycat\.fr/|
ba036333 407 (?:www\.)?tube\.poal\.co/|
8ae113ca 408 (?:www\.)?vid\.wxzm\.sx/|
384bf91f 409 (?:www\.)?yewtu\.be/|
494d664e 410 (?:www\.)?yt\.elukerio\.org/|
894b3826 411 (?:www\.)?yt\.lelux\.fi/|
1db5ab6b 412 (?:www\.)?invidious\.ggc-project\.de/|
413 (?:www\.)?yt\.maisputain\.ovh/|
414 (?:www\.)?invidious\.13ad\.de/|
415 (?:www\.)?invidious\.toot\.koeln/|
416 (?:www\.)?invidious\.fdn\.fr/|
417 (?:www\.)?watch\.nettohikari\.com/|
bff90fc5 418 (?:www\.)?kgg2m7yk5aybusll\.onion/|
419 (?:www\.)?qklhadlycap4cnod\.onion/|
420 (?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion/|
421 (?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion/|
422 (?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion/|
423 (?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion/|
33c1c7d8 424 (?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p/|
1db5ab6b 425 (?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion/|
e69ae5b9 426 youtube\.googleapis\.com/) # the various hostnames, with wildcard subdomains
c5e8d7af
PH
427 (?:.*?\#/)? # handle anchor (#/) redirect urls
428 (?: # the various things that can precede the ID:
ac7553d0 429 (?:(?:v|embed|e)/(?!videoseries)) # v/ or embed/ or e/
c5e8d7af 430 |(?: # or the v= param in all its forms
f7000f3a 431 (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
c5e8d7af 432 (?:\?|\#!?) # the params delimiter ? or # or #!
040ac686 433 (?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
c5e8d7af
PH
434 v=
435 )
f4b05232 436 ))
cbaed4bb
S
437 |(?:
438 youtu\.be| # just youtu.be/xxxx
6d4fc66b
S
439 vid\.plus| # or vid.plus/xxxx
440 zwearz\.com/watch| # or zwearz.com/watch/xxxx
cbaed4bb 441 )/
edb53e2d 442 |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
f4b05232 443 )
c5e8d7af 444 )? # all until now is optional -> you can pass the naked ID
8963d9c2 445 ([0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
d0ba5587
S
446 (?!.*?\blist=
447 (?:
448 %(playlist_id)s| # combined list/video URLs are handled by the playlist IE
449 WL # WL are handled by the watch later IE
450 )
451 )
c5e8d7af 452 (?(1).+)? # if we found the ID, everything can follow
d0ba5587 453 $""" % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
c5e8d7af 454 _NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
e40c758c
S
455 _PLAYER_INFO_RE = (
456 r'/(?P<id>[a-zA-Z0-9_-]{8,})/player_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?/base\.(?P<ext>[a-z]+)$',
457 r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.(?P<ext>[a-z]+)$',
458 )
2c62dc26 459 _formats = {
c2d3cb4c 460 '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
461 '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
462 '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
463 '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
464 '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
465 '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
466 '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
467 '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
3834d3e3 468 # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
c2d3cb4c 469 '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
470 '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
471 '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
472 '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
473 '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
474 '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
e1a0bfdf 475 '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
c2d3cb4c 476 '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
477 '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
e1a0bfdf 478
479
480 # 3D videos
c2d3cb4c 481 '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
482 '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
483 '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
484 '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
e1a0bfdf 485 '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
486 '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
487 '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
836a086c 488
96fb5605 489 # Apple HTTP Live Streaming
11f12195 490 '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
c2d3cb4c 491 '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
492 '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
493 '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
494 '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
495 '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
e1a0bfdf 496 '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
497 '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
2c62dc26
PH
498
499 # DASH mp4 video
d23028a8
S
500 '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
501 '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
502 '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
503 '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
504 '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
067aa17e 505 '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
d23028a8
S
506 '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
507 '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
508 '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
509 '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
510 '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
511 '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},
836a086c 512
f6f1fc92 513 # Dash mp4 audio
d23028a8
S
514 '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
515 '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
516 '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
517 '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
518 '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
519 '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
520 '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},
836a086c
AZ
521
522 # Dash webm
d23028a8
S
523 '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
524 '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
525 '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
526 '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
527 '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
528 '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
529 '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
530 '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
531 '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
532 '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
533 '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
534 '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
535 '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
536 '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
537 '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
4c6b4764 538 # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
d23028a8
S
539 '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
540 '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
541 '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
542 '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
543 '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
544 '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
2c62dc26
PH
545
546 # Dash webm audio
d23028a8
S
547 '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
548 '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},
ce6b9a2d 549
0857baad 550 # Dash webm audio with opus inside
d23028a8
S
551 '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
552 '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
553 '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},
0857baad 554
ce6b9a2d
PH
555 # RTMP (unnamed)
556 '_rtmp': {'protocol': 'rtmp'},
b85eae0f
S
557
558 # av01 video only formats sometimes served with "unknown" codecs
559 '394': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
560 '395': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
561 '396': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
562 '397': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
c5e8d7af 563 }
84da5d84 564 _SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')
836a086c 565
fd5c4aab
S
566 _GEO_BYPASS = False
567
78caa52a 568 IE_NAME = 'youtube'
2eb88d95
PH
569 _TESTS = [
570 {
2d3d2997 571 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
4bc3a23e
PH
572 'info_dict': {
573 'id': 'BaW_jenozKc',
574 'ext': 'mp4',
3867038a 575 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
4bc3a23e
PH
576 'uploader': 'Philipp Hagemeister',
577 'uploader_id': 'phihag',
ec85ded8 578 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
dd4c4492
S
579 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
580 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
4bc3a23e 581 'upload_date': '20121002',
3867038a 582 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
4bc3a23e 583 'categories': ['Science & Technology'],
3867038a 584 'tags': ['youtube-dl'],
556dbe7f 585 'duration': 10,
dbdaaa23 586 'view_count': int,
3e7c1224
PH
587 'like_count': int,
588 'dislike_count': int,
7c80519c 589 'start_time': 1,
297a564b 590 'end_time': 9,
2eb88d95 591 }
0e853ca4 592 },
fccd3771 593 {
4bc3a23e
PH
594 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
595 'note': 'Embed-only video (#1746)',
596 'info_dict': {
597 'id': 'yZIXLfi8CZQ',
598 'ext': 'mp4',
599 'upload_date': '20120608',
600 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
601 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
602 'uploader': 'SET India',
94bfcd23 603 'uploader_id': 'setindia',
ec85ded8 604 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
94bfcd23 605 'age_limit': 18,
fccd3771
PH
606 }
607 },
11b56058 608 {
2d3d2997 609 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=UxxajLWwzqY',
11b56058
PM
610 'note': 'Use the first video ID in the URL',
611 'info_dict': {
612 'id': 'BaW_jenozKc',
613 'ext': 'mp4',
3867038a 614 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
11b56058
PM
615 'uploader': 'Philipp Hagemeister',
616 'uploader_id': 'phihag',
ec85ded8 617 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
11b56058 618 'upload_date': '20121002',
3867038a 619 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
11b56058 620 'categories': ['Science & Technology'],
3867038a 621 'tags': ['youtube-dl'],
556dbe7f 622 'duration': 10,
dbdaaa23 623 'view_count': int,
11b56058
PM
624 'like_count': int,
625 'dislike_count': int,
34a7de29
S
626 },
627 'params': {
628 'skip_download': True,
629 },
11b56058 630 },
dd27fd17 631 {
2d3d2997 632 'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
4bc3a23e
PH
633 'note': '256k DASH audio (format 141) via DASH manifest',
634 'info_dict': {
635 'id': 'a9LDPn-MO4I',
636 'ext': 'm4a',
637 'upload_date': '20121002',
638 'uploader_id': '8KVIDEO',
ec85ded8 639 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
4bc3a23e
PH
640 'description': '',
641 'uploader': '8KVIDEO',
642 'title': 'UHDTV TEST 8K VIDEO.mp4'
4919603f 643 },
4bc3a23e
PH
644 'params': {
645 'youtube_include_dash_manifest': True,
646 'format': '141',
4919603f 647 },
de3c7fe0 648 'skip': 'format 141 not served anymore',
dd27fd17 649 },
aa79ac0c
PH
650 # Controversy video
651 {
652 'url': 'https://www.youtube.com/watch?v=T4XJQO3qol8',
653 'info_dict': {
654 'id': 'T4XJQO3qol8',
655 'ext': 'mp4',
556dbe7f 656 'duration': 219,
aa79ac0c 657 'upload_date': '20100909',
4fe54c12 658 'uploader': 'Amazing Atheist',
aa79ac0c 659 'uploader_id': 'TheAmazingAtheist',
ec85ded8 660 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheAmazingAtheist',
aa79ac0c
PH
661 'title': 'Burning Everyone\'s Koran',
662 'description': 'SUBSCRIBE: http://www.youtube.com/saturninefilms\n\nEven Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html',
663 }
c522adb1 664 },
dd2d55f1 665 # Normal age-gate video (embed allowed)
c522adb1 666 {
2d3d2997 667 'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
c522adb1
JMF
668 'info_dict': {
669 'id': 'HtVdAasjOgU',
670 'ext': 'mp4',
671 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
ec85ded8 672 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
556dbe7f 673 'duration': 142,
c522adb1
JMF
674 'uploader': 'The Witcher',
675 'uploader_id': 'WitcherGame',
ec85ded8 676 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
c522adb1 677 'upload_date': '20140605',
34952f09 678 'age_limit': 18,
c522adb1
JMF
679 },
680 },
067aa17e 681 # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
e52a40ab
PH
682 {
683 'url': 'lqQg6PlCWgI',
684 'info_dict': {
685 'id': 'lqQg6PlCWgI',
686 'ext': 'mp4',
556dbe7f 687 'duration': 6085,
90227264 688 'upload_date': '20150827',
cbe2bd91 689 'uploader_id': 'olympic',
ec85ded8 690 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
cbe2bd91 691 'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
be49068d 692 'uploader': 'Olympic',
cbe2bd91
PH
693 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
694 },
695 'params': {
696 'skip_download': 'requires avconv',
e52a40ab 697 }
cbe2bd91 698 },
6271f1ca
PH
699 # Non-square pixels
700 {
701 'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
702 'info_dict': {
703 'id': '_b-2C3KPAM0',
704 'ext': 'mp4',
705 'stretched_ratio': 16 / 9.,
556dbe7f 706 'duration': 85,
6271f1ca
PH
707 'upload_date': '20110310',
708 'uploader_id': 'AllenMeow',
ec85ded8 709 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
6271f1ca 710 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
eb6793ba 711 'uploader': '孫ᄋᄅ',
6271f1ca
PH
712 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
713 },
06b491eb
S
714 },
715 # url_encoded_fmt_stream_map is empty string
716 {
717 'url': 'qEJwOuvDf7I',
718 'info_dict': {
719 'id': 'qEJwOuvDf7I',
f57b7835 720 'ext': 'webm',
06b491eb
S
721 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
722 'description': '',
723 'upload_date': '20150404',
724 'uploader_id': 'spbelect',
725 'uploader': 'Наблюдатели Петербурга',
726 },
727 'params': {
728 'skip_download': 'requires avconv',
e323cf3f
S
729 },
730 'skip': 'This live event has ended.',
06b491eb 731 },
067aa17e 732 # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
da77d856
S
733 {
734 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
735 'info_dict': {
736 'id': 'FIl7x6_3R5Y',
eb6793ba 737 'ext': 'webm',
da77d856
S
738 'title': 'md5:7b81415841e02ecd4313668cde88737a',
739 'description': 'md5:116377fd2963b81ec4ce64b542173306',
556dbe7f 740 'duration': 220,
da77d856
S
741 'upload_date': '20150625',
742 'uploader_id': 'dorappi2000',
ec85ded8 743 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
da77d856 744 'uploader': 'dorappi2000',
eb6793ba 745 'formats': 'mincount:31',
da77d856 746 },
eb6793ba 747 'skip': 'not actual anymore',
2ee8f5d8 748 },
8a1a26ce
YCH
749 # DASH manifest with segment_list
750 {
751 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
752 'md5': '8ce563a1d667b599d21064e982ab9e31',
753 'info_dict': {
754 'id': 'CsmdDsKjzN8',
755 'ext': 'mp4',
17ee98e1 756 'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510
8a1a26ce
YCH
757 'uploader': 'Airtek',
758 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
759 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
760 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
761 },
762 'params': {
763 'youtube_include_dash_manifest': True,
764 'format': '135', # bestvideo
be49068d
S
765 },
766 'skip': 'This live event has ended.',
2ee8f5d8 767 },
cf7e015f
S
768 {
769 # Multifeed videos (multiple cameras), URL is for Main Camera
770 'url': 'https://www.youtube.com/watch?v=jqWvoWXjCVs',
771 'info_dict': {
772 'id': 'jqWvoWXjCVs',
773 'title': 'teamPGP: Rocket League Noob Stream',
774 'description': 'md5:dc7872fb300e143831327f1bae3af010',
775 },
776 'playlist': [{
777 'info_dict': {
778 'id': 'jqWvoWXjCVs',
779 'ext': 'mp4',
780 'title': 'teamPGP: Rocket League Noob Stream (Main Camera)',
781 'description': 'md5:dc7872fb300e143831327f1bae3af010',
556dbe7f 782 'duration': 7335,
cf7e015f
S
783 'upload_date': '20150721',
784 'uploader': 'Beer Games Beer',
785 'uploader_id': 'beergamesbeer',
ec85ded8 786 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
7caf9830 787 'license': 'Standard YouTube License',
cf7e015f
S
788 },
789 }, {
790 'info_dict': {
791 'id': '6h8e8xoXJzg',
792 'ext': 'mp4',
793 'title': 'teamPGP: Rocket League Noob Stream (kreestuh)',
794 'description': 'md5:dc7872fb300e143831327f1bae3af010',
556dbe7f 795 'duration': 7337,
cf7e015f
S
796 'upload_date': '20150721',
797 'uploader': 'Beer Games Beer',
798 'uploader_id': 'beergamesbeer',
ec85ded8 799 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
7caf9830 800 'license': 'Standard YouTube License',
cf7e015f
S
801 },
802 }, {
803 'info_dict': {
804 'id': 'PUOgX5z9xZw',
805 'ext': 'mp4',
806 'title': 'teamPGP: Rocket League Noob Stream (grizzle)',
807 'description': 'md5:dc7872fb300e143831327f1bae3af010',
556dbe7f 808 'duration': 7337,
cf7e015f
S
809 'upload_date': '20150721',
810 'uploader': 'Beer Games Beer',
811 'uploader_id': 'beergamesbeer',
ec85ded8 812 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
7caf9830 813 'license': 'Standard YouTube License',
cf7e015f
S
814 },
815 }, {
816 'info_dict': {
817 'id': 'teuwxikvS5k',
818 'ext': 'mp4',
819 'title': 'teamPGP: Rocket League Noob Stream (zim)',
820 'description': 'md5:dc7872fb300e143831327f1bae3af010',
556dbe7f 821 'duration': 7334,
cf7e015f
S
822 'upload_date': '20150721',
823 'uploader': 'Beer Games Beer',
824 'uploader_id': 'beergamesbeer',
ec85ded8 825 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
7caf9830 826 'license': 'Standard YouTube License',
cf7e015f
S
827 },
828 }],
829 'params': {
830 'skip_download': True,
831 },
4fe54c12 832 'skip': 'This video is not available.',
cbaed4bb 833 },
f9f49d87 834 {
067aa17e 835 # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
f9f49d87
S
836 'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
837 'info_dict': {
838 'id': 'gVfLd0zydlo',
839 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
840 },
841 'playlist_count': 2,
be49068d 842 'skip': 'Not multifeed anymore',
f9f49d87 843 },
cbaed4bb 844 {
2d3d2997 845 'url': 'https://vid.plus/FlRa-iH7PGw',
cbaed4bb 846 'only_matching': True,
0e49d9a6 847 },
6d4fc66b 848 {
2d3d2997 849 'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
6d4fc66b
S
850 'only_matching': True,
851 },
0e49d9a6 852 {
067aa17e 853 # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
a8776b10 854 # Also tests cut-off URL expansion in video description (see
067aa17e
S
855 # https://github.com/ytdl-org/youtube-dl/issues/1892,
856 # https://github.com/ytdl-org/youtube-dl/issues/8164)
0e49d9a6
LL
857 'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
858 'info_dict': {
859 'id': 'lsguqyKfVQg',
860 'ext': 'mp4',
861 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
eb6793ba 862 'alt_title': 'Dark Walk - Position Music',
0e49d9a6 863 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
556dbe7f 864 'duration': 133,
0e49d9a6
LL
865 'upload_date': '20151119',
866 'uploader_id': 'IronSoulElf',
ec85ded8 867 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
0e49d9a6 868 'uploader': 'IronSoulElf',
eb6793ba
S
869 'creator': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
870 'track': 'Dark Walk - Position Music',
871 'artist': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
92bc97d3 872 'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
0e49d9a6
LL
873 },
874 'params': {
875 'skip_download': True,
876 },
877 },
61f92af1 878 {
067aa17e 879 # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
61f92af1
S
880 'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
881 'only_matching': True,
882 },
313dfc45
LL
883 {
884 # Video with yt:stretch=17:0
885 'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
886 'info_dict': {
887 'id': 'Q39EVAstoRM',
888 'ext': 'mp4',
889 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
890 'description': 'md5:ee18a25c350637c8faff806845bddee9',
891 'upload_date': '20151107',
892 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
893 'uploader': 'CH GAMER DROID',
894 },
895 'params': {
896 'skip_download': True,
897 },
be49068d 898 'skip': 'This video does not exist.',
313dfc45 899 },
7caf9830
S
900 {
901 # Video licensed under Creative Commons
902 'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
903 'info_dict': {
904 'id': 'M4gD1WSo5mA',
905 'ext': 'mp4',
906 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
907 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
556dbe7f 908 'duration': 721,
7caf9830
S
909 'upload_date': '20150127',
910 'uploader_id': 'BerkmanCenter',
ec85ded8 911 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
556dbe7f 912 'uploader': 'The Berkman Klein Center for Internet & Society',
7caf9830
S
913 'license': 'Creative Commons Attribution license (reuse allowed)',
914 },
915 'params': {
916 'skip_download': True,
917 },
918 },
fd050249
S
919 {
920 # Channel-like uploader_url
921 'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
922 'info_dict': {
923 'id': 'eQcmzGIKrzg',
924 'ext': 'mp4',
925 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
926 'description': 'md5:dda0d780d5a6e120758d1711d062a867',
556dbe7f 927 'duration': 4060,
fd050249 928 'upload_date': '20151119',
eb6793ba 929 'uploader': 'Bernie Sanders',
fd050249 930 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
ec85ded8 931 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
fd050249
S
932 'license': 'Creative Commons Attribution license (reuse allowed)',
933 },
934 'params': {
935 'skip_download': True,
936 },
937 },
040ac686
S
938 {
939 'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
940 'only_matching': True,
7f29cf54
S
941 },
942 {
067aa17e 943 # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
7f29cf54
S
944 'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
945 'only_matching': True,
6496ccb4
S
946 },
947 {
948 # Rental video preview
949 'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
950 'info_dict': {
951 'id': 'uGpuVWrhIzE',
952 'ext': 'mp4',
953 'title': 'Piku - Trailer',
954 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
955 'upload_date': '20150811',
956 'uploader': 'FlixMatrix',
957 'uploader_id': 'FlixMatrixKaravan',
ec85ded8 958 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
6496ccb4
S
959 'license': 'Standard YouTube License',
960 },
961 'params': {
962 'skip_download': True,
963 },
eb6793ba 964 'skip': 'This video is not available.',
022a5d66 965 },
12afdc2a
S
966 {
967 # YouTube Red video with episode data
968 'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
969 'info_dict': {
970 'id': 'iqKdEhx-dD4',
971 'ext': 'mp4',
972 'title': 'Isolation - Mind Field (Ep 1)',
4fe54c12 973 'description': 'md5:46a29be4ceffa65b92d277b93f463c0f',
556dbe7f 974 'duration': 2085,
12afdc2a
S
975 'upload_date': '20170118',
976 'uploader': 'Vsauce',
977 'uploader_id': 'Vsauce',
978 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
12afdc2a
S
979 'series': 'Mind Field',
980 'season_number': 1,
981 'episode_number': 1,
982 },
983 'params': {
984 'skip_download': True,
985 },
986 'expected_warnings': [
987 'Skipping DASH manifest',
988 ],
989 },
c7121fa7
S
990 {
991 # The following content has been identified by the YouTube community
992 # as inappropriate or offensive to some audiences.
993 'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
994 'info_dict': {
995 'id': '6SJNVb0GnPI',
996 'ext': 'mp4',
997 'title': 'Race Differences in Intelligence',
998 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
999 'duration': 965,
1000 'upload_date': '20140124',
1001 'uploader': 'New Century Foundation',
1002 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
1003 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
c7121fa7
S
1004 },
1005 'params': {
1006 'skip_download': True,
1007 },
1008 },
022a5d66
S
1009 {
1010 # itag 212
1011 'url': '1t24XAntNCY',
1012 'only_matching': True,
fd5c4aab
S
1013 },
1014 {
1015 # geo restricted to JP
1016 'url': 'sJL6WA-aGkQ',
1017 'only_matching': True,
1018 },
d0ba5587
S
1019 {
1020 'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
1021 'only_matching': True,
1022 },
cd5a74a2
S
1023 {
1024 'url': 'https://invidio.us/watch?v=BaW_jenozKc',
1025 'only_matching': True,
1026 },
825cd268
RA
1027 {
1028 # DRM protected
1029 'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
1030 'only_matching': True,
4fe54c12
S
1031 },
1032 {
1033 # Video with unsupported adaptive stream type formats
1034 'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
1035 'info_dict': {
1036 'id': 'Z4Vy8R84T1U',
1037 'ext': 'mp4',
1038 'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
1039 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
1040 'duration': 433,
1041 'upload_date': '20130923',
1042 'uploader': 'Amelia Putri Harwita',
1043 'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',
1044 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',
1045 'formats': 'maxcount:10',
1046 },
1047 'params': {
1048 'skip_download': True,
1049 'youtube_include_dash_manifest': False,
1050 },
5429d6a9 1051 'skip': 'not actual anymore',
5caabd3c 1052 },
1053 {
822b9d9c 1054 # Youtube Music Auto-generated description
5caabd3c 1055 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1056 'info_dict': {
1057 'id': 'MgNrAu2pzNs',
1058 'ext': 'mp4',
1059 'title': 'Voyeur Girl',
1060 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
1061 'upload_date': '20190312',
5429d6a9
S
1062 'uploader': 'Stephen - Topic',
1063 'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',
5caabd3c 1064 'artist': 'Stephen',
1065 'track': 'Voyeur Girl',
1066 'album': 'it\'s too much love to know my dear',
1067 'release_date': '20190313',
1068 'release_year': 2019,
1069 },
1070 'params': {
1071 'skip_download': True,
1072 },
1073 },
1074 {
822b9d9c 1075 # Youtube Music Auto-generated description
5caabd3c 1076 # Retrieve 'artist' field from 'Artist:' in video description
1077 # when it is present on youtube music video
5caabd3c 1078 'url': 'https://www.youtube.com/watch?v=k0jLE7tTwjY',
1079 'info_dict': {
1080 'id': 'k0jLE7tTwjY',
1081 'ext': 'mp4',
1082 'title': 'Latch Feat. Sam Smith',
1083 'description': 'md5:3cb1e8101a7c85fcba9b4fb41b951335',
1084 'upload_date': '20150110',
1085 'uploader': 'Various Artists - Topic',
1086 'uploader_id': 'UCNkEcmYdjrH4RqtNgh7BZ9w',
1087 'artist': 'Disclosure',
1088 'track': 'Latch Feat. Sam Smith',
1089 'album': 'Latch Featuring Sam Smith',
1090 'release_date': '20121008',
1091 'release_year': 2012,
1092 },
1093 'params': {
1094 'skip_download': True,
1095 },
1096 },
1097 {
822b9d9c 1098 # Youtube Music Auto-generated description
5caabd3c 1099 # handle multiple artists on youtube music video
1100 'url': 'https://www.youtube.com/watch?v=74qn0eJSjpA',
1101 'info_dict': {
1102 'id': '74qn0eJSjpA',
1103 'ext': 'mp4',
1104 'title': 'Eastside',
1105 'description': 'md5:290516bb73dcbfab0dcc4efe6c3de5f2',
1106 'upload_date': '20180710',
1107 'uploader': 'Benny Blanco - Topic',
1108 'uploader_id': 'UCzqz_ksRu_WkIzmivMdIS7A',
1109 'artist': 'benny blanco, Halsey, Khalid',
1110 'track': 'Eastside',
1111 'album': 'Eastside',
1112 'release_date': '20180713',
1113 'release_year': 2018,
1114 },
1115 'params': {
1116 'skip_download': True,
1117 },
1118 },
1119 {
822b9d9c 1120 # Youtube Music Auto-generated description
5caabd3c 1121 # handle youtube music video with release_year and no release_date
1122 'url': 'https://www.youtube.com/watch?v=-hcAI0g-f5M',
1123 'info_dict': {
1124 'id': '-hcAI0g-f5M',
1125 'ext': 'mp4',
1126 'title': 'Put It On Me',
5429d6a9 1127 'description': 'md5:f6422397c07c4c907c6638e1fee380a5',
5caabd3c 1128 'upload_date': '20180426',
1129 'uploader': 'Matt Maeson - Topic',
1130 'uploader_id': 'UCnEkIGqtGcQMLk73Kp-Q5LQ',
1131 'artist': 'Matt Maeson',
1132 'track': 'Put It On Me',
1133 'album': 'The Hearse',
1134 'release_date': None,
1135 'release_year': 2018,
1136 },
1137 'params': {
1138 'skip_download': True,
1139 },
1140 },
66b48727
RA
1141 {
1142 'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',
1143 'only_matching': True,
1144 },
011e75e6
S
1145 {
1146 # invalid -> valid video id redirection
1147 'url': 'DJztXj2GPfl',
1148 'info_dict': {
1149 'id': 'DJztXj2GPfk',
1150 'ext': 'mp4',
1151 'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',
1152 'description': 'md5:bf577a41da97918e94fa9798d9228825',
1153 'upload_date': '20090125',
1154 'uploader': 'Prochorowka',
1155 'uploader_id': 'Prochorowka',
1156 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',
1157 'artist': 'Panjabi MC',
1158 'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',
1159 'album': 'Beware of the Boys (Mundian To Bach Ke)',
1160 },
1161 'params': {
1162 'skip_download': True,
1163 },
ea74e00b
DP
1164 },
1165 {
1166 # empty description results in an empty string
1167 'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',
1168 'info_dict': {
1169 'id': 'x41yOUIvK2k',
1170 'ext': 'mp4',
1171 'title': 'IMG 3456',
1172 'description': '',
1173 'upload_date': '20170613',
1174 'uploader_id': 'ElevageOrVert',
1175 'uploader': 'ElevageOrVert',
1176 },
1177 'params': {
1178 'skip_download': True,
1179 },
1180 },
2eb88d95
PH
1181 ]
1182
e0df6211
PH
def __init__(self, *args, **kwargs):
    """Set up the extractor with an empty per-instance player cache.

    All positional and keyword arguments are forwarded unchanged to the
    parent class constructor.
    """
    super(YoutubeIE, self).__init__(*args, **kwargs)
    # Signature functions that were already extracted; filled lazily by
    # _decrypt_signature.
    self._player_cache = {}
e0df6211 1186
c5e8d7af
PH
def report_video_info_webpage_download(self, video_id):
    """Tell the user that the video info webpage is being downloaded."""
    message = '%s: Downloading video info webpage' % video_id
    self.to_screen(message)
c5e8d7af 1190
c5e8d7af
PH
def report_information_extraction(self, video_id):
    """Tell the user that video information is being extracted."""
    message = '%s: Extracting video information' % video_id
    self.to_screen(message)
c5e8d7af
PH
1194
def report_unavailable_format(self, video_id, format):
    """Tell the user that the given format is not available for the video."""
    details = (video_id, format)
    self.to_screen('%s: Format %s not available' % details)
c5e8d7af
PH
1198
def report_rtmp_download(self):
    """Tell the user that the download will use the RTMP protocol."""
    message = 'RTMP download detected'
    self.to_screen(message)
c5e8d7af 1202
60064c53
PH
1203 def _signature_cache_id(self, example_sig):
1204 """ Return a string representation of a signature """
78caa52a 1205 return '.'.join(compat_str(len(part)) for part in example_sig.split('.'))
60064c53 1206
e40c758c
S
@classmethod
def _extract_player_info(cls, player_url):
    """Split a player URL into its extension and player id.

    The patterns in cls._PLAYER_INFO_RE are searched one after another and
    the first one that matches wins.

    Returns a tuple of the pattern's 'ext' and 'id' groups.
    Raises ExtractorError when none of the patterns matches.
    """
    # Lazy generator: searching stops as soon as one pattern matches.
    candidates = (re.search(pattern, player_url) for pattern in cls._PLAYER_INFO_RE)
    id_m = next((found for found in candidates if found), None)
    if id_m is None:
        raise ExtractorError('Cannot identify player %r' % player_url)
    return id_m.group('ext'), id_m.group('id')
1216
1217 def _extract_signature_function(self, video_id, player_url, example_sig):
1218 player_type, player_id = self._extract_player_info(player_url)
e0df6211 1219
c4417ddb 1220 # Read from filesystem cache
60064c53
PH
1221 func_id = '%s_%s_%s' % (
1222 player_type, player_id, self._signature_cache_id(example_sig))
c4417ddb 1223 assert os.path.basename(func_id) == func_id
a0e07d31 1224
69ea8ca4 1225 cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
a0e07d31 1226 if cache_spec is not None:
78caa52a 1227 return lambda s: ''.join(s[i] for i in cache_spec)
83799698 1228
6d1a55a5
PH
1229 download_note = (
1230 'Downloading player %s' % player_url
1231 if self._downloader.params.get('verbose') else
1232 'Downloading %s player %s' % (player_type, player_id)
1233 )
e0df6211
PH
1234 if player_type == 'js':
1235 code = self._download_webpage(
1236 player_url, video_id,
6d1a55a5 1237 note=download_note,
69ea8ca4 1238 errnote='Download of %s failed' % player_url)
83799698 1239 res = self._parse_sig_js(code)
c4417ddb 1240 elif player_type == 'swf':
e0df6211
PH
1241 urlh = self._request_webpage(
1242 player_url, video_id,
6d1a55a5 1243 note=download_note,
69ea8ca4 1244 errnote='Download of %s failed' % player_url)
e0df6211 1245 code = urlh.read()
83799698 1246 res = self._parse_sig_swf(code)
e0df6211
PH
1247 else:
1248 assert False, 'Invalid player type %r' % player_type
1249
785521bf
PH
1250 test_string = ''.join(map(compat_chr, range(len(example_sig))))
1251 cache_res = res(test_string)
1252 cache_spec = [ord(c) for c in cache_res]
83799698 1253
69ea8ca4 1254 self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec)
83799698
PH
1255 return res
1256
60064c53 1257 def _print_sig_code(self, func, example_sig):
edf3e38e
PH
1258 def gen_sig_code(idxs):
1259 def _genslice(start, end, step):
78caa52a 1260 starts = '' if start == 0 else str(start)
8bcc8756 1261 ends = (':%d' % (end + step)) if end + step >= 0 else ':'
69ea8ca4 1262 steps = '' if step == 1 else (':%d' % step)
78caa52a 1263 return 's[%s%s%s]' % (starts, ends, steps)
edf3e38e
PH
1264
1265 step = None
7af808a5
PH
1266 # Quelch pyflakes warnings - start will be set when step is set
1267 start = '(Never used)'
edf3e38e
PH
1268 for i, prev in zip(idxs[1:], idxs[:-1]):
1269 if step is not None:
1270 if i - prev == step:
1271 continue
1272 yield _genslice(start, prev, step)
1273 step = None
1274 continue
1275 if i - prev in [-1, 1]:
1276 step = i - prev
1277 start = prev
1278 continue
1279 else:
78caa52a 1280 yield 's[%d]' % prev
edf3e38e 1281 if step is None:
78caa52a 1282 yield 's[%d]' % i
edf3e38e
PH
1283 else:
1284 yield _genslice(start, i, step)
1285
78caa52a 1286 test_string = ''.join(map(compat_chr, range(len(example_sig))))
c705320f 1287 cache_res = func(test_string)
edf3e38e 1288 cache_spec = [ord(c) for c in cache_res]
78caa52a 1289 expr_code = ' + '.join(gen_sig_code(cache_spec))
60064c53
PH
1290 signature_id_tuple = '(%s)' % (
1291 ', '.join(compat_str(len(p)) for p in example_sig.split('.')))
69ea8ca4 1292 code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
78caa52a 1293 ' return %s\n') % (signature_id_tuple, expr_code)
69ea8ca4 1294 self.to_screen('Extracted signature function:\n' + code)
edf3e38e 1295
e0df6211
PH
def _parse_sig_js(self, jscode):
    """Build a Python callable from the signature function in player JS.

    The name of the signature function is looked up in jscode with the
    patterns below (named group 'sig'); the function is then extracted
    with JSInterpreter. The returned callable takes the signature string
    and passes it to the JS function as its only argument.
    """
    name_patterns = (
        r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
        r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
        # Obsolete patterns
        r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
        r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
    )
    funcname = self._search_regex(
        name_patterns, jscode,
        'Initial JS player signature function name', group='sig')

    initial_function = JSInterpreter(jscode).extract_function(funcname)

    def decipher(s):
        # The interpreted function expects its arguments as a list.
        return initial_function([s])

    return decipher
1316
def _parse_sig_swf(self, file_contents):
    """Build a Python callable from the signature function in a SWF player.

    The 'decipher' function of the 'SignatureDecipher' class is extracted
    from file_contents with SWFInterpreter. The returned callable takes
    the signature string and passes it on as the only argument.
    """
    TARGET_CLASSNAME = 'SignatureDecipher'
    swfi = SWFInterpreter(file_contents)
    decipher_class = swfi.extract_class(TARGET_CLASSNAME)
    initial_function = swfi.extract_function(decipher_class, 'decipher')

    def decipher(s):
        # The interpreted function expects its arguments as a list.
        return initial_function([s])

    return decipher
1323
83799698 1324 def _decrypt_signature(self, s, video_id, player_url, age_gate=False):
257a2501 1325 """Turn the encrypted s field into a working signature"""
6b37f0be 1326
c8bf86d5 1327 if player_url is None:
69ea8ca4 1328 raise ExtractorError('Cannot decrypt signature without player_url')
920de7a2 1329
69ea8ca4 1330 if player_url.startswith('//'):
78caa52a 1331 player_url = 'https:' + player_url
3c90cc8b
S
1332 elif not re.match(r'https?://', player_url):
1333 player_url = compat_urlparse.urljoin(
1334 'https://www.youtube.com', player_url)
c8bf86d5 1335 try:
62af3a0e 1336 player_id = (player_url, self._signature_cache_id(s))
c8bf86d5
PH
1337 if player_id not in self._player_cache:
1338 func = self._extract_signature_function(
60064c53 1339 video_id, player_url, s
c8bf86d5
PH
1340 )
1341 self._player_cache[player_id] = func
1342 func = self._player_cache[player_id]
1343 if self._downloader.params.get('youtube_print_sig_code'):
60064c53 1344 self._print_sig_code(func, s)
c8bf86d5
PH
1345 return func(s)
1346 except Exception as e:
1347 tb = traceback.format_exc()
1348 raise ExtractorError(
78caa52a 1349 'Signature extraction failed: ' + tb, cause=e)
e0df6211 1350
f96f5dda 1351 def _get_subtitles(self, video_id, webpage, has_live_chat_replay):
de7f3446 1352 try:
60e47a26 1353 subs_doc = self._download_xml(
38c2e5b8 1354 'https://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id,
7fad1c63
JMF
1355 video_id, note=False)
1356 except ExtractorError as err:
9b9c5355 1357 self._downloader.report_warning('unable to download video subtitles: %s' % error_to_compat_str(err))
de7f3446 1358 return {}
de7f3446
JMF
1359
1360 sub_lang_list = {}
60e47a26
JMF
1361 for track in subs_doc.findall('track'):
1362 lang = track.attrib['lang_code']
7e660ac1
LD
1363 if lang in sub_lang_list:
1364 continue
360e1ca5 1365 sub_formats = []
23d17e4b 1366 for ext in self._SUBTITLE_FORMATS:
15707c7e 1367 params = compat_urllib_parse_urlencode({
360e1ca5
JMF
1368 'lang': lang,
1369 'v': video_id,
1370 'fmt': ext,
1371 'name': track.attrib['name'].encode('utf-8'),
1372 })
1373 sub_formats.append({
1374 'url': 'https://www.youtube.com/api/timedtext?' + params,
1375 'ext': ext,
1376 })
1377 sub_lang_list[lang] = sub_formats
9f448fcb 1378 if has_live_chat_replay:
321bf820 1379 sub_lang_list['live_chat'] = [
1380 {
1381 'video_id': video_id,
1382 'ext': 'json',
1383 'protocol': 'youtube_live_chat_replay',
1384 },
9f448fcb 1385 ]
de7f3446 1386 if not sub_lang_list:
69ea8ca4 1387 self._downloader.report_warning('video doesn\'t have subtitles')
de7f3446
JMF
1388 return {}
1389 return sub_lang_list
1390
a72778d3
S
1391 def _get_ytplayer_config(self, video_id, webpage):
1392 patterns = (
526b3b07
S
1393 # User data may contain arbitrary character sequences that may affect
1394 # JSON extraction with regex, e.g. when '};' is contained the second
1395 # regex won't capture the whole JSON. Yet working around by trying more
1396 # concrete regex first keeping in mind proper quoted string handling
1397 # to be implemented in future that will replace this workaround (see
067aa17e
S
1398 # https://github.com/ytdl-org/youtube-dl/issues/7468,
1399 # https://github.com/ytdl-org/youtube-dl/pull/7599)
a72778d3
S
1400 r';ytplayer\.config\s*=\s*({.+?});ytplayer',
1401 r';ytplayer\.config\s*=\s*({.+?});',
59c5fa91 1402 r'ytInitialPlayerResponse\s*=\s*({.+?});var meta'
a72778d3
S
1403 )
1404 config = self._search_regex(
1405 patterns, webpage, 'ytplayer.config', default=None)
1406 if config:
1407 return self._parse_json(
1408 uppercase_escape(config), video_id, fatal=False)
0e49d9a6 1409
def _get_music_metadata_from_yt_initial(self, yt_initial):
    """Collect music track metadata from the initial data of a watch page.

    The metadata rows of every videoSecondaryInfoRenderer are scanned for
    the titles 'Album', 'Artist' and 'Song', which map to the keys
    'album', 'artist' and 'track'. A row whose key is already present in
    the track being built starts a new track.

    Returns a list of dicts, one per track; empty when nothing is found.
    """
    music_metadata = []
    key_map = {
        'Album': 'album',
        'Artist': 'artist',
        'Song': 'track'
    }
    contents = try_get(yt_initial, lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'])
    if not isinstance(contents, list):
        return music_metadata
    for content in contents:
        music_track = {}
        if not isinstance(content, dict):
            continue
        videoSecondaryInfoRenderer = try_get(content, lambda x: x['videoSecondaryInfoRenderer'])
        if not isinstance(videoSecondaryInfoRenderer, dict):
            continue
        rows = try_get(videoSecondaryInfoRenderer, lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows'])
        if not isinstance(rows, list):
            continue
        for row in rows:
            metadataRowRenderer = try_get(row, lambda x: x['metadataRowRenderer'])
            if not isinstance(metadataRowRenderer, dict):
                continue
            key = try_get(metadataRowRenderer, lambda x: x['title']['simpleText'])
            value = try_get(metadataRowRenderer, lambda x: x['contents'][0]['simpleText']) or \
                try_get(metadataRowRenderer, lambda x: x['contents'][0]['runs'][0]['text'])
            # compat_str rather than str: under Python 2 the parsed JSON
            # text is unicode, which an exact "type(...) is str" check
            # would reject for every row.
            if not isinstance(key, compat_str) or not isinstance(value, compat_str):
                continue
            if key in key_map:
                if key_map[key] in music_track:
                    # we've started on a new track
                    music_metadata.append(music_track)
                    music_track = {}
                music_track[key_map[key]] = value
        if music_track:
            music_metadata.append(music_track)
    return music_metadata
1447
360e1ca5 1448 def _get_automatic_captions(self, video_id, webpage):
de7f3446
JMF
1449 """We need the webpage for getting the captions url, pass it as an
1450 argument to speed up the process."""
69ea8ca4 1451 self.to_screen('%s: Looking for automatic captions' % video_id)
a72778d3 1452 player_config = self._get_ytplayer_config(video_id, webpage)
78caa52a 1453 err_msg = 'Couldn\'t find automatic captions for %s' % video_id
a72778d3 1454 if not player_config:
de7f3446
JMF
1455 self._downloader.report_warning(err_msg)
1456 return {}
de7f3446 1457 try:
59c5fa91
PO
1458 if "args" in player_config and "ttsurl" in player_config["args"]:
1459 args = player_config['args']
1460 caption_url = args['ttsurl']
b78b292f 1461 timestamp = args['timestamp']
59c5fa91 1462
b78b292f 1463 # We get the available subtitles
15707c7e 1464 list_params = compat_urllib_parse_urlencode({
b78b292f
S
1465 'type': 'list',
1466 'tlangs': 1,
1467 'asrs': 1,
1468 })
1469 list_url = caption_url + '&' + list_params
1470 caption_list = self._download_xml(list_url, video_id)
1471 original_lang_node = caption_list.find('track')
1472 if original_lang_node is None:
1473 self._downloader.report_warning('Video doesn\'t have automatic captions')
1474 return {}
1475 original_lang = original_lang_node.attrib['lang_code']
1476 caption_kind = original_lang_node.attrib.get('kind', '')
1477
1478 sub_lang_list = {}
1479 for lang_node in caption_list.findall('target'):
1480 sub_lang = lang_node.attrib['lang_code']
1481 sub_formats = []
1482 for ext in self._SUBTITLE_FORMATS:
15707c7e 1483 params = compat_urllib_parse_urlencode({
b78b292f
S
1484 'lang': original_lang,
1485 'tlang': sub_lang,
1486 'fmt': ext,
1487 'ts': timestamp,
1488 'kind': caption_kind,
1489 })
1490 sub_formats.append({
1491 'url': caption_url + '&' + params,
1492 'ext': ext,
1493 })
1494 sub_lang_list[sub_lang] = sub_formats
1495 return sub_lang_list
1496
ddbb4c5c
S
1497 def make_captions(sub_url, sub_langs):
1498 parsed_sub_url = compat_urllib_parse_urlparse(sub_url)
1499 caption_qs = compat_parse_qs(parsed_sub_url.query)
1500 captions = {}
1501 for sub_lang in sub_langs:
1502 sub_formats = []
1503 for ext in self._SUBTITLE_FORMATS:
1504 caption_qs.update({
1505 'tlang': [sub_lang],
1506 'fmt': [ext],
1507 })
1508 sub_url = compat_urlparse.urlunparse(parsed_sub_url._replace(
1509 query=compat_urllib_parse_urlencode(caption_qs, True)))
1510 sub_formats.append({
1511 'url': sub_url,
1512 'ext': ext,
1513 })
1514 captions[sub_lang] = sub_formats
1515 return captions
1516
1517 # New captions format as of 22.06.2017
59c5fa91
PO
1518 if "args" in player_config:
1519 player_response = player_config["args"].get('player_response')
1520 else:
1521 # New player system (ytInitialPlayerResponse) as of October 2020
1522 player_response = player_config
1523
1524 if player_response:
1525 if isinstance(player_response, compat_str):
1526 player_response = self._parse_json(
1527 player_response, video_id, fatal=False)
1528
1529 renderer = player_response['captions']['playerCaptionsTracklistRenderer']
1530 caption_tracks = renderer['captionTracks']
1531 for caption_track in caption_tracks:
1532 if 'kind' not in caption_track:
1533 # not an automatic transcription
1534 continue
1535 base_url = caption_track['baseUrl']
1536 sub_lang_list = []
1537 for lang in renderer['translationLanguages']:
1538 lang_code = lang.get('languageCode')
1539 if lang_code:
1540 sub_lang_list.append(lang_code)
1541 return make_captions(base_url, sub_lang_list)
1542
1543 self._downloader.report_warning("Couldn't find automatic captions for %s" % video_id)
1544 return {}
1545
1546 if "args" in player_config:
1547 args = player_config["args"]
1548
1549 # Some videos don't provide ttsurl but rather caption_tracks and
1550 # caption_translation_languages (e.g. 20LmZk1hakA)
1551 # Does not used anymore as of 22.06.2017
1552 caption_tracks = args['caption_tracks']
1553 caption_translation_languages = args['caption_translation_languages']
1554 caption_url = compat_parse_qs(caption_tracks.split(',')[0])['u'][0]
1555 sub_lang_list = []
1556 for lang in caption_translation_languages.split(','):
1557 lang_qs = compat_parse_qs(compat_urllib_parse_unquote_plus(lang))
1558 sub_lang = lang_qs.get('lc', [None])[0]
1559 if sub_lang:
1560 sub_lang_list.append(sub_lang)
1561 return make_captions(caption_url, sub_lang_list)
de7f3446
JMF
1562 # An extractor error can be raise by the download process if there are
1563 # no automatic captions but there are subtitles
ddbb4c5c 1564 except (KeyError, IndexError, ExtractorError):
de7f3446
JMF
1565 self._downloader.report_warning(err_msg)
1566 return {}
1567
21c340b8
S
def _mark_watched(self, video_id, video_info, player_response):
    """Request the playback-tracking URL so the video is marked as watched.

    The tracking URL is read from
    player_response['playbackTracking']['videostatsPlaybackUrl']['baseUrl'],
    falling back to video_info['videostats_playback_base_url'][0].  When no
    valid URL is found the method returns without doing anything.  The
    request is non-fatal.
    """
    tracking_url = try_get(
        player_response,
        lambda x: x['playbackTracking']['videostatsPlaybackUrl']['baseUrl'])
    if not tracking_url:
        tracking_url = try_get(
            video_info, lambda x: x['videostats_playback_base_url'][0])
    playback_url = url_or_none(tracking_url)
    if not playback_url:
        return

    url_parts = compat_urlparse.urlparse(playback_url)
    query = compat_urlparse.parse_qs(url_parts.query)

    # cpn generation algorithm is reverse engineered from base.js.
    # In fact it works even with dummy cpn.
    CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
    cpn_chars = []
    for _ in range(16):
        cpn_chars.append(CPN_ALPHABET[random.randint(0, 256) & 63])

    query['ver'] = ['2']
    query['cpn'] = [''.join(cpn_chars)]

    encoded_query = compat_urllib_parse_urlencode(query, True)
    playback_url = compat_urlparse.urlunparse(
        url_parts._replace(query=encoded_query))

    self._download_webpage(
        playback_url, video_id, 'Marking watched',
        'Unable to mark watched', fatal=False)
1593
66c9fa36
S
1594 @staticmethod
1595 def _extract_urls(webpage):
1596 # Embedded YouTube player
1597 entries = [
1598 unescapeHTML(mobj.group('url'))
1599 for mobj in re.finditer(r'''(?x)
1600 (?:
1601 <iframe[^>]+?src=|
1602 data-video-url=|
1603 <embed[^>]+?src=|
1604 embedSWF\(?:\s*|
1605 <object[^>]+data=|
1606 new\s+SWFObject\(
1607 )
1608 (["\'])
1609 (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
f2332f18 1610 (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
66c9fa36
S
1611 \1''', webpage)]
1612
1613 # lazyYT YouTube embed
1614 entries.extend(list(map(
1615 unescapeHTML,
1616 re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage))))
1617
1618 # Wordpress "YouTube Video Importer" plugin
1619 matches = re.findall(r'''(?x)<div[^>]+
1620 class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
1621 data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
1622 entries.extend(m[-1] for m in matches)
1623
1624 return entries
1625
@staticmethod
def _extract_url(webpage):
    """Return the first entry found by YoutubeIE._extract_urls, or None."""
    found = YoutubeIE._extract_urls(webpage)
    if not found:
        return None
    return found[0]
1630
97665381
PH
@classmethod
def extract_id(cls, url):
    """Return the video ID captured by group 2 of cls._VALID_URL.

    The pattern is matched in verbose mode against the start of url.
    Raises ExtractorError when url does not match.
    """
    match = re.match(cls._VALID_URL, url, re.VERBOSE)
    if match is not None:
        return match.group(2)
    raise ExtractorError('Invalid URL: %s' % url)
1638
84213ea8
S
1639 def _extract_chapters_from_json(self, webpage, video_id, duration):
1640 if not webpage:
1641 return
edd83104 1642 initial_data = self._parse_json(
84213ea8 1643 self._search_regex(
edd83104 1644 r'window\["ytInitialData"\] = (.+);\n', webpage,
84213ea8
S
1645 'player args', default='{}'),
1646 video_id, fatal=False)
edd83104 1647 if not initial_data or not isinstance(initial_data, dict):
84213ea8
S
1648 return
1649 chapters_list = try_get(
edd83104 1650 initial_data,
84213ea8
S
1651 lambda x: x['playerOverlays']
1652 ['playerOverlayRenderer']
1653 ['decoratedPlayerBarRenderer']
1654 ['decoratedPlayerBarRenderer']
1655 ['playerBar']
1656 ['chapteredPlayerBarRenderer']
1657 ['chapters'],
1658 list)
1659 if not chapters_list:
1660 return
1661
1662 def chapter_time(chapter):
1663 return float_or_none(
1664 try_get(
1665 chapter,
1666 lambda x: x['chapterRenderer']['timeRangeStartMillis'],
1667 int),
1668 scale=1000)
1669 chapters = []
1670 for next_num, chapter in enumerate(chapters_list, start=1):
1671 start_time = chapter_time(chapter)
1672 if start_time is None:
1673 continue
1674 end_time = (chapter_time(chapters_list[next_num])
1675 if next_num < len(chapters_list) else duration)
1676 if end_time is None:
1677 continue
1678 title = try_get(
1679 chapter, lambda x: x['chapterRenderer']['title']['simpleText'],
1680 compat_str)
1681 chapters.append({
1682 'start_time': start_time,
1683 'end_time': end_time,
1684 'title': title,
1685 })
1686 return chapters
1687
9cafc3fd 1688 @staticmethod
84213ea8 1689 def _extract_chapters_from_description(description, duration):
9cafc3fd
S
1690 if not description:
1691 return None
1692 chapter_lines = re.findall(
1693 r'(?:^|<br\s*/>)([^<]*<a[^>]+onclick=["\']yt\.www\.watch\.player\.seekTo[^>]+>(\d{1,2}:\d{1,2}(?::\d{1,2})?)</a>[^>]*)(?=$|<br\s*/>)',
1694 description)
1695 if not chapter_lines:
1696 return None
1697 chapters = []
1698 for next_num, (chapter_line, time_point) in enumerate(
1699 chapter_lines, start=1):
1700 start_time = parse_duration(time_point)
1701 if start_time is None:
1702 continue
39d4c1be
S
1703 if start_time > duration:
1704 break
9cafc3fd
S
1705 end_time = (duration if next_num == len(chapter_lines)
1706 else parse_duration(chapter_lines[next_num][1]))
1707 if end_time is None:
1708 continue
39d4c1be
S
1709 if end_time > duration:
1710 end_time = duration
1711 if start_time > end_time:
1712 break
9cafc3fd
S
1713 chapter_title = re.sub(
1714 r'<a[^>]+>[^<]+</a>', '', chapter_line).strip(' \t-')
1715 chapter_title = re.sub(r'\s+', ' ', chapter_title)
1716 chapters.append({
1717 'start_time': start_time,
1718 'end_time': end_time,
1719 'title': chapter_title,
1720 })
1721 return chapters
1722
84213ea8
S
1723 def _extract_chapters(self, webpage, description, video_id, duration):
1724 return (self._extract_chapters_from_json(webpage, video_id, duration)
1725 or self._extract_chapters_from_description(description, duration))
1726
c5e8d7af 1727 def _real_extract(self, url):
cf7e015f
S
1728 url, smuggled_data = unsmuggle_url(url, {})
1729
7e8c0af0 1730 proto = (
78caa52a
PH
1731 'http' if self._downloader.params.get('prefer_insecure', False)
1732 else 'https')
7e8c0af0 1733
7c80519c 1734 start_time = None
297a564b 1735 end_time = None
7c80519c
JMF
1736 parsed_url = compat_urllib_parse_urlparse(url)
1737 for component in [parsed_url.fragment, parsed_url.query]:
1738 query = compat_parse_qs(component)
297a564b 1739 if start_time is None and 't' in query:
7c80519c 1740 start_time = parse_duration(query['t'][0])
2929fa0e
JMF
1741 if start_time is None and 'start' in query:
1742 start_time = parse_duration(query['start'][0])
297a564b
JMF
1743 if end_time is None and 'end' in query:
1744 end_time = parse_duration(query['end'][0])
7c80519c 1745
c5e8d7af
PH
1746 # Extract original video URL from URL with redirection, like age verification, using next_url parameter
1747 mobj = re.search(self._NEXT_URL_RE, url)
1748 if mobj:
7fd002c0 1749 url = proto + '://www.youtube.com/' + compat_urllib_parse_unquote(mobj.group(1)).lstrip('/')
97665381 1750 video_id = self.extract_id(url)
c5e8d7af
PH
1751
1752 # Get video webpage
aa79ac0c 1753 url = proto + '://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1&bpctr=9999999999' % video_id
011e75e6
S
1754 video_webpage, urlh = self._download_webpage_handle(url, video_id)
1755
1756 qs = compat_parse_qs(compat_urllib_parse_urlparse(urlh.geturl()).query)
1757 video_id = qs.get('v', [None])[0] or video_id
c5e8d7af
PH
1758
1759 # Attempt to extract SWF player URL
e0df6211 1760 mobj = re.search(r'swfConfig.*?"(https?:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage)
c5e8d7af
PH
1761 if mobj is not None:
1762 player_url = re.sub(r'\\(.)', r'\1', mobj.group(1))
1763 else:
1764 player_url = None
1765
d8d24a92
S
1766 dash_mpds = []
1767
1768 def add_dash_mpd(video_info):
1769 dash_mpd = video_info.get('dashmpd')
1770 if dash_mpd and dash_mpd[0] not in dash_mpds:
1771 dash_mpds.append(dash_mpd[0])
1772
561b456e
S
1773 def add_dash_mpd_pr(pl_response):
1774 dash_mpd = url_or_none(try_get(
1775 pl_response, lambda x: x['streamingData']['dashManifestUrl'],
1776 compat_str))
1777 if dash_mpd and dash_mpd not in dash_mpds:
1778 dash_mpds.append(dash_mpd)
1779
c7121fa7
S
1780 is_live = None
1781 view_count = None
1782
1783 def extract_view_count(v_info):
1784 return int_or_none(try_get(v_info, lambda x: x['view_count'][0]))
1785
c2d125d9
S
1786 def extract_player_response(player_response, video_id):
1787 pl_response = str_or_none(player_response)
1788 if not pl_response:
1789 return
1790 pl_response = self._parse_json(pl_response, video_id, fatal=False)
1791 if isinstance(pl_response, dict):
1792 add_dash_mpd_pr(pl_response)
1793 return pl_response
1794
fb2c9277
U
1795 def extract_embedded_config(embed_webpage, video_id):
1796 embedded_config = self._search_regex(
1797 r'setConfig\(({.*})\);',
1798 embed_webpage, 'ytInitialData', default=None)
1799 if embedded_config:
1800 return embedded_config
1801
dbdaaa23
S
1802 player_response = {}
1803
c5e8d7af 1804 # Get video info
43ebf77d 1805 video_info = {}
6449cd80 1806 embed_webpage = None
39e7107d
U
1807 if (self._og_search_property('restrictions:age', video_webpage, default=None) == '18+'
1808 or re.search(r'player-age-gate-content">', video_webpage) is not None):
9d9314cb 1809 cookie_keys = self._get_cookies('https://www.youtube.com').keys()
c108eb73
JMF
1810 age_gate = True
1811 # We simulate the access to the video from www.youtube.com/v/{video_id}
1812 # this can be viewed without login into Youtube
beb95e77
CL
1813 url = proto + '://www.youtube.com/embed/%s' % video_id
1814 embed_webpage = self._download_webpage(url, video_id, 'Downloading embed webpage')
fb2c9277
U
1815 ext = extract_embedded_config(embed_webpage, video_id)
1816 # playabilityStatus = re.search(r'{\\\"status\\\":\\\"(?P<playabilityStatus>[^\"]+)\\\"', ext)
1817 playable_in_embed = re.search(r'{\\\"playableInEmbed\\\":(?P<playableinEmbed>[^\,]+)', ext)
1818 if not playable_in_embed:
1819 self.to_screen('Could not determine whether playabale in embed for video %s' % video_id)
1820 playable_in_embed = ''
1821 else:
1822 playable_in_embed = playable_in_embed.group('playableinEmbed')
1823 # check if video is only playable on youtube in other words not playable in embed - if so it requires auth (cookies)
1824 # if re.search(r'player-unavailable">', embed_webpage) is not None:
1825 if playable_in_embed == 'false':
c73baf23
U
1826 '''
1827 # TODO apply this patch when Support for Python 2.6(!) and above drops
9d9314cb 1828 if ({'VISITOR_INFO1_LIVE', 'HSID', 'SSID', 'SID'} <= cookie_keys
4bb9c880 1829 or {'VISITOR_INFO1_LIVE', '__Secure-3PSID', 'LOGIN_INFO'} <= cookie_keys):
c73baf23
U
1830 '''
1831 if (set(('VISITOR_INFO1_LIVE', 'HSID', 'SSID', 'SID')) <= set(cookie_keys)
1832 or set(('VISITOR_INFO1_LIVE', '__Secure-3PSID', 'LOGIN_INFO')) <= set(cookie_keys)):
4bb9c880
U
1833 age_gate = False
1834 # Try looking directly into the video webpage
1835 ytplayer_config = self._get_ytplayer_config(video_id, video_webpage)
1836 if ytplayer_config:
59c5fa91
PO
1837 args = ytplayer_config.get("args")
1838 if args is not None:
1839 if args.get('url_encoded_fmt_stream_map') or args.get('hlsvp'):
1840 # Convert to the same format returned by compat_parse_qs
1841 video_info = dict((k, [v]) for k, v in args.items())
1842 add_dash_mpd(video_info)
1843 # Rental video is not rented but preview is available (e.g.
1844 # https://www.youtube.com/watch?v=yYr8q0y5Jfg,
1845 # https://github.com/ytdl-org/youtube-dl/issues/10532)
1846 if not video_info and args.get('ypc_vid'):
1847 return self.url_result(
1848 args['ypc_vid'], YoutubeIE.ie_key(), video_id=args['ypc_vid'])
1849 if args.get('livestream') == '1' or args.get('live_playback') == 1:
1850 is_live = True
1851 if not player_response:
1852 player_response = extract_player_response(args.get('player_response'), video_id)
1853 elif not player_response:
1854 player_response = ytplayer_config
4bb9c880
U
1855 if not video_info or self._downloader.params.get('youtube_include_dash_manifest', True):
1856 add_dash_mpd_pr(player_response)
9d9314cb
U
1857 else:
1858 raise ExtractorError('Video is age restricted and only playable on Youtube. Requires cookies!', expected=True)
1859 else:
1860 data = compat_urllib_parse_urlencode({
1861 'video_id': video_id,
1862 'eurl': 'https://youtube.googleapis.com/v/' + video_id,
1863 'sts': self._search_regex(
1864 r'"sts"\s*:\s*(\d+)', embed_webpage, 'sts', default=''),
1865 })
1866 video_info_url = proto + '://www.youtube.com/get_video_info?' + data
1867 try:
1868 video_info_webpage = self._download_webpage(
1869 video_info_url, video_id,
1870 note='Refetching age-gated info webpage',
1871 errnote='unable to download video info webpage')
1872 except ExtractorError:
1873 video_info_webpage = None
1874 if video_info_webpage:
1875 video_info = compat_parse_qs(video_info_webpage)
1876 pl_response = video_info.get('player_response', [None])[0]
1877 player_response = extract_player_response(pl_response, video_id)
1878 add_dash_mpd(video_info)
1879 view_count = extract_view_count(video_info)
c108eb73
JMF
1880 else:
1881 age_gate = False
d8d24a92 1882 # Try looking directly into the video webpage
a72778d3 1883 ytplayer_config = self._get_ytplayer_config(video_id, video_webpage)
59c5fa91
PO
1884 args = ytplayer_config.get("args")
1885 if args is not None:
4c76aa06 1886 if args.get('url_encoded_fmt_stream_map') or args.get('hlsvp'):
d8d24a92
S
1887 # Convert to the same format returned by compat_parse_qs
1888 video_info = dict((k, [v]) for k, v in args.items())
1889 add_dash_mpd(video_info)
6496ccb4
S
1890 # Rental video is not rented but preview is available (e.g.
1891 # https://www.youtube.com/watch?v=yYr8q0y5Jfg,
067aa17e 1892 # https://github.com/ytdl-org/youtube-dl/issues/10532)
6496ccb4
S
1893 if not video_info and args.get('ypc_vid'):
1894 return self.url_result(
1895 args['ypc_vid'], YoutubeIE.ie_key(), video_id=args['ypc_vid'])
2fe1ff85
JMF
1896 if args.get('livestream') == '1' or args.get('live_playback') == 1:
1897 is_live = True
dbdaaa23 1898 if not player_response:
c2d125d9 1899 player_response = extract_player_response(args.get('player_response'), video_id)
59c5fa91
PO
1900 elif not player_response:
1901 player_response = ytplayer_config
0a3cf9ad 1902 if not video_info or self._downloader.params.get('youtube_include_dash_manifest', True):
561b456e 1903 add_dash_mpd_pr(player_response)
bbb7c3f7
YCH
1904
1905 def extract_unavailable_message():
0add33ab
S
1906 messages = []
1907 for tag, kind in (('h1', 'message'), ('div', 'submessage')):
1908 msg = self._html_search_regex(
1909 r'(?s)<{tag}[^>]+id=["\']unavailable-{kind}["\'][^>]*>(.+?)</{tag}>'.format(tag=tag, kind=kind),
1910 video_webpage, 'unavailable %s' % kind, default=None)
1911 if msg:
1912 messages.append(msg)
1913 if messages:
1914 return '\n'.join(messages)
bbb7c3f7 1915
f93abcf1 1916 if not video_info and not player_response:
15be3eb5
RA
1917 unavailable_message = extract_unavailable_message()
1918 if not unavailable_message:
1919 unavailable_message = 'Unable to extract video data'
1920 raise ExtractorError(
1921 'YouTube said: %s' % unavailable_message, expected=True, video_id=video_id)
1922
f93abcf1
S
1923 if not isinstance(video_info, dict):
1924 video_info = {}
1925
dbdaaa23
S
1926 video_details = try_get(
1927 player_response, lambda x: x['videoDetails'], dict) or {}
1928
37357d21
S
1929 microformat = try_get(
1930 player_response, lambda x: x['microformat']['playerMicroformatRenderer'], dict) or {}
1931
8dbf751a
RA
1932 video_title = video_info.get('title', [None])[0] or video_details.get('title')
1933 if not video_title:
cf7e015f
S
1934 self._downloader.report_warning('Unable to extract video title')
1935 video_title = '_'
1936
9cafc3fd 1937 description_original = video_description = get_element_by_id("eow-description", video_webpage)
cf7e015f 1938 if video_description:
fa4bc6e7
RA
1939
1940 def replace_url(m):
1941 redir_url = compat_urlparse.urljoin(url, m.group(1))
1942 parsed_redir_url = compat_urllib_parse_urlparse(redir_url)
1943 if re.search(r'^(?:www\.)?(?:youtube(?:-nocookie)?\.com|youtu\.be)$', parsed_redir_url.netloc) and parsed_redir_url.path == '/redirect':
1944 qs = compat_parse_qs(parsed_redir_url.query)
1945 q = qs.get('q')
1946 if q and q[0]:
1947 return q[0]
1948 return redir_url
1949
9cafc3fd 1950 description_original = video_description = re.sub(r'''(?x)
cf7e015f 1951 <a\s+
25cb7a0e 1952 (?:[a-zA-Z-]+="[^"]*"\s+)*?
23f13e97 1953 (?:title|href)="([^"]+)"\s+
25cb7a0e 1954 (?:[a-zA-Z-]+="[^"]*"\s+)*?
525cedb9 1955 class="[^"]*"[^>]*>
23f13e97 1956 [^<]+\.{3}\s*
cf7e015f 1957 </a>
fa4bc6e7 1958 ''', replace_url, video_description)
cf7e015f
S
1959 video_description = clean_html(video_description)
1960 else:
ea74e00b
DP
1961 video_description = video_details.get('shortDescription')
1962 if video_description is None:
1963 video_description = self._html_search_meta('description', video_webpage)
cf7e015f 1964
8fe10494 1965 if not smuggled_data.get('force_singlefeed', False):
5e1eddb9 1966 if not self._downloader.params.get('noplaylist'):
8fe10494
S
1967 multifeed_metadata_list = try_get(
1968 player_response,
1969 lambda x: x['multicamera']['playerLegacyMulticameraRenderer']['metadataList'],
1970 compat_str) or try_get(
1971 video_info, lambda x: x['multifeed_metadata_list'][0], compat_str)
1972 if multifeed_metadata_list:
1973 entries = []
1974 feed_ids = []
1975 for feed in multifeed_metadata_list.split(','):
1976 # Unquote should take place before split on comma (,) since textual
1977 # fields may contain comma as well (see
067aa17e 1978 # https://github.com/ytdl-org/youtube-dl/issues/8536)
8fe10494 1979 feed_data = compat_parse_qs(compat_urllib_parse_unquote_plus(feed))
6b09401b
S
1980
1981 def feed_entry(name):
1982 return try_get(feed_data, lambda x: x[name][0], compat_str)
1983
1984 feed_id = feed_entry('id')
1985 if not feed_id:
1986 continue
1987 feed_title = feed_entry('title')
1988 title = video_title
1989 if feed_title:
1990 title += ' (%s)' % feed_title
8fe10494
S
1991 entries.append({
1992 '_type': 'url_transparent',
1993 'ie_key': 'Youtube',
1994 'url': smuggle_url(
1995 '%s://www.youtube.com/watch?v=%s' % (proto, feed_data['id'][0]),
1996 {'force_singlefeed': True}),
6b09401b 1997 'title': title,
8fe10494 1998 })
6b09401b 1999 feed_ids.append(feed_id)
8fe10494
S
2000 self.to_screen(
2001 'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
2002 % (', '.join(feed_ids), video_id))
2003 return self.playlist_result(entries, video_id, video_title, video_description)
2004 else:
2005 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
cf7e015f 2006
c7121fa7 2007 if view_count is None:
1c9c8de2 2008 view_count = extract_view_count(video_info)
dbdaaa23
S
2009 if view_count is None and video_details:
2010 view_count = int_or_none(video_details.get('viewCount'))
7b16239a
S
2011 if view_count is None and microformat:
2012 view_count = int_or_none(microformat.get('viewCount'))
1d699755 2013
27019dbb 2014 if is_live is None:
898238e9 2015 is_live = bool_or_none(video_details.get('isLive'))
27019dbb 2016
321bf820 2017 has_live_chat_replay = False
f0f76a33 2018 if not is_live:
321bf820 2019 yt_initial_data = self._get_yt_initial_data(video_id, video_webpage)
2020 try:
2021 yt_initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
2022 has_live_chat_replay = True
f0f76a33 2023 except (KeyError, IndexError, TypeError):
321bf820 2024 pass
2025
c5e8d7af
PH
2026 # Check for "rental" videos
2027 if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info:
067aa17e 2028 raise ExtractorError('"rental" videos not supported. See https://github.com/ytdl-org/youtube-dl/issues/359 for more information.', expected=True)
c5e8d7af 2029
c63ca0ee
S
2030 def _extract_filesize(media_url):
2031 return int_or_none(self._search_regex(
2032 r'\bclen[=/](\d+)', media_url, 'filesize', default=None))
2033
bf1317d2
S
2034 streaming_formats = try_get(player_response, lambda x: x['streamingData']['formats'], list) or []
2035 streaming_formats.extend(try_get(player_response, lambda x: x['streamingData']['adaptiveFormats'], list) or [])
2036
c5e8d7af
PH
2037 if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
2038 self.report_rtmp_download()
dd27fd17
PH
2039 formats = [{
2040 'format_id': '_rtmp',
2041 'protocol': 'rtmp',
2042 'url': video_info['conn'][0],
2043 'player_url': player_url,
2044 }]
bf1317d2 2045 elif not is_live and (streaming_formats or len(video_info.get('url_encoded_fmt_stream_map', [''])[0]) >= 1 or len(video_info.get('adaptive_fmts', [''])[0]) >= 1):
5f6a1245 2046 encoded_url_map = video_info.get('url_encoded_fmt_stream_map', [''])[0] + ',' + video_info.get('adaptive_fmts', [''])[0]
00fe14fc 2047 if 'rtmpe%3Dyes' in encoded_url_map:
067aa17e 2048 raise ExtractorError('rtmpe downloads are not supported, see https://github.com/ytdl-org/youtube-dl/issues/343 for more information.', expected=True)
bf1317d2 2049 formats = []
3318832e 2050 formats_spec = {}
82156fdb 2051 fmt_list = video_info.get('fmt_list', [''])[0]
2052 if fmt_list:
2053 for fmt in fmt_list.split(','):
2054 spec = fmt.split('/')
3318832e 2055 if len(spec) > 1:
2056 width_height = spec[1].split('x')
2057 if len(width_height) == 2:
2058 formats_spec[spec[0]] = {
2059 'resolution': spec[1],
2060 'width': int_or_none(width_height[0]),
2061 'height': int_or_none(width_height[1]),
2062 }
bf1317d2
S
2063 for fmt in streaming_formats:
2064 itag = str_or_none(fmt.get('itag'))
2065 if not itag:
201e9eaa 2066 continue
bf1317d2
S
2067 quality = fmt.get('quality')
2068 quality_label = fmt.get('qualityLabel') or quality
2069 formats_spec[itag] = {
2070 'asr': int_or_none(fmt.get('audioSampleRate')),
2071 'filesize': int_or_none(fmt.get('contentLength')),
2072 'format_note': quality_label,
2073 'fps': int_or_none(fmt.get('fps')),
2074 'height': int_or_none(fmt.get('height')),
bf1317d2
S
2075 # bitrate for itag 43 is always 2147483647
2076 'tbr': float_or_none(fmt.get('averageBitrate') or fmt.get('bitrate'), 1000) if itag != '43' else None,
2077 'width': int_or_none(fmt.get('width')),
2078 }
2079
2080 for fmt in streaming_formats:
00eb865b 2081 if fmt.get('drmFamilies') or fmt.get('drm_families'):
bf1317d2
S
2082 continue
2083 url = url_or_none(fmt.get('url'))
2084
2085 if not url:
fa3db383 2086 cipher = fmt.get('cipher') or fmt.get('signatureCipher')
bf1317d2
S
2087 if not cipher:
2088 continue
2089 url_data = compat_parse_qs(cipher)
2090 url = url_or_none(try_get(url_data, lambda x: x['url'][0], compat_str))
2091 if not url:
2092 continue
2093 else:
2094 cipher = None
2095 url_data = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
2096
2f483bc1
S
2097 stream_type = int_or_none(try_get(url_data, lambda x: x['stream_type'][0]))
2098 # Unsupported FORMAT_STREAM_TYPE_OTF
2099 if stream_type == 3:
2100 continue
6449cd80 2101
bf1317d2
S
2102 format_id = fmt.get('itag') or url_data['itag'][0]
2103 if not format_id:
2104 continue
2105 format_id = compat_str(format_id)
a49eccdf 2106
bf1317d2
S
2107 if cipher:
2108 if 's' in url_data or self._downloader.params.get('youtube_include_dash_manifest', True):
67b19799 2109 ASSETS_RE = r'(?:"assets":.+?"js":\s*("[^"]+"))|(?:"jsUrl":\s*("[^"]+"))'
bf1317d2
S
2110 jsplayer_url_json = self._search_regex(
2111 ASSETS_RE,
2112 embed_webpage if age_gate else video_webpage,
2113 'JS player URL (1)', default=None)
2114 if not jsplayer_url_json and not age_gate:
2115 # We need the embed website after all
2116 if embed_webpage is None:
2117 embed_url = proto + '://www.youtube.com/embed/%s' % video_id
2118 embed_webpage = self._download_webpage(
2119 embed_url, video_id, 'Downloading embed webpage')
2120 jsplayer_url_json = self._search_regex(
2121 ASSETS_RE, embed_webpage, 'JS player URL')
2122
2123 player_url = json.loads(jsplayer_url_json)
cf010131 2124 if player_url is None:
bf1317d2
S
2125 player_url_json = self._search_regex(
2126 r'ytplayer\.config.*?"url"\s*:\s*("[^"]+")',
2127 video_webpage, 'age gate player URL')
2128 player_url = json.loads(player_url_json)
2129
2130 if 'sig' in url_data:
2131 url += '&signature=' + url_data['sig'][0]
2132 elif 's' in url_data:
2133 encrypted_sig = url_data['s'][0]
2134
2135 if self._downloader.params.get('verbose'):
2136 if player_url is None:
bf1317d2 2137 player_desc = 'unknown'
cf010131 2138 else:
e40c758c
S
2139 player_type, player_version = self._extract_player_info(player_url)
2140 player_desc = '%s player %s' % ('flash' if player_type == 'swf' else 'html5', player_version)
bf1317d2
S
2141 parts_sizes = self._signature_cache_id(encrypted_sig)
2142 self.to_screen('{%s} signature length %s, %s' %
2143 (format_id, parts_sizes, player_desc))
2144
2145 signature = self._decrypt_signature(
2146 encrypted_sig, video_id, player_url, age_gate)
2147 sp = try_get(url_data, lambda x: x['sp'][0], compat_str) or 'signature'
2148 url += '&%s=%s' % (sp, signature)
201e9eaa
PH
2149 if 'ratebypass' not in url:
2150 url += '&ratebypass=yes'
c9afb51c 2151
94278f72
YCH
2152 dct = {
2153 'format_id': format_id,
2154 'url': url,
2155 'player_url': player_url,
2156 }
2157 if format_id in self._formats:
2158 dct.update(self._formats[format_id])
3318832e 2159 if format_id in formats_spec:
2160 dct.update(formats_spec[format_id])
94278f72 2161
aabc2be6 2162 # Some itags are not included in DASH manifest thus corresponding formats will
067aa17e 2163 # lack metadata (see https://github.com/ytdl-org/youtube-dl/pull/5993).
aabc2be6
S
2164 # Trying to extract metadata from url_encoded_fmt_stream_map entry.
2165 mobj = re.search(r'^(?P<width>\d+)[xX](?P<height>\d+)$', url_data.get('size', [''])[0])
2166 width, height = (int(mobj.group('width')), int(mobj.group('height'))) if mobj else (None, None)
94278f72 2167
bf1317d2
S
2168 if width is None:
2169 width = int_or_none(fmt.get('width'))
2170 if height is None:
2171 height = int_or_none(fmt.get('height'))
2172
c63ca0ee
S
2173 filesize = int_or_none(url_data.get(
2174 'clen', [None])[0]) or _extract_filesize(url)
2175
bf1317d2
S
2176 quality = url_data.get('quality', [None])[0] or fmt.get('quality')
2177 quality_label = url_data.get('quality_label', [None])[0] or fmt.get('qualityLabel')
2178
4878759f
S
2179 tbr = (float_or_none(url_data.get('bitrate', [None])[0], 1000)
2180 or float_or_none(fmt.get('bitrate'), 1000)) if format_id != '43' else None
bf1317d2 2181 fps = int_or_none(url_data.get('fps', [None])[0]) or int_or_none(fmt.get('fps'))
54fc90aa 2182
94278f72 2183 more_fields = {
c63ca0ee 2184 'filesize': filesize,
bf1317d2 2185 'tbr': tbr,
c9afb51c
AH
2186 'width': width,
2187 'height': height,
bf1317d2
S
2188 'fps': fps,
2189 'format_note': quality_label or quality,
c9afb51c 2190 }
94278f72
YCH
2191 for key, value in more_fields.items():
2192 if value:
2193 dct[key] = value
bf1317d2 2194 type_ = url_data.get('type', [None])[0] or fmt.get('mimeType')
aabc2be6
S
2195 if type_:
2196 type_split = type_.split(';')
2197 kind_ext = type_split[0].split('/')
2198 if len(kind_ext) == 2:
94278f72
YCH
2199 kind, _ = kind_ext
2200 dct['ext'] = mimetype2ext(type_split[0])
aabc2be6
S
2201 if kind in ('audio', 'video'):
2202 codecs = None
2203 for mobj in re.finditer(
2204 r'(?P<key>[a-zA-Z_-]+)=(?P<quote>["\']?)(?P<val>.+?)(?P=quote)(?:;|$)', type_):
2205 if mobj.group('key') == 'codecs':
2206 codecs = mobj.group('val')
2207 break
2208 if codecs:
6310acf5 2209 dct.update(parse_codecs(codecs))
e4a60912
S
2210 if dct.get('acodec') == 'none' or dct.get('vcodec') == 'none':
2211 dct['downloader_options'] = {
2212 # Youtube throttles chunks >~10M
2213 'http_chunk_size': 10485760,
2214 }
aabc2be6 2215 formats.append(dct)
c5e8d7af 2216 else:
c3e54389
S
2217 manifest_url = (
2218 url_or_none(try_get(
2219 player_response,
2220 lambda x: x['streamingData']['hlsManifestUrl'],
3089bc74
S
2221 compat_str))
2222 or url_or_none(try_get(
c3e54389
S
2223 video_info, lambda x: x['hlsvp'][0], compat_str)))
2224 if manifest_url:
2225 formats = []
2226 m3u8_formats = self._extract_m3u8_formats(
2227 manifest_url, video_id, 'mp4', fatal=False)
2228 for a_format in m3u8_formats:
2229 itag = self._search_regex(
2230 r'/itag/(\d+)/', a_format['url'], 'itag', default=None)
2231 if itag:
2232 a_format['format_id'] = itag
2233 if itag in self._formats:
2234 dct = self._formats[itag].copy()
2235 dct.update(a_format)
2236 a_format = dct
2237 a_format['player_url'] = player_url
2238 # Accept-Encoding header causes failures in live streams on Youtube and Youtube Gaming
2239 a_format.setdefault('http_headers', {})['Youtubedl-no-compression'] = 'True'
78895bd3
U
2240 if self._downloader.params.get('youtube_include_hls_manifest', True):
2241 formats.append(a_format)
c3e54389 2242 else:
13577349 2243 error_message = extract_unavailable_message()
c3e54389 2244 if not error_message:
13577349
S
2245 error_message = clean_html(try_get(
2246 player_response, lambda x: x['playabilityStatus']['reason'],
2247 compat_str))
2248 if not error_message:
2249 error_message = clean_html(
2250 try_get(video_info, lambda x: x['reason'][0], compat_str))
c3e54389
S
2251 if error_message:
2252 raise ExtractorError(error_message, expected=True)
2253 raise ExtractorError('no conn, hlsvp, hlsManifestUrl or url_encoded_fmt_stream_map information found in video info')
c5e8d7af 2254
7e72694b 2255 # uploader
dbdaaa23
S
2256 video_uploader = try_get(
2257 video_info, lambda x: x['author'][0],
2258 compat_str) or str_or_none(video_details.get('author'))
7e72694b
S
2259 if video_uploader:
2260 video_uploader = compat_urllib_parse_unquote_plus(video_uploader)
2261 else:
2262 self._downloader.report_warning('unable to extract uploader name')
2263
2264 # uploader_id
2265 video_uploader_id = None
2266 video_uploader_url = None
2267 mobj = re.search(
2268 r'<link itemprop="url" href="(?P<uploader_url>https?://www\.youtube\.com/(?:user|channel)/(?P<uploader_id>[^"]+))">',
2269 video_webpage)
2270 if mobj is not None:
2271 video_uploader_id = mobj.group('uploader_id')
2272 video_uploader_url = mobj.group('uploader_url')
a6211d23
S
2273 else:
2274 owner_profile_url = url_or_none(microformat.get('ownerProfileUrl'))
2275 if owner_profile_url:
2276 video_uploader_id = self._search_regex(
2277 r'(?:user|channel)/([^/]+)', owner_profile_url, 'uploader id',
2278 default=None)
2279 video_uploader_url = owner_profile_url
7e72694b 2280
b45a9e69 2281 channel_id = (
3089bc74
S
2282 str_or_none(video_details.get('channelId'))
2283 or self._html_search_meta(
2284 'channelId', video_webpage, 'channel id', default=None)
2285 or self._search_regex(
b45a9e69 2286 r'data-channel-external-id=(["\'])(?P<id>(?:(?!\1).)+)\1',
2287 video_webpage, 'channel id', default=None, group='id'))
dd4c4492
S
2288 channel_url = 'http://www.youtube.com/channel/%s' % channel_id if channel_id else None
2289
b477fc13
S
2290 thumbnails = []
2291 thumbnails_list = try_get(
2292 video_details, lambda x: x['thumbnail']['thumbnails'], list) or []
2293 for t in thumbnails_list:
2294 if not isinstance(t, dict):
2295 continue
2296 thumbnail_url = url_or_none(t.get('url'))
2297 if not thumbnail_url:
2298 continue
2299 thumbnails.append({
2300 'url': thumbnail_url,
2301 'width': int_or_none(t.get('width')),
2302 'height': int_or_none(t.get('height')),
2303 })
2304
2305 if not thumbnails:
7e72694b 2306 video_thumbnail = None
b477fc13
S
2307 # We try first to get a high quality image:
2308 m_thumb = re.search(r'<span itemprop="thumbnail".*?href="(.*?)">',
2309 video_webpage, re.DOTALL)
2310 if m_thumb is not None:
2311 video_thumbnail = m_thumb.group(1)
2312 thumbnail_url = try_get(video_info, lambda x: x['thumbnail_url'][0], compat_str)
2313 if thumbnail_url:
2314 video_thumbnail = compat_urllib_parse_unquote_plus(thumbnail_url)
2315 if video_thumbnail:
2316 thumbnails.append({'url': video_thumbnail})
7e72694b
S
2317
2318 # upload date
2319 upload_date = self._html_search_meta(
2320 'datePublished', video_webpage, 'upload date', default=None)
2321 if not upload_date:
2322 upload_date = self._search_regex(
2323 [r'(?s)id="eow-date.*?>(.*?)</span>',
2324 r'(?:id="watch-uploader-info".*?>.*?|["\']simpleText["\']\s*:\s*["\'])(?:Published|Uploaded|Streamed live|Started) on (.+?)[<"\']'],
2325 video_webpage, 'upload date', default=None)
37357d21
S
2326 if not upload_date:
2327 upload_date = microformat.get('publishDate') or microformat.get('uploadDate')
7e72694b
S
2328 upload_date = unified_strdate(upload_date)
2329
2330 video_license = self._html_search_regex(
2331 r'<h4[^>]+class="title"[^>]*>\s*License\s*</h4>\s*<ul[^>]*>\s*<li>(.+?)</li',
2332 video_webpage, 'license', default=None)
2333
2334 m_music = re.search(
2335 r'''(?x)
2336 <h4[^>]+class="title"[^>]*>\s*Music\s*</h4>\s*
2337 <ul[^>]*>\s*
2338 <li>(?P<title>.+?)
2339 by (?P<creator>.+?)
2340 (?:
2341 \(.+?\)|
2342 <a[^>]*
2343 (?:
2344 \bhref=["\']/red[^>]*>| # drop possible
2345 >\s*Listen ad-free with YouTube Red # YouTube Red ad
2346 )
2347 .*?
2348 )?</li
2349 ''',
2350 video_webpage)
2351 if m_music:
2352 video_alt_title = remove_quotes(unescapeHTML(m_music.group('title')))
2353 video_creator = clean_html(m_music.group('creator'))
2354 else:
2355 video_alt_title = video_creator = None
2356
2357 def extract_meta(field):
2358 return self._html_search_regex(
2359 r'<h4[^>]+class="title"[^>]*>\s*%s\s*</h4>\s*<ul[^>]*>\s*<li>(.+?)</li>\s*' % field,
2360 video_webpage, field, default=None)
2361
2362 track = extract_meta('Song')
2363 artist = extract_meta('Artist')
92bc97d3 2364 album = extract_meta('Album')
822b9d9c
RA
2365
2366 # Youtube Music Auto-generated description
92bc97d3 2367 release_date = release_year = None
822b9d9c
RA
2368 if video_description:
2369 mobj = re.search(r'(?s)Provided to YouTube by [^\n]+\n+(?P<track>[^·]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?', video_description)
2370 if mobj:
2371 if not track:
2372 track = mobj.group('track').strip()
2373 if not artist:
2374 artist = mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·'))
92bc97d3
RA
2375 if not album:
2376 album = mobj.group('album'.strip())
822b9d9c
RA
2377 release_year = mobj.group('release_year')
2378 release_date = mobj.group('release_date')
2379 if release_date:
2380 release_date = release_date.replace('-', '')
2381 if not release_year:
2382 release_year = int(release_date[:4])
2383 if release_year:
2384 release_year = int(release_year)
7e72694b 2385
9322f116 2386 yt_initial = self._get_yt_initial_data(video_id, video_webpage)
2387 if yt_initial:
2388 music_metadata = self._get_music_metadata_from_yt_initial(yt_initial)
2389 if len(music_metadata):
2390 album = music_metadata[0].get('album')
2391 artist = music_metadata[0].get('artist')
2392 track = music_metadata[0].get('track')
2393
7e72694b
S
2394 m_episode = re.search(
2395 r'<div[^>]+id="watch7-headline"[^>]*>\s*<span[^>]*>.*?>(?P<series>[^<]+)</a></b>\s*S(?P<season>\d+)\s*•\s*E(?P<episode>\d+)</span>',
2396 video_webpage)
2397 if m_episode:
c2dd2dc0 2398 series = unescapeHTML(m_episode.group('series'))
7e72694b
S
2399 season_number = int(m_episode.group('season'))
2400 episode_number = int(m_episode.group('episode'))
2401 else:
2402 series = season_number = episode_number = None
2403
2404 m_cat_container = self._search_regex(
2405 r'(?s)<h4[^>]*>\s*Category\s*</h4>\s*<ul[^>]*>(.*?)</ul>',
2406 video_webpage, 'categories', default=None)
dbeafce5 2407 category = None
7e72694b
S
2408 if m_cat_container:
2409 category = self._html_search_regex(
2410 r'(?s)<a[^<]+>(.*?)</a>', m_cat_container, 'category',
2411 default=None)
dbeafce5
S
2412 if not category:
2413 category = try_get(
2414 microformat, lambda x: x['category'], compat_str)
2415 video_categories = None if category is None else [category]
7e72694b
S
2416
2417 video_tags = [
2418 unescapeHTML(m.group('content'))
2419 for m in re.finditer(self._meta_regex('og:video:tag'), video_webpage)]
dbeafce5
S
2420 if not video_tags:
2421 video_tags = try_get(video_details, lambda x: x['keywords'], list)
7e72694b
S
2422
2423 def _extract_count(count_name):
2424 return str_to_int(self._search_regex(
a6c666d0 2425 r'"accessibilityData":\{"label":"([\d,\w]+) %ss"\}'
7e72694b
S
2426 % re.escape(count_name),
2427 video_webpage, count_name, default=None))
2428
2429 like_count = _extract_count('like')
2430 dislike_count = _extract_count('dislike')
2431
dbdaaa23
S
2432 if view_count is None:
2433 view_count = str_to_int(self._search_regex(
2434 r'<[^>]+class=["\']watch-view-count[^>]+>\s*([\d,\s]+)', video_webpage,
2435 'view count', default=None))
2436
bf3c9326
S
2437 average_rating = (
2438 float_or_none(video_details.get('averageRating'))
2439 or try_get(video_info, lambda x: float_or_none(x['avg_rating'][0])))
2440
7e72694b 2441 # subtitles
321bf820 2442 video_subtitles = self.extract_subtitles(
2443 video_id, video_webpage, has_live_chat_replay)
7e72694b
S
2444 automatic_captions = self.extract_automatic_captions(video_id, video_webpage)
2445
2446 video_duration = try_get(
2447 video_info, lambda x: int_or_none(x['length_seconds'][0]))
dbdaaa23
S
2448 if not video_duration:
2449 video_duration = int_or_none(video_details.get('lengthSeconds'))
7e72694b
S
2450 if not video_duration:
2451 video_duration = parse_duration(self._html_search_meta(
2452 'duration', video_webpage, 'video duration'))
2453
b84071c0
JP
2454 # Get Subscriber Count of channel
2455 subscriber_count = parse_count(self._search_regex(
2456 r'"text":"([\d\.]+\w?) subscribers"',
2457 video_webpage,
2458 'subscriber count',
2459 default=None
2460 ))
2461
7e72694b
S
2462 # annotations
2463 video_annotations = None
2464 if self._downloader.params.get('writeannotations', False):
64b6a4e9
RA
2465 xsrf_token = self._search_regex(
2466 r'([\'"])XSRF_TOKEN\1\s*:\s*([\'"])(?P<xsrf_token>[A-Za-z0-9+/=]+)\2',
2467 video_webpage, 'xsrf token', group='xsrf_token', fatal=False)
2468 invideo_url = try_get(
2469 player_response, lambda x: x['annotations'][0]['playerAnnotationsUrlsRenderer']['invideoUrl'], compat_str)
2470 if xsrf_token and invideo_url:
2471 xsrf_field_name = self._search_regex(
2472 r'([\'"])XSRF_FIELD_NAME\1\s*:\s*([\'"])(?P<xsrf_field_name>\w+)\2',
2473 video_webpage, 'xsrf field name',
2474 group='xsrf_field_name', default='session_token')
2475 video_annotations = self._download_webpage(
2476 self._proto_relative_url(invideo_url),
2477 video_id, note='Downloading annotations',
2478 errnote='Unable to download video annotations', fatal=False,
2479 data=urlencode_postdata({xsrf_field_name: xsrf_token}))
7e72694b 2480
84213ea8 2481 chapters = self._extract_chapters(video_webpage, description_original, video_id, video_duration)
7e72694b 2482
dd27fd17 2483 # Look for the DASH manifest
203fb43f 2484 if self._downloader.params.get('youtube_include_dash_manifest', True):
77c6fb5b 2485 dash_mpd_fatal = True
8ff648e4 2486 for mpd_url in dash_mpds:
d8d24a92 2487 dash_formats = {}
774e208f 2488 try:
05d0d131
YCH
2489 def decrypt_sig(mobj):
2490 s = mobj.group(1)
2491 dec_s = self._decrypt_signature(s, video_id, player_url, age_gate)
2492 return '/signature/%s' % dec_s
2493
8ff648e4 2494 mpd_url = re.sub(r'/s/([a-fA-F0-9\.]+)', decrypt_sig, mpd_url)
2d2fa82d 2495
8ff648e4 2496 for df in self._extract_mpd_formats(
2497 mpd_url, video_id, fatal=dash_mpd_fatal,
2498 formats_dict=self._formats):
c63ca0ee
S
2499 if not df.get('filesize'):
2500 df['filesize'] = _extract_filesize(df['url'])
d8d24a92
S
2501 # Do not overwrite DASH format found in some previous DASH manifest
2502 if df['format_id'] not in dash_formats:
2503 dash_formats[df['format_id']] = df
77c6fb5b
S
2504 # Additional DASH manifests may end up in HTTP Error 403 therefore
2505 # allow them to fail without bug report message if we already have
2506 # some DASH manifest succeeded. This is temporary workaround to reduce
2507 # burst of bug reports until we figure out the reason and whether it
2508 # can be fixed at all.
2509 dash_mpd_fatal = False
774e208f
PH
2510 except (ExtractorError, KeyError) as e:
2511 self.report_warning(
2512 'Skipping DASH manifest: %r' % e, video_id)
d8d24a92 2513 if dash_formats:
04b3b3df
JMF
2514 # Remove the formats we found through non-DASH, they
2515 # contain less info and it can be wrong, because we use
2516 # fixed values (for example the resolution). See
067aa17e 2517 # https://github.com/ytdl-org/youtube-dl/issues/5774 for an
04b3b3df 2518 # example.
d80265cc 2519 formats = [f for f in formats if f['format_id'] not in dash_formats.keys()]
d8d24a92 2520 formats.extend(dash_formats.values())
d80044c2 2521
6271f1ca
PH
2522 # Check for malformed aspect ratio
2523 stretched_m = re.search(
2524 r'<meta\s+property="og:video:tag".*?content="yt:stretch=(?P<w>[0-9]+):(?P<h>[0-9]+)">',
2525 video_webpage)
2526 if stretched_m:
313dfc45
LL
2527 w = float(stretched_m.group('w'))
2528 h = float(stretched_m.group('h'))
5faf9fed
S
2529 # yt:stretch may hold invalid ratio data (e.g. for Q39EVAstoRM ratio is 17:0).
2530 # We will only process correct ratios.
313dfc45 2531 if w > 0 and h > 0:
41f24c32 2532 ratio = w / h
313dfc45
LL
2533 for f in formats:
2534 if f.get('vcodec') != 'none':
2535 f['stretched_ratio'] = ratio
6271f1ca 2536
026fbedc 2537 if not formats:
43ebf77d
S
2538 if 'reason' in video_info:
2539 if 'The uploader has not made this video available in your country.' in video_info['reason']:
2540 regions_allowed = self._html_search_meta(
2541 'regionsAllowed', video_webpage, default=None)
2542 countries = regions_allowed.split(',') if regions_allowed else None
2543 self.raise_geo_restricted(
2544 msg=video_info['reason'][0], countries=countries)
2545 reason = video_info['reason'][0]
2546 if 'Invalid parameters' in reason:
2547 unavailable_message = extract_unavailable_message()
2548 if unavailable_message:
2549 reason = unavailable_message
2550 raise ExtractorError(
2551 'YouTube said: %s' % reason,
2552 expected=True, video_id=video_id)
2553 if video_info.get('license_info') or try_get(player_response, lambda x: x['streamingData']['licenseInfos']):
2554 raise ExtractorError('This video is DRM protected.', expected=True)
0d297518 2555
4bcc7bd1 2556 self._sort_formats(formats)
4ea3be0a 2557
21c340b8 2558 self.mark_watched(video_id, video_info, player_response)
d77ab8e2 2559
4ea3be0a 2560 return {
8bcc8756
JW
2561 'id': video_id,
2562 'uploader': video_uploader,
2563 'uploader_id': video_uploader_id,
fd050249 2564 'uploader_url': video_uploader_url,
dd4c4492
S
2565 'channel_id': channel_id,
2566 'channel_url': channel_url,
8bcc8756 2567 'upload_date': upload_date,
7caf9830 2568 'license': video_license,
936784b2 2569 'creator': video_creator or artist,
8bcc8756 2570 'title': video_title,
936784b2 2571 'alt_title': video_alt_title or track,
b477fc13 2572 'thumbnails': thumbnails,
8bcc8756
JW
2573 'description': video_description,
2574 'categories': video_categories,
000b6b5a 2575 'tags': video_tags,
8bcc8756 2576 'subtitles': video_subtitles,
360e1ca5 2577 'automatic_captions': automatic_captions,
8bcc8756
JW
2578 'duration': video_duration,
2579 'age_limit': 18 if age_gate else 0,
2580 'annotations': video_annotations,
9cafc3fd 2581 'chapters': chapters,
7e8c0af0 2582 'webpage_url': proto + '://www.youtube.com/watch?v=%s' % video_id,
8bcc8756 2583 'view_count': view_count,
4ea3be0a 2584 'like_count': like_count,
2585 'dislike_count': dislike_count,
bf3c9326 2586 'average_rating': average_rating,
8bcc8756 2587 'formats': formats,
2fe1ff85 2588 'is_live': is_live,
7c80519c 2589 'start_time': start_time,
297a564b 2590 'end_time': end_time,
12afdc2a
S
2591 'series': series,
2592 'season_number': season_number,
2593 'episode_number': episode_number,
936784b2
S
2594 'track': track,
2595 'artist': artist,
5caabd3c 2596 'album': album,
2597 'release_date': release_date,
2598 'release_year': release_year,
b84071c0 2599 'subscriber_count': subscriber_count,
4ea3be0a 2600 }
c5e8d7af 2601
5f6a1245 2602
class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
    """Extractor for YouTube playlist URLs and bare playlist ids.

    Regular playlists are read from the ``/playlist?list=<id>`` page, while
    ids starting with ``RD``, ``UL`` or ``PU`` go through the custom "mix"
    extraction.  When a URL carries both a video id and a playlist id, the
    ``noplaylist`` downloader option decides which of the two is extracted.
    """

    IE_DESC = 'YouTube.com playlists'
    # Group 1 captures the playlist id found in a URL, group 2 a bare id.
    _VALID_URL = r"""(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        (?:
                            (?:
                                youtube(?:kids)?\.com|
                                invidio\.us
                            )
                            /
                            (?:
                               (?:course|view_play_list|my_playlists|artist|playlist|watch|embed/(?:videoseries|[0-9A-Za-z_-]{11}))
                               \? (?:.*?[&;])*? (?:p|a|list)=
                            |  p/
                            )|
                            youtu\.be/[0-9A-Za-z_-]{11}\?.*?\blist=
                        )
                        (
                            (?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)?[0-9A-Za-z-_]{10,}
                            # Top tracks, they can also include dots
                            |(?:MC)[\w\.]*
                        )
                        .*
                     |
                        (%(playlist_id)s)
                     )""" % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s'
    _VIDEO_RE_TPL = r'href="\s*/watch\?v=%s(?:&amp;(?:[^"]*?index=(?P<index>\d+))?(?:[^>]+>(?P<title>[^<]+))?)?'
    _VIDEO_RE = _VIDEO_RE_TPL % r'(?P<id>[0-9A-Za-z_-]{11})'
    IE_NAME = 'youtube:playlist'
    _TESTS = [{
        'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
        'info_dict': {
            'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
            'uploader': 'Sergey M.',
            'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
            'title': 'youtube-dl public playlist',
        },
        'playlist_count': 1,
    }, {
        'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
        'info_dict': {
            'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
            'uploader': 'Sergey M.',
            'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
            'title': 'youtube-dl empty playlist',
        },
        'playlist_count': 0,
    }, {
        'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
        'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
        'info_dict': {
            'title': '29C3: Not my department',
            'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
            'uploader': 'Christiaan008',
            'uploader_id': 'ChRiStIaAn008',
        },
        'playlist_count': 96,
    }, {
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
            'uploader': 'Wickydoo',
            'uploader_id': 'Wickydoo',
        },
        'playlist_mincount': 26,
    }, {
        'note': 'Large playlist',
        'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
        'info_dict': {
            'title': 'Uploads from Cauchemar',
            'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
            'uploader': 'Cauchemar',
            'uploader_id': 'Cauchemar89',
        },
        'playlist_mincount': 799,
    }, {
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        },
        'playlist_count': 2,
        'skip': 'This playlist is private',
    }, {
        'note': 'embedded',
        'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA15',
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
            'uploader': 'milan',
            'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
        }
    }, {
        'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
        'playlist_mincount': 485,
        'info_dict': {
            'title': '2018 Chinese New Singles (11/6 updated)',
            'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
            'uploader': 'LBK',
            'uploader_id': 'sdragonfang',
        }
    }, {
        'note': 'Embedded SWF player',
        'url': 'https://www.youtube.com/p/YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ?hl=en_US&fs=1&rel=0',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA7',
            'id': 'YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ',
        },
        'skip': 'This playlist does not exist',
    }, {
        'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
        'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
        'info_dict': {
            'title': 'Uploads from Interstellar Movie',
            'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
            'uploader': 'Interstellar Movie',
            'uploader_id': 'InterstellarMovie1',
        },
        'playlist_mincount': 21,
    }, {
        # Playlist URL that does not actually serve a playlist
        'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
        'info_dict': {
            'id': 'FqZTN594JQw',
            'ext': 'webm',
            'title': "Smiley's People 01 detective, Adventure Series, Action",
            'uploader': 'STREEM',
            'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
            'upload_date': '20150526',
            'license': 'Standard YouTube License',
            'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
            'categories': ['People & Blogs'],
            'tags': list,
            'view_count': int,
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'skip_download': True,
        },
        'skip': 'This video is not available.',
        'add_ie': [YoutubeIE.ie_key()],
    }, {
        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
        'info_dict': {
            'id': 'yeWKywCrFtk',
            'ext': 'mp4',
            'title': 'Small Scale Baler and Braiding Rugs',
            'uploader': 'Backus-Page House Museum',
            'uploader_id': 'backuspagemuseum',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
            'upload_date': '20161008',
            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
            'categories': ['Nonprofits & Activism'],
            'tags': list,
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
    }, {
        # https://github.com/ytdl-org/youtube-dl/issues/21844
        'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
        'info_dict': {
            'title': 'Data Analysis with Dr Mike Pound',
            'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
            'uploader_id': 'Computerphile',
            'uploader': 'Computerphile',
        },
        'playlist_mincount': 11,
    }, {
        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
        'only_matching': True,
    }, {
        'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
        'only_matching': True,
    }, {
        # music album playlist
        'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
        'only_matching': True,
    }, {
        'url': 'https://invidio.us/playlist?list=PLDIoUOhQQPlXr63I_vwF9GD8sAKh77dWU',
        'only_matching': True,
    }, {
        'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',
        'only_matching': True,
    }]

    def _real_initialize(self):
        """Log in (via ``self._login()``) before any extraction happens."""
        self._login()

    def extract_videos_from_page(self, page):
        """Collect the videos referenced by a playlist page.

        Tags carrying a ``data-video-id`` attribute are read first; the
        regex-based fallbacks are then run through
        ``extract_videos_from_page_impl`` with the same two lists.

        Returns the result of zipping the collected ids with their titles
        (a title may be None).
        """
        ids_in_page = []
        titles_in_page = []

        for item in re.findall(
                r'(<[^>]*\bdata-video-id\s*=\s*["\'][0-9A-Za-z_-]{11}[^>]+>)', page):
            attrs = extract_attributes(item)
            video_id = attrs.get('data-video-id')
            if not video_id:
                # The regex above works on raw text, so it can match a tag
                # for which extract_attributes() returns no data-video-id
                # key (e.g. the text sits inside another attribute's quoted
                # value).  Skip such tags instead of aborting the whole
                # page with a KeyError.
                continue
            video_title = unescapeHTML(attrs.get('data-title'))
            if video_title:
                video_title = video_title.strip()
            ids_in_page.append(video_id)
            titles_in_page.append(video_title)

        # Fallback with old _VIDEO_RE
        self.extract_videos_from_page_impl(
            self._VIDEO_RE, page, ids_in_page, titles_in_page)

        # Relaxed fallbacks
        self.extract_videos_from_page_impl(
            r'href="\s*/watch\?v\s*=\s*(?P<id>[0-9A-Za-z_-]{11})', page,
            ids_in_page, titles_in_page)
        self.extract_videos_from_page_impl(
            r'data-video-ids\s*=\s*["\'](?P<id>[0-9A-Za-z_-]{11})', page,
            ids_in_page, titles_in_page)

        return zip(ids_in_page, titles_in_page)

    def _extract_mix_ids_from_yt_initial(self, yt_initial):
        """Return the video ids listed in the playlist panel of *yt_initial*.

        *yt_initial* is the data structure obtained from
        ``_get_yt_initial_data``.  Items without a string ``videoId`` are
        ignored; an empty list is returned when the panel is missing.
        """
        playlist_contents = try_get(
            yt_initial,
            lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist']['contents'],
            list) or []
        ids = []
        for item in playlist_contents:
            video_id = try_get(
                item, lambda x: x['playlistPanelVideoRenderer']['videoId'],
                compat_str)
            if video_id:
                ids.append(video_id)
        return ids

    def _extract_mix(self, playlist_id):
        """Extract a "mix" playlist by walking its watch pages.

        Each iteration downloads the watch page of the last known video with
        the mix attached and gathers the ids it lists; the loop ends as soon
        as a page brings no id that has not been seen already.

        Returns a playlist result built from the collected ids, titled with
        the text found in the last downloaded page (None when no title
        element matches).
        """
        # The mixes are generated from a single video
        # the id of the playlist is just 'RD' + video_id
        ids = []
        last_id = playlist_id[-11:]
        for n in itertools.count(1):
            url = 'https://www.youtube.com/watch?v=%s&list=%s' % (last_id, playlist_id)
            webpage = self._download_webpage(
                url, playlist_id, 'Downloading page {0} of Youtube mix'.format(n))
            new_ids = orderedSet(re.findall(
                r'''(?xs)data-video-username=".*?".*?
                           href="/watch\?v=([0-9A-Za-z_-]{11})&amp;[^"]*?list=%s''' % re.escape(playlist_id),
                webpage))

            # if no ids in html of page, try using embedded json
            if not new_ids:
                yt_initial = self._get_yt_initial_data(playlist_id, webpage)
                if yt_initial:
                    new_ids = self._extract_mix_ids_from_yt_initial(yt_initial)

            # Fetch new pages until all the videos are repeated, it seems that
            # there are always 51 unique videos.
            new_ids = [_id for _id in new_ids if _id not in ids]
            if not new_ids:
                break
            ids.extend(new_ids)
            last_id = ids[-1]

        url_results = self._ids_to_results(ids)

        search_title = lambda class_name: get_element_by_attribute('class', class_name, webpage)
        title_span = (
            search_title('playlist-title')
            or search_title('title long-title')
            or search_title('title'))
        title = clean_html(title_span)

        return self.playlist_result(url_results, playlist_id, title)

    def _extract_playlist(self, playlist_id):
        """Extract a regular playlist from its ``/playlist?list=`` page.

        Returns a ``(has_videos, playlist)`` tuple.  ``has_videos`` is False
        only when no playlist title was found and ``self._entries`` yields
        nothing for the page; ``playlist`` is the playlist result extended
        with the uploader fields.

        Raises ExtractorError (expected) when an alert on the page says that
        the playlist does not exist, is private, or that the parameters are
        invalid.
        """
        url = self._TEMPLATE_URL % playlist_id
        page = self._download_webpage(url, playlist_id)

        # the yt-alert-message now has tabindex attribute (see https://github.com/ytdl-org/youtube-dl/issues/11604)
        for match in re.findall(r'<div class="yt-alert-message"[^>]*>([^<]+)</div>', page):
            match = match.strip()
            # Check if the playlist exists or is private
            mobj = re.match(r'[^<]*(?:The|This) playlist (?P<reason>does not exist|is private)[^<]*', match)
            if mobj:
                reason = mobj.group('reason')
                message = 'This playlist %s' % reason
                if 'private' in reason:
                    message += ', use --username or --netrc to access it'
                message += '.'
                raise ExtractorError(message, expected=True)
            elif re.match(r'[^<]*Invalid parameters[^<]*', match):
                raise ExtractorError(
                    'Invalid parameters. Maybe URL is incorrect.',
                    expected=True)
            elif re.match(r'[^<]*Choose your language[^<]*', match):
                continue
            else:
                self.report_warning('Youtube gives an alert message: ' + match)

        playlist_title = self._html_search_regex(
            r'(?s)<h1 class="pl-header-title[^"]*"[^>]*>\s*(.*?)\s*</h1>',
            page, 'title', default=None)

        _UPLOADER_BASE = r'class=["\']pl-header-details[^>]+>\s*<li>\s*<a[^>]+\bhref='
        uploader = self._html_search_regex(
            r'%s["\']/(?:user|channel)/[^>]+>([^<]+)' % _UPLOADER_BASE,
            page, 'uploader', default=None)
        mobj = re.search(
            r'%s(["\'])(?P<path>/(?:user|channel)/(?P<uploader_id>.+?))\1' % _UPLOADER_BASE,
            page)
        if mobj:
            uploader_id = mobj.group('uploader_id')
            uploader_url = compat_urlparse.urljoin(url, mobj.group('path'))
        else:
            uploader_id = uploader_url = None

        has_videos = True

        if not playlist_title:
            try:
                # Some playlist URLs don't actually serve a playlist (e.g.
                # https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4)
                next(self._entries(page, playlist_id))
            except StopIteration:
                has_videos = False

        playlist = self.playlist_result(
            self._entries(page, playlist_id), playlist_id, playlist_title)
        playlist.update({
            'uploader': uploader,
            'uploader_id': uploader_id,
            'uploader_url': uploader_url,
        })

        return has_videos, playlist

    def _check_download_just_video(self, url, playlist_id):
        """Decide whether *url* should yield a single video instead.

        Returns a ``(video_id, result)`` tuple: ``(None, None)`` when the URL
        carries no video id, ``(video_id, url_result)`` when the
        ``noplaylist`` option is set, and ``(video_id, None)`` otherwise.
        """
        # Check if it's a video-specific URL
        query_dict = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
        video_id = query_dict.get('v', [None])[0] or self._search_regex(
            r'(?:(?:^|//)youtu\.be/|youtube\.com/embed/(?!videoseries))([0-9A-Za-z_-]{11})', url,
            'video id', default=None)
        if video_id:
            if self._downloader.params.get('noplaylist'):
                self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
                return video_id, self.url_result(video_id, 'Youtube', video_id=video_id)
            else:
                self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))
                return video_id, None
        return None, None

    def _real_extract(self, url):
        """Dispatch *url* to single-video, mix or regular playlist extraction.

        Raises ExtractorError when *url* does not match ``_VALID_URL``.
        """
        # Extract playlist id
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError('Invalid URL: %s' % url)
        playlist_id = mobj.group(1) or mobj.group(2)

        video_id, video = self._check_download_just_video(url, playlist_id)
        if video:
            return video

        if playlist_id.startswith(('RD', 'UL', 'PU')):
            # Mixes require a custom extraction process
            return self._extract_mix(playlist_id)

        has_videos, playlist = self._extract_playlist(playlist_id)
        if has_videos or not video_id:
            return playlist

        # Some playlist URLs don't actually serve a playlist (see
        # https://github.com/ytdl-org/youtube-dl/issues/10537).
        # Fallback to plain video extraction if there is a video id
        # along with playlist id.
        return self.url_result(video_id, 'Youtube', video_id=video_id)
448830ce 2981
c5e8d7af 2982
class YoutubeChannelIE(YoutubePlaylistBaseInfoExtractor):
    """Extractor for ``/channel/<id>`` URLs.

    A channel whose id starts with ``UC`` is delegated to the playlist
    extractor through the matching ``UU`` playlist; otherwise the videos are
    read from the channel's ``/videos`` page.
    """

    IE_NAME = 'youtube:channel'
    IE_DESC = 'YouTube.com channels'
    _VALID_URL = r'https?://(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie|kids)?\.com|(?:www\.)?invidio\.us)/channel/(?P<id>[0-9A-Za-z_-]+)'
    _TEMPLATE_URL = 'https://www.youtube.com/channel/%s/videos'
    _VIDEO_RE = r'(?:title="(?P<title>[^"]+)"[^>]+)?href="/watch\?v=(?P<id>[0-9A-Za-z_-]+)&?'
    _TESTS = [{
        'note': 'paginated channel',
        'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
        'playlist_mincount': 91,
        'info_dict': {
            'id': 'UUKfVa3S1e4PHvxWcwyMMg8w',
            'title': 'Uploads from lex will',
            'uploader': 'lex will',
            'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
        }
    }, {
        'note': 'Age restricted channel',
        # from https://www.youtube.com/user/DeusExOfficial
        'url': 'https://www.youtube.com/channel/UCs0ifCMCm1icqRbqhUINa0w',
        'playlist_mincount': 64,
        'info_dict': {
            'id': 'UUs0ifCMCm1icqRbqhUINa0w',
            'title': 'Uploads from Deus Ex',
            'uploader': 'Deus Ex',
            'uploader_id': 'DeusExOfficial',
        },
    }, {
        'url': 'https://invidio.us/channel/UC23qupoDRn9YOAVzeoxjOQA',
        'only_matching': True,
    }, {
        'url': 'https://www.youtubekids.com/channel/UCyu8StPfZWapR6rfW_JgqcA',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Reject URLs claimed by the playlists or live extractors."""
        if YoutubePlaylistsIE.suitable(url) or YoutubeLiveIE.suitable(url):
            return False
        return super(YoutubeChannelIE, cls).suitable(url)

    def _build_template_url(self, url, channel_id):
        """Return the videos page URL for *channel_id* (*url* is unused here)."""
        return self._TEMPLATE_URL % channel_id

    def _real_extract(self, url):
        """Extract the videos of a channel.

        Returns a url result pointing at the ``UU`` uploads playlist when a
        ``UC`` channel id can be found, otherwise a playlist result built
        from the channel page.  Raises ExtractorError (expected) when the
        page has no entries but shows an alert message.
        """
        channel_id = self._match_id(url)
        videos_url = self._build_template_url(url, channel_id)

        # Channel by page listing is restricted to 35 pages of 30 items, i.e. 1050 videos total (see #5778)
        # Workaround by extracting as a playlist if managed to obtain channel playlist URL
        # otherwise fallback on channel by page extraction
        listing_page = self._download_webpage(
            videos_url + '?view=57', channel_id,
            'Downloading channel page', fatal=False)

        uploads_source_id = None
        if listing_page is not False:
            uploads_source_id = self._html_search_meta(
                'channelId', listing_page, 'channel id', default=None)
            if not uploads_source_id:
                app_url = self._html_search_meta(
                    ('al:ios:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad'),
                    listing_page, 'channel url', default=None)
                if app_url:
                    uploads_source_id = self._search_regex(
                        r'vnd\.youtube://user/([0-9A-Za-z_-]+)',
                        app_url, 'channel id', default=None)

        if uploads_source_id and uploads_source_id.startswith('UC'):
            uploads_playlist_id = 'UU' + uploads_source_id[2:]
            playlist_url = compat_urlparse.urljoin(
                videos_url, '/playlist?list=%s' % uploads_playlist_id)
            return self.url_result(playlist_url, 'YoutubePlaylist')

        first_page = self._download_webpage(videos_url, channel_id, 'Downloading page #1')
        autogenerated_mark = re.search(r'''(?x)
                class="[^"]*?(?:
                    channel-header-autogenerated-label|
                    yt-channel-title-autogenerated
                )[^"]*"''', first_page)

        if autogenerated_mark is not None:
            # The videos are contained in a single page
            # the ajax pages can't be used, they are empty
            entries = []
            for video_id, video_title in self.extract_videos_from_page(first_page):
                entries.append(self.url_result(
                    video_id, 'Youtube', video_id=video_id,
                    video_title=video_title))
            return self.playlist_result(entries, channel_id)

        # Probe a throwaway iterator: an empty one may mean the page only
        # carries an alert that should be reported to the user.
        nothing = object()
        if next(self._entries(first_page, channel_id), nothing) is nothing:
            alert_message = self._html_search_regex(
                r'(?s)<div[^>]+class=(["\']).*?\byt-alert-message\b.*?\1[^>]*>(?P<alert>[^<]+)</div>',
                first_page, 'alert', default=None, group='alert')
            if alert_message:
                raise ExtractorError('Youtube said: %s' % alert_message, expected=True)

        return self.playlist_result(self._entries(first_page, channel_id), channel_id)
c5e8d7af
PH
3082
3083
class YoutubeUserIE(YoutubeChannelIE):
    """Extractor for ``/user/<name>``, ``/c/<name>``, bare ``/<name>`` and ``ytuser:<name>``."""
    IE_NAME = 'youtube:user'
    IE_DESC = 'YouTube.com user videos (URL or "ytuser" keyword)'
    _VALID_URL = r'(?:(?:https?://(?:\w+\.)?youtube\.com/(?:(?P<user>user|c)/)?(?!(?:attribution_link|watch|results|shared)(?:$|[^a-z_A-Z0-9%-])))|ytuser:)(?!feed/)(?P<id>[A-Za-z0-9_%-]+)'
    _TEMPLATE_URL = 'https://www.youtube.com/%s/%s/videos'

    _TESTS = [{
        'url': 'https://www.youtube.com/user/TheLinuxFoundation',
        'playlist_mincount': 320,
        'info_dict': {
            'id': 'UUfX55Sx5hEFjoC3cNs6mCUQ',
            'title': 'Uploads from The Linux Foundation',
            'uploader': 'The Linux Foundation',
            'uploader_id': 'TheLinuxFoundation',
        }
    }, {
        # Only available via https://www.youtube.com/c/12minuteathlete/videos
        # but not https://www.youtube.com/user/12minuteathlete/videos
        'url': 'https://www.youtube.com/c/12minuteathlete/videos',
        'playlist_mincount': 249,
        'info_dict': {
            'id': 'UUVjM-zV6_opMDx7WYxnjZiQ',
            'title': 'Uploads from 12 Minute Athlete',
            'uploader': '12 Minute Athlete',
            'uploader_id': 'the12minuteathlete',
        }
    }, {
        'url': 'ytuser:phihag',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/c/gametrailers',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/c/Pawe%C5%82Zadro%C5%BCniak',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/gametrailers',
        'only_matching': True,
    }, {
        # This channel is not available, geo restricted to JP
        'url': 'https://www.youtube.com/user/kananishinoSMEJ/videos',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Accept *url* only if no other ``Youtube*IE`` class in this module claims it."""
        # The pattern above is very permissive and would also match URLs
        # that belong to the other YouTube extractors, so each of them
        # gets the first say.
        for name, klass in globals().items():
            if not (name.startswith('Youtube') and name.endswith('IE')):
                continue
            if klass is cls:
                continue
            if klass.suitable(url):
                return False
        return super(YoutubeUserIE, cls).suitable(url)

    def _build_template_url(self, url, channel_id):
        """Build the ``/<kind>/<name>/videos`` URL, keeping the ``user``/``c`` kind of *url*.

        When the URL carries no explicit kind (bare name or ``ytuser:``),
        ``user`` is assumed.
        """
        mobj = re.match(self._VALID_URL, url)
        kind = mobj.group('user') or 'user'
        return self._TEMPLATE_URL % (kind, mobj.group('id'))
3141
b05654f0 3142
f07e276a
S
class YoutubeLiveIE(YoutubeBaseInfoExtractor):
    """Resolve a channel's ``/live`` URL to the video it currently points at."""
    IE_NAME = 'youtube:live'
    IE_DESC = 'YouTube.com live streams'
    _VALID_URL = r'(?P<base_url>https?://(?:\w+\.)?youtube\.com/(?:(?:user|channel|c)/)?(?P<id>[^/]+))/live'

    _TESTS = [{
        'url': 'https://www.youtube.com/user/TheYoungTurks/live',
        'info_dict': {
            'id': 'a48o2S1cPoo',
            'ext': 'mp4',
            'title': 'The Young Turks - Live Main Show',
            'uploader': 'The Young Turks',
            'uploader_id': 'TheYoungTurks',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
            'upload_date': '20150715',
            'license': 'Standard YouTube License',
            'description': 'md5:438179573adcdff3c97ebb1ee632b891',
            'categories': ['News & Politics'],
            'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/TheYoungTurks/live',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return the live video if the page exposes one, else the channel URL itself."""
        mobj = re.match(self._VALID_URL, url)
        channel_id, base_url = mobj.group('id'), mobj.group('base_url')

        webpage = self._download_webpage(url, channel_id, fatal=False)
        if not webpage:
            # Download failed: hand the plain channel URL to another extractor
            return self.url_result(base_url)

        page_type = self._og_search_property(
            'type', webpage, 'page type', default='')
        video_id = self._html_search_meta(
            'videoId', webpage, 'video id', default=None)

        is_video_page = page_type.startswith('video')
        if is_video_page and video_id and re.match(r'^[0-9A-Za-z_-]{11}$', video_id):
            return self.url_result(video_id, YoutubeIE.ie_key())
        return self.url_result(base_url)
3193
3194
e462474e
S
class YoutubePlaylistsIE(YoutubePlaylistsBaseInfoExtractor):
    """Extractor for the ``/playlists`` tab of a user, channel or custom (``/c/``) URL.

    Only the matching pattern, naming and tests are declared here; the
    extraction logic is inherited from the base class.
    """
    IE_NAME = 'youtube:playlists'
    IE_DESC = 'YouTube.com user/channel playlists'
    _VALID_URL = r'https?://(?:\w+\.)?youtube\.com/(?:user|channel|c)/(?P<id>[^/]+)/playlists'

    _TESTS = [{
        'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
        'playlist_mincount': 4,
        'info_dict': {
            'id': 'ThirstForScience',
            'title': 'ThirstForScience',
        },
    }, {
        # with "Load more" button
        'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
        'playlist_mincount': 70,
        'info_dict': {
            'id': 'igorkle1',
            'title': 'Игорь Клейнер',
        },
    }, {
        'url': 'https://www.youtube.com/channel/UCiU1dHvZObB2iP6xkJ__Icw/playlists',
        'playlist_mincount': 17,
        'info_dict': {
            'id': 'UCiU1dHvZObB2iP6xkJ__Icw',
            'title': 'Chem Player',
        },
        'skip': 'Blocked',
    }, {
        'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',
        'only_matching': True,
    }]
0c148415
S
3227
3228
870f3bfc
S
class YoutubeSearchBaseInfoExtractor(YoutubePlaylistBaseInfoExtractor):
    """Playlist base specialised with the video-link pattern used for search results."""
    # Captures the 11-character video id of a /watch link and, when a
    # title="..." attribute follows in the same tag, the title as well.
    _VIDEO_RE = r'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})(?:[^"]*"[^>]+\btitle="(?P<title>[^"]+))?'
3231
3232
class YoutubeSearchIE(SearchInfoExtractor, YoutubeSearchBaseInfoExtractor):
    """Search extractor that pages through the ``youtubei/v1/search`` JSON endpoint."""
    IE_DESC = 'YouTube.com searches'
    # there doesn't appear to be a real limit, for example if you search for
    # 'python' you get more than 8.000.000 results
    _MAX_RESULTS = float('inf')
    IE_NAME = 'youtube:search'
    _SEARCH_KEY = 'ytsearch'
    # Optional value sent as the "params" field of the request body;
    # subclasses set it to alter the search.
    _SEARCH_PARAMS = None
    _TESTS = []

    def _entries(self, query, n):
        """Yield up to *n* ``url_transparent`` results for *query*.

        Pages are requested one after another, each continuation token
        being fed into the next request. Iteration stops when *n* results
        have been produced, when a page cannot be downloaded or parsed, or
        when no continuation token is offered.
        """
        data = {
            'context': {
                'client': {
                    'clientName': 'WEB',
                    'clientVersion': '2.20201021.03.00',
                }
            },
            'query': query,
        }
        if self._SEARCH_PARAMS:
            data['params'] = self._SEARCH_PARAMS
        total = 0
        for page_num in itertools.count(1):
            search = self._download_json(
                'https://www.youtube.com/youtubei/v1/search?key=AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
                video_id='query "%s"' % query,
                note='Downloading page %s' % page_num,
                errnote='Unable to download API page', fatal=False,
                data=json.dumps(data).encode('utf8'),
                headers={'content-type': 'application/json'})
            if not search:
                break
            # The first response and the continuation responses nest the
            # section list under different keys.
            slr_contents = try_get(
                search,
                (lambda x: x['contents']['twoColumnSearchResultsRenderer']['primaryContents']['sectionListRenderer']['contents'],
                 lambda x: x['onResponseReceivedCommands'][0]['appendContinuationItemsAction']['continuationItems']),
                list)
            if not slr_contents:
                break
            isr_contents = try_get(
                slr_contents,
                lambda x: x[0]['itemSectionRenderer']['contents'],
                list)
            if not isr_contents:
                break
            for content in isr_contents:
                if not isinstance(content, dict):
                    continue
                video = content.get('videoRenderer')
                if not isinstance(video, dict):
                    continue
                video_id = video.get('videoId')
                if not video_id:
                    continue
                title = try_get(video, lambda x: x['title']['runs'][0]['text'], compat_str)
                description = try_get(video, lambda x: x['descriptionSnippet']['runs'][0]['text'], compat_str)
                duration = parse_duration(try_get(video, lambda x: x['lengthText']['simpleText'], compat_str))
                view_count_text = try_get(video, lambda x: x['viewCountText']['simpleText'], compat_str) or ''
                # Keep the thousands separators in the captured group and
                # let str_to_int drop them; matching digits only would cut
                # "1,234,567 views" down to 1.
                view_count = str_to_int(self._search_regex(
                    r'^([\d,]+)', re.sub(r'\s', '', view_count_text),
                    'view count', default=None))
                uploader = try_get(video, lambda x: x['ownerText']['runs'][0]['text'], compat_str)
                total += 1
                yield {
                    '_type': 'url_transparent',
                    'ie_key': YoutubeIE.ie_key(),
                    'id': video_id,
                    'url': video_id,
                    'title': title,
                    'description': description,
                    'duration': duration,
                    'view_count': view_count,
                    'uploader': uploader,
                }
                if total == n:
                    return
            token = try_get(
                slr_contents,
                lambda x: x[1]['continuationItemRenderer']['continuationEndpoint']['continuationCommand']['token'],
                compat_str)
            if not token:
                break
            data['continuation'] = token

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query"""
        return self.playlist_result(self._entries(query, n), query)
75dff0ee 3321
c9ae7b95 3322
class YoutubeSearchDateIE(YoutubeSearchIE):
    """Variant of the search extractor reachable through the ``ytsearchdate`` key."""
    IE_NAME = '%s:date' % YoutubeSearchIE.IE_NAME
    IE_DESC = 'YouTube.com searches, newest videos first'
    _SEARCH_KEY = 'ytsearchdate'
    # Sent as the "params" field of the search request.
    # NOTE(review): presumably the encoded "sort by upload date" filter - confirm.
    _SEARCH_PARAMS = 'CAI%3D'
75dff0ee 3328
c9ae7b95 3329
class YoutubeSearchURLIE(YoutubeSearchBaseInfoExtractor):
    """Extractor for ``/results?search_query=...`` pages, reading the embedded ``ytInitialData`` JSON."""
    IE_NAME = 'youtube:search_url'
    IE_DESC = 'YouTube.com search URLs'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?P<query>[^&]+)(?:[&]|$)'
    _SEARCH_DATA = r'(?:window\["ytInitialData"\]|ytInitialData)\W?=\W?({.*?});'
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'title': 'youtube-dl test video',
        }
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    def _find_videos_in_json(self, extracted):
        """Collect, depth first, every dict of *extracted* that has a ``videoId`` key.

        A dict carrying ``videoId`` is recorded as is and not searched any
        further.
        """
        found = []

        def _walk(node):
            if node is None or isinstance(node, str):
                return
            if type(node) is list:
                for item in node:
                    _walk(item)
            elif type(node) is dict:
                if "videoId" in node:
                    found.append(node)
                else:
                    for child in node.values():
                        _walk(child)

        _walk(extracted)
        return found

    def extract_videos_from_page_impl(self, page, ids_in_page, titles_in_page):
        """Fill *ids_in_page* / *titles_in_page* (parallel lists, mutated in place) from *page*.

        A video id already present is not added again; its title is only
        filled in when the stored one is empty.
        """
        search_response = self._parse_json(
            self._search_regex(self._SEARCH_DATA, page, 'ytInitialData'), None)

        for renderer in self._find_videos_in_json(search_response):
            video_id = try_get(renderer, lambda x: x['videoId'])
            video_title = (
                try_get(renderer, lambda x: x['title']['runs'][0]['text'])
                or try_get(renderer, lambda x: x['title']['simpleText']))

            if video_id is None or video_title is None:
                # we do not have a videoRenderer or title extraction broke
                continue

            video_title = video_title.strip()

            if video_id in ids_in_page:
                position = ids_in_page.index(video_id)
                if video_title and not titles_in_page[position]:
                    titles_in_page[position] = video_title
            else:
                ids_in_page.append(video_id)
                titles_in_page.append(video_title)

    def extract_videos_from_page(self, page):
        """Return the ``(video_id, title)`` pairs found in *page*."""
        video_ids, video_titles = [], []
        self.extract_videos_from_page_impl(page, video_ids, video_titles)
        return zip(video_ids, video_titles)

    def _real_extract(self, url):
        """Download the results page and return its videos as a playlist titled with the query."""
        query = compat_urllib_parse_unquote_plus(
            re.match(self._VALID_URL, url).group('query'))
        webpage = self._download_webpage(url, query)
        return self.playlist_result(self._process_page(webpage), playlist_title=query)
c9ae7b95
PH
3403
3404
class YoutubeShowIE(YoutubePlaylistsBaseInfoExtractor):
    """Extractor for ``/show/<id>`` URLs, handled through the show's ``/playlists`` page."""
    IE_NAME = 'youtube:show'
    IE_DESC = 'YouTube.com (multi-season) shows'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/show/(?P<id>[^?#]*)'
    _TESTS = [{
        'url': 'https://www.youtube.com/show/airdisasters',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'airdisasters',
            'title': 'Air Disasters',
        }
    }]

    def _real_extract(self, url):
        """Rewrite *url* to the show's playlists listing and let the base class extract it."""
        show_id = self._match_id(url)
        playlists_url = 'https://www.youtube.com/show/%s/playlists' % show_id
        return super(YoutubeShowIE, self)._real_extract(playlists_url)
04cc9617
JMF
3422
3423
class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
    """
    Base class for feed extractors
    Subclasses must define the _FEED_NAME and _PLAYLIST_TITLE properties.
    """
    _LOGIN_REQUIRED = True
    _FEED_DATA = r'(?:window\["ytInitialData"\]|ytInitialData)\W?=\W?({.*?});'
    # The dot is escaped so that only a literal "ytcfg.set(" is matched.
    _YTCFG_DATA = r"ytcfg\.set\(({.*?})\)"

    # (HTTP header, ytcfg key) pairs forwarded with continuation requests.
    _YTCFG_HEADERS = (
        ("X-YouTube-Client-Name", "INNERTUBE_CONTEXT_CLIENT_NAME"),
        ("X-YouTube-Client-Version", "INNERTUBE_CONTEXT_CLIENT_VERSION"),
        ("X-Youtube-Identity-Token", "ID_TOKEN"),
        ("X-YouTube-Device", "DEVICE"),
        ("X-YouTube-Page-CL", "PAGE_CL"),
        ("X-YouTube-Page-Label", "PAGE_BUILD_LABEL"),
        ("X-YouTube-Variants-Checksum", "VARIANTS_CHECKSUM"),
    )

    @property
    def IE_NAME(self):
        return 'youtube:%s' % self._FEED_NAME

    def _real_initialize(self):
        self._login()

    def _find_videos_in_json(self, extracted):
        """Walk *extracted* depth first and return ``(videos, continuation)``.

        *videos* lists every dict that has a ``videoId`` key (such dicts
        are not searched any further). *continuation* is the value of the
        last ``nextContinuationData`` key met during the walk, or None.
        """
        videos = []
        # Mutable holder so that the nested function can record its
        # finding without ``nonlocal``.
        c = {}

        def _real_find(obj):
            if obj is None or isinstance(obj, str):
                return

            if type(obj) is list:
                for elem in obj:
                    _real_find(elem)

            if type(obj) is dict:
                if "videoId" in obj:
                    videos.append(obj)
                    return

                if "nextContinuationData" in obj:
                    c["continuation"] = obj["nextContinuationData"]
                    return

                for _, o in obj.items():
                    _real_find(o)

        _real_find(extracted)

        return videos, c.get("continuation")

    def _entries(self, page):
        """Yield a url result for every video of the feed, following continuations.

        Videos already yielded from an earlier page are skipped. Iteration
        ends when a page brings no new video, when no usable continuation
        is available, or when the ``ytcfg`` data needed for the request
        headers could not be parsed from *page*.
        """
        # Ids yielded from earlier pages; a set keeps the duplicate check
        # constant-time instead of rescanning everything seen so far.
        seen_ids = set()

        yt_conf = self._parse_json(self._search_regex(self._YTCFG_DATA, page, 'ytcfg.set', default="null"), None, fatal=False)

        search_response = self._parse_json(self._search_regex(self._FEED_DATA, page, 'ytInitialData'), None)

        for page_num in itertools.count(1):
            video_info, continuation = self._find_videos_in_json(search_response)

            new_videos = []
            for video in video_info:
                v_id = try_get(video, lambda x: x['videoId'], compat_str)
                if not v_id or v_id in seen_ids:
                    continue
                new_videos.append((v_id, video))

            if not new_videos:
                break

            seen_ids.update(v_id for v_id, _ in new_videos)

            for v_id, video in new_videos:
                video_title = (
                    try_get(video, lambda x: x['title']['runs'][0]['text'])
                    or try_get(video, lambda x: x['title']['simpleText']))
                yield self.url_result(v_id, YoutubeIE.ie_key(), video_title=video_title)

            if not continuation or not yt_conf:
                break

            token = try_get(continuation, lambda x: x["continuation"])
            if not token:
                # Without a token there is nothing to request
                break

            query = {
                "ctoken": token,
                "continuation": token,
                "itct": try_get(continuation, lambda x: x["clickTrackingParams"]),
            }
            headers = dict(
                (header, try_get(yt_conf, lambda x, key=cfg_key: x[key]))
                for header, cfg_key in self._YTCFG_HEADERS)

            # Values missing from the page must not be sent: a None header
            # value is rejected by the HTTP layer and a None query value
            # would be encoded as the string "None".
            search_response = self._download_json(
                'https://www.youtube.com/browse_ajax', self._PLAYLIST_TITLE,
                'Downloading page #%s' % page_num,
                transform_source=uppercase_escape,
                query=dict((k, v) for k, v in query.items() if v is not None),
                headers=dict((k, v) for k, v in headers.items() if v is not None))

    def _real_extract(self, url):
        """Download the feed page named by ``_FEED_NAME`` and return its videos as a playlist."""
        page = self._download_webpage(
            'https://www.youtube.com/feed/%s' % self._FEED_NAME,
            self._PLAYLIST_TITLE)
        return self.playlist_result(
            self._entries(page), playlist_title=self._PLAYLIST_TITLE)
25f14e9f
S
3530
3531
class YoutubeWatchLaterIE(YoutubePlaylistIE):
    """Extractor for the "Watch later" list, i.e. the playlist with id ``WL``."""
    IE_NAME = 'youtube:watchlater'
    IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/(?:feed/watch_later|(?:playlist|watch)\?(?:.+&)?list=WL)|:ytwatchlater'

    _TESTS = [{
        'url': 'https://www.youtube.com/playlist?list=WL',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?v=bCNU9TrbiRk&index=1&list=WL',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return the single video when ``_check_download_just_video`` provides one, else the WL playlist."""
        _unused, single_video = self._check_download_just_video(url, 'WL')
        if single_video:
            return single_video
        _unused, watch_later = self._extract_playlist('WL')
        return watch_later
f459d170 3551
5f6a1245 3552
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    """Extractor for the favourites list, resolved to its underlying playlist."""
    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube.com favourite videos, ":ytfav" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/my_favorites|:ytfav(?:ou?rites)?'
    _LOGIN_REQUIRED = True

    def _real_extract(self, url):
        """Read the playlist id off the favourites page and delegate to the playlist extractor."""
        favourites_page = self._download_webpage(
            'https://www.youtube.com/my_favorites', 'Youtube Favourites videos')
        # First "list=<id>" value on the page, ended by a quote or "&"
        favourites_playlist = self._search_regex(
            r'list=(.+?)["&]', favourites_page, 'favourites playlist id')
        return self.url_result(favourites_playlist, 'YoutubePlaylist')
15870e90
PH
3563
3564
25f14e9f
S
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    """Feed extractor bound to the ``recommended`` feed."""
    _FEED_NAME = 'recommended'
    _PLAYLIST_TITLE = 'Youtube Recommended videos'
    IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/recommended|:ytrec(?:ommended)?'
1ed5b5c9 3570
1ed5b5c9 3571
25f14e9f
S
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    """Feed extractor bound to the ``subscriptions`` feed."""
    # The shortcut accepted by _VALID_URL is ":ytsubs" (leading colon), so
    # the description spells it that way, like the sibling feed extractors.
    IE_DESC = 'YouTube.com subscriptions feed, ":ytsubs" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/subscriptions|:ytsubs(?:criptions)?'
    _FEED_NAME = 'subscriptions'
    _PLAYLIST_TITLE = 'Youtube Subscriptions'
1ed5b5c9 3577
1ed5b5c9 3578
25f14e9f
S
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    """Feed extractor bound to the ``history`` feed."""
    _FEED_NAME = 'history'
    _PLAYLIST_TITLE = 'Youtube History'
    IE_DESC = 'Youtube watch history, ":ythistory" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/history|:ythistory'
1ed5b5c9
JMF
3584
3585
15870e90
PH
class YoutubeTruncatedURLIE(InfoExtractor):
    """Catch watch/attribution URLs that lost their video id and explain the likely cause.

    Matching URLs are never extracted; ``_real_extract`` always raises.
    """
    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError hinting at an unquoted ``&`` in the shell."""
        hint = (
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like  youtube-dl '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply  youtube-dl BaW_jenozKc  .')
        raise ExtractorError(hint, expected=True)
772fd5cc
PH
3633
3634
class YoutubeTruncatedIDIE(InfoExtractor):
    """Catch watch URLs whose ``v`` value is 1 to 10 characters long and report them as truncated."""
    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError naming the incomplete id and the URL."""
        truncated_id = self._match_id(url)
        message = 'Incomplete YouTube ID %s. URL %s looks truncated.' % (truncated_id, url)
        raise ExtractorError(message, expected=True)