jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/youtube.py
Ensure `mergeall` selects best format when multistreams are disabled
[yt-dlp.git] / yt_dlp / extractor / youtube.py
CommitLineData
c5e8d7af 1# coding: utf-8
c5e8d7af 2
78caa52a
PH
3from __future__ import unicode_literals
4
d92f5d5a 5import calendar
a5c56234 6import hashlib
0ca96d48 7import itertools
c5e8d7af 8import json
c4417ddb 9import os.path
d77ab8e2 10import random
c5e8d7af 11import re
8a784c74 12import time
e0df6211 13import traceback
c5e8d7af 14
b05654f0 15from .common import InfoExtractor, SearchInfoExtractor
4bb4a188 16from ..compat import (
edf3e38e 17 compat_chr,
29f7c58a 18 compat_HTTPError,
c5e8d7af 19 compat_parse_qs,
545cc85d 20 compat_str,
7fd002c0 21 compat_urllib_parse_unquote_plus,
15707c7e 22 compat_urllib_parse_urlencode,
7c80519c 23 compat_urllib_parse_urlparse,
7c61bd36 24 compat_urlparse,
4bb4a188 25)
545cc85d 26from ..jsinterp import JSInterpreter
4bb4a188 27from ..utils import (
c224251a 28 bool_or_none,
c5e8d7af 29 clean_html,
26fe8ffe 30 dict_get,
d92f5d5a 31 datetime_from_str,
c5e8d7af 32 ExtractorError,
b60419c5 33 format_field,
2d30521a 34 float_or_none,
dd27fd17 35 int_or_none,
94278f72 36 mimetype2ext,
6310acf5 37 parse_codecs,
7c80519c 38 parse_duration,
dca3ff4a 39 qualities,
3995d37d 40 remove_start,
cf7e015f 41 smuggle_url,
dbdaaa23 42 str_or_none,
c93d53f5 43 str_to_int,
556dbe7f 44 try_get,
c5e8d7af
PH
45 unescapeHTML,
46 unified_strdate,
cf7e015f 47 unsmuggle_url,
8bdd16b4 48 update_url_query,
21c340b8 49 url_or_none,
6e6bc8da 50 urlencode_postdata,
d92f5d5a 51 urljoin
c5e8d7af
PH
52)
53
5f6a1245 54
class YoutubeBaseInfoExtractor(InfoExtractor):
    """Provide base functions for Youtube extractors"""

    # Google account endpoints used by _login()
    _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
    _TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge'

    _LOOKUP_URL = 'https://accounts.google.com/_/signin/sl/lookup'
    _CHALLENGE_URL = 'https://accounts.google.com/_/signin/sl/challenge'
    # '{0}' is filled with the "TL" value extracted from the challenge response
    _TFA_URL = 'https://accounts.google.com/_/signin/challenge?hl=en&TL={0}'

    # Regex alternation of path components (not used by this class itself;
    # presumably consumed by URL patterns of subclasses - verify at call sites)
    _RESERVED_NAMES = (
        r'channel|c|user|playlist|watch|w|v|embed|e|watch_popup|'
        r'movies|results|shared|hashtag|trending|feed|feeds|'
        r'storefront|oops|index|account|reporthistory|t/terms|about|upload|signin|logout')

    _NETRC_MACHINE = 'youtube'
    # If True it will raise an error if no login info is provided
    _LOGIN_REQUIRED = False

    _PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)'

    def _ids_to_results(self, ids):
        """Return a list of url_result() entries (ie 'Youtube'), one per video id in ids."""
        return [
            self.url_result(vid_id, 'Youtube', video_id=vid_id)
            for vid_id in ids]

    def _login(self):
        """
        Attempt to log in to YouTube.
        True is returned if successful or skipped.
        False is returned if login failed.

        If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.
        """
        username, password = self._get_login_info()
        # No authentication to be performed
        if username is None:
            if self._LOGIN_REQUIRED and self._downloader.params.get('cookiefile') is None:
                raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
            # if self._downloader.params.get('cookiefile'):  # TODO remove 'and False' later - too many people using outdated cookies and open issues, remind them.
            #     self.to_screen('[Cookies] Reminder - Make sure to always use up to date cookies!')
            return True

        login_page = self._download_webpage(
            self._LOGIN_URL, None,
            note='Downloading login page',
            errnote='unable to fetch login page', fatal=False)
        if login_page is False:
            # Return an explicit False (not None) as promised by the docstring
            return False

        login_form = self._hidden_inputs(login_page)

        def req(url, f_req, note, errnote):
            """POST the hidden login form fields plus f_req (JSON-encoded) to url.

            Returns the parsed JSON response, or False on failure (fatal=False).
            """
            data = login_form.copy()
            data.update({
                'pstMsg': 1,
                'checkConnection': 'youtube',
                'checkedDomains': 'youtube',
                'hl': 'en',
                'deviceinfo': '[null,null,null,[],null,"US",null,null,[],"GlifWebSignIn",null,[null,null,[]]]',
                'f.req': json.dumps(f_req),
                'flowName': 'GlifWebSignIn',
                'flowEntry': 'ServiceLogin',
                # TODO: reverse actual botguard identifier generation algo
                'bgRequest': '["identifier",""]',
            })
            return self._download_json(
                url, None, note=note, errnote=errnote,
                # Drop everything before the first '[' so the payload parses as JSON
                transform_source=lambda s: re.sub(r'^[^[]*', '', s),
                fatal=False,
                data=urlencode_postdata(data), headers={
                    'Content-Type': 'application/x-www-form-urlencoded;charset=utf-8',
                    'Google-Accounts-XSRF': 1,
                })

        def warn(message):
            self._downloader.report_warning(message)

        # Same ServiceLogin URL is embedded in both the lookup and the challenge request
        service_login_url = 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn'

        lookup_req = [
            username,
            None, [], None, 'US', None, None, 2, False, True,
            [
                None, None,
                [2, 1, None, 1,
                 service_login_url,
                 None, [], 4],
                1, [None, None, []], None, None, None, True
            ],
            username,
        ]

        lookup_results = req(
            self._LOOKUP_URL, lookup_req,
            'Looking up account info', 'Unable to look up account info')

        if lookup_results is False:
            return False

        user_hash = try_get(lookup_results, lambda x: x[0][2], compat_str)
        if not user_hash:
            warn('Unable to extract user hash')
            return False

        challenge_req = [
            user_hash,
            None, 1, None, [1, None, None, None, [password, None, True]],
            [
                None, None, [2, 1, None, 1, service_login_url, None, [], 4],
                1, [None, None, []], None, None, None, True
            ]]

        challenge_results = req(
            self._CHALLENGE_URL, challenge_req,
            'Logging in', 'Unable to log in')

        if challenge_results is False:
            # Return an explicit False (not None) as promised by the docstring
            return False

        login_res = try_get(challenge_results, lambda x: x[0][5], list)
        if login_res:
            login_msg = try_get(login_res, lambda x: x[5], compat_str)
            # Parenthesised so that the prefix is kept for every message;
            # '%' binds tighter than the conditional expression.
            warn(
                'Unable to login: %s' % (
                    'Invalid password'
                    if login_msg == 'INCORRECT_ANSWER_ENTERED' else login_msg))
            return False

        res = try_get(challenge_results, lambda x: x[0][-1], list)
        if not res:
            warn('Unable to extract result entry')
            return False

        login_challenge = try_get(res, lambda x: x[0][0], list)
        if login_challenge:
            challenge_str = try_get(login_challenge, lambda x: x[2], compat_str)
            if challenge_str == 'TWO_STEP_VERIFICATION':
                # SEND_SUCCESS - TFA code has been successfully sent to phone
                # QUOTA_EXCEEDED - reached the limit of TFA codes
                status = try_get(login_challenge, lambda x: x[5], compat_str)
                if status == 'QUOTA_EXCEEDED':
                    warn('Exceeded the limit of TFA codes, try later')
                    return False

                tl = try_get(challenge_results, lambda x: x[1][2], compat_str)
                if not tl:
                    warn('Unable to extract TL')
                    return False

                tfa_code = self._get_tfa_info('2-step verification code')

                if not tfa_code:
                    warn(
                        'Two-factor authentication required. Provide it either interactively or with --twofactor <code> '
                        '(Note that only TOTP (Google Authenticator App) codes work at this time.)')
                    return False

                tfa_code = remove_start(tfa_code, 'G-')

                tfa_req = [
                    user_hash, None, 2, None,
                    [
                        9, None, None, None, None, None, None, None,
                        [None, tfa_code, True, 2]
                    ]]

                tfa_results = req(
                    self._TFA_URL.format(tl), tfa_req,
                    'Submitting TFA code', 'Unable to submit TFA code')

                if tfa_results is False:
                    return False

                tfa_res = try_get(tfa_results, lambda x: x[0][5], list)
                if tfa_res:
                    tfa_msg = try_get(tfa_res, lambda x: x[5], compat_str)
                    # Parenthesised for the same precedence reason as above
                    warn(
                        'Unable to finish TFA: %s' % (
                            'Invalid TFA code'
                            if tfa_msg == 'INCORRECT_ANSWER_ENTERED' else tfa_msg))
                    return False

                check_cookie_url = try_get(
                    tfa_results, lambda x: x[0][-1][2], compat_str)
            else:
                CHALLENGES = {
                    'LOGIN_CHALLENGE': "This device isn't recognized. For your security, Google wants to make sure it's really you.",
                    'USERNAME_RECOVERY': 'Please provide additional information to aid in the recovery process.',
                    'REAUTH': "There is something unusual about your activity. For your security, Google wants to make sure it's really you.",
                }
                challenge = CHALLENGES.get(
                    challenge_str,
                    '%s returned error %s.' % (self.IE_NAME, challenge_str))
                warn('%s\nGo to https://accounts.google.com/, login and solve a challenge.' % challenge)
                return False
        else:
            check_cookie_url = try_get(res, lambda x: x[2], compat_str)

        if not check_cookie_url:
            warn('Unable to extract CheckCookie URL')
            return False

        check_cookie_results = self._download_webpage(
            check_cookie_url, None, 'Checking cookie', fatal=False)

        if check_cookie_results is False:
            return False

        if 'https://myaccount.google.com/' not in check_cookie_results:
            warn('Unable to log in')
            return False

        return True

    def _initialize_consent(self):
        """Set a 'YES+...' CONSENT cookie for .youtube.com when none is in effect.

        Nothing is done if a '__Secure-3PSID' cookie is present or the
        existing CONSENT cookie already contains 'YES'. The numeric id is
        reused from a 'PENDING+<id>' CONSENT value when available, otherwise
        a random three-digit number is used.
        """
        cookies = self._get_cookies('https://www.youtube.com/')
        if cookies.get('__Secure-3PSID'):
            return
        consent_id = None
        consent = cookies.get('CONSENT')
        if consent:
            if 'YES' in consent.value:
                return
            consent_id = self._search_regex(
                r'PENDING\+(\d+)', consent.value, 'consent', default=None)
        if not consent_id:
            consent_id = random.randint(100, 999)
        self._set_cookie('.youtube.com', 'CONSENT', 'YES+cb.20210328-17-p0.en+FX+%s' % consent_id)

    def _real_initialize(self):
        """Prepare the consent cookie and, when a downloader is attached, try to log in."""
        self._initialize_consent()
        if self._downloader is None:
            return
        # The login result is not acted upon here (the original code
        # returned in either case), so it is simply discarded.
        self._login()

    # Fallback values used when they cannot be read from the page's ytcfg
    _YT_WEB_CLIENT_VERSION = '2.20210407.08.00'
    _YT_INNERTUBE_API_KEY = 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8'
    _YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+?})\s*;'
    _YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*=\s*({.+?})\s*;'
    _YT_INITIAL_BOUNDARY_RE = r'(?:var\s+meta|</script|\n)'

    def _generate_sapisidhash_header(self):
        """Build the 'SAPISIDHASH <timestamp>_<sha1>' authorization value.

        The SHA-1 is computed over '<timestamp> <SAPISID> https://www.youtube.com'.
        Returns None when there is no SAPISID cookie for www.youtube.com.
        """
        sapisid_cookie = self._get_cookies('https://www.youtube.com').get('SAPISID')
        if sapisid_cookie is None:
            return
        # int(): round() returns a float on Python 2, which would put a
        # trailing '.0' into the timestamp; on Python 3 this is a no-op
        time_now = int(round(time.time()))
        sapisidhash = hashlib.sha1(
            (str(time_now) + " " + sapisid_cookie.value + " " + "https://www.youtube.com").encode("utf-8")).hexdigest()
        return "SAPISIDHASH %s_%s" % (time_now, sapisidhash)

    def _call_api(self, ep, query, video_id, fatal=True, headers=None,
                  note='Downloading API JSON', errnote='Unable to download API page',
                  context=None, api_key=None):
        """POST a JSON request to https://www.youtube.com/youtubei/v1/<ep>.

        The request body is {'context': ...} updated with query; context
        and api_key fall back to _extract_context() / _extract_api_key()
        when not given (or falsy). Extra headers override the generated ones.
        Returns whatever _download_json() returns.
        """
        data = {'context': context or self._extract_context()}
        data.update(query)
        real_headers = self._generate_api_headers()
        real_headers.update({'content-type': 'application/json'})
        if headers:
            real_headers.update(headers)
        return self._download_json(
            'https://www.youtube.com/youtubei/v1/%s' % ep,
            video_id=video_id, fatal=fatal, note=note, errnote=errnote,
            data=json.dumps(data).encode('utf8'), headers=real_headers,
            query={'key': api_key or self._extract_api_key()})

    def _extract_api_key(self, ytcfg=None):
        """Return ytcfg['INNERTUBE_API_KEY'] if it is a string, else the built-in default key."""
        return try_get(ytcfg, lambda x: x['INNERTUBE_API_KEY'], compat_str) or self._YT_INNERTUBE_API_KEY

    def _extract_yt_initial_data(self, video_id, webpage):
        """Find and parse the ytInitialData JSON object embedded in webpage.

        The pattern anchored by _YT_INITIAL_BOUNDARY_RE is tried first, the
        bare _YT_INITIAL_DATA_RE second.
        """
        return self._parse_json(
            self._search_regex(
                (r'%s\s*%s' % (self._YT_INITIAL_DATA_RE, self._YT_INITIAL_BOUNDARY_RE),
                 self._YT_INITIAL_DATA_RE), webpage, 'yt initial data'),
            video_id)

    def _extract_identity_token(self, webpage, item_id):
        """Return the ID_TOKEN from the page's ytcfg, or from a raw regex match; None if absent."""
        ytcfg = self._extract_ytcfg(item_id, webpage)
        if ytcfg:
            token = try_get(ytcfg, lambda x: x['ID_TOKEN'], compat_str)
            if token:
                return token
        return self._search_regex(
            r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
            'identity token', default=None)

    @staticmethod
    def _extract_account_syncid(data):
        """Extract syncId required to download private playlists of secondary channels"""
        sync_ids = (
            try_get(data, lambda x: x['responseContext']['mainAppWebResponseContext']['datasyncId'], compat_str)
            or '').split("||")
        if len(sync_ids) >= 2 and sync_ids[1]:
            # datasyncid is of the form "channel_syncid||user_syncid" for secondary channel
            # and just "user_syncid||" for primary channel. We only want the channel_syncid
            return sync_ids[0]

    def _extract_ytcfg(self, video_id, webpage):
        """Parse the first 'ytcfg.set({...});' object in webpage; returns {} if missing or unparsable."""
        return self._parse_json(
            self._search_regex(
                r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg',
                default='{}'), video_id, fatal=False) or {}

    def __extract_client_version(self, ytcfg):
        """Return ytcfg['INNERTUBE_CLIENT_VERSION'] if it is a string, else the built-in default."""
        return try_get(ytcfg, lambda x: x['INNERTUBE_CLIENT_VERSION'], compat_str) or self._YT_WEB_CLIENT_VERSION

    def _extract_context(self, ytcfg=None):
        """Return the innertube context dict to send with API requests.

        ytcfg['INNERTUBE_CONTEXT'] is returned as-is when it is a non-empty
        dict; otherwise a minimal context with client name/version (and
        visitorData, if ytcfg provides VISITOR_DATA) is built.
        """
        context = try_get(ytcfg, lambda x: x['INNERTUBE_CONTEXT'], dict)
        if context:
            return context

        # Recreate the client context (required)
        client_version = self.__extract_client_version(ytcfg)
        client_name = try_get(ytcfg, lambda x: x['INNERTUBE_CLIENT_NAME'], compat_str) or 'WEB'
        context = {
            'client': {
                'clientName': client_name,
                'clientVersion': client_version,
            }
        }
        visitor_data = try_get(ytcfg, lambda x: x['VISITOR_DATA'], compat_str)
        if visitor_data:
            context['client']['visitorData'] = visitor_data
        return context

    def _generate_api_headers(self, ytcfg=None, identity_token=None, account_syncid=None, visitor_data=None):
        """Build the HTTP headers dict for API requests.

        Client name/version headers are always present; identity token,
        account sync id and visitor data headers are added only when the
        corresponding argument is truthy. Authorization and X-Origin are
        added only when a SAPISIDHASH value can be generated.
        """
        headers = {
            'X-YouTube-Client-Name': '1',
            'X-YouTube-Client-Version': self.__extract_client_version(ytcfg),
        }
        if identity_token:
            headers['x-youtube-identity-token'] = identity_token
        if account_syncid:
            headers['X-Goog-PageId'] = account_syncid
            headers['X-Goog-AuthUser'] = 0
        if visitor_data:
            headers['x-goog-visitor-id'] = visitor_data
        auth = self._generate_sapisidhash_header()
        if auth is not None:
            headers['Authorization'] = auth
            headers['X-Origin'] = 'https://www.youtube.com'
        return headers

    def _extract_video(self, renderer):
        """Convert a video renderer dict into a 'url'-type result for YoutubeIE.

        Fields that cannot be found in renderer are set to None.
        """
        video_id = renderer.get('videoId')
        title = try_get(
            renderer,
            (lambda x: x['title']['runs'][0]['text'],
             lambda x: x['title']['simpleText']), compat_str)
        description = try_get(
            renderer, lambda x: x['descriptionSnippet']['runs'][0]['text'],
            compat_str)
        duration = parse_duration(try_get(
            renderer, lambda x: x['lengthText']['simpleText'], compat_str))
        view_count_text = try_get(
            renderer, lambda x: x['viewCountText']['simpleText'], compat_str) or ''
        # Whitespace is stripped first, then the leading digits/commas are taken
        view_count = str_to_int(self._search_regex(
            r'^([\d,]+)', re.sub(r'\s', '', view_count_text),
            'view count', default=None))
        uploader = try_get(
            renderer,
            (lambda x: x['ownerText']['runs'][0]['text'],
             lambda x: x['shortBylineText']['runs'][0]['text']), compat_str)
        return {
            '_type': 'url',
            'ie_key': YoutubeIE.ie_key(),
            'id': video_id,
            'url': video_id,
            'title': title,
            'description': description,
            'duration': duration,
            'view_count': view_count,
            'uploader': uploader,
        }
425
0c148415 426
360e1ca5 427class YoutubeIE(YoutubeBaseInfoExtractor):
78caa52a 428 IE_DESC = 'YouTube.com'
bc2ca1bb 429 _INVIDIOUS_SITES = (
430 # invidious-redirect websites
431 r'(?:www\.)?redirect\.invidious\.io',
432 r'(?:(?:www|dev)\.)?invidio\.us',
433 # Invidious instances taken from https://github.com/iv-org/documentation/blob/master/Invidious-Instances.md
434 r'(?:www\.)?invidious\.pussthecat\.org',
435 r'(?:www\.)?invidious\.048596\.xyz',
436 r'(?:www\.)?invidious\.zee\.li',
437 r'(?:www\.)?vid\.puffyan\.us',
438 r'(?:(?:www|au)\.)?ytprivate\.com',
439 r'(?:www\.)?invidious\.namazso\.eu',
440 r'(?:www\.)?invidious\.ethibox\.fr',
441 r'(?:www\.)?inv\.skyn3t\.in',
442 r'(?:www\.)?invidious\.himiko\.cloud',
443 r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',
444 r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',
445 r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',
446 r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',
447 # youtube-dl invidious instances list
448 r'(?:(?:www|no)\.)?invidiou\.sh',
449 r'(?:(?:www|fi)\.)?invidious\.snopyta\.org',
450 r'(?:www\.)?invidious\.kabi\.tk',
451 r'(?:www\.)?invidious\.13ad\.de',
452 r'(?:www\.)?invidious\.mastodon\.host',
453 r'(?:www\.)?invidious\.zapashcanon\.fr',
454 r'(?:www\.)?invidious\.kavin\.rocks',
455 r'(?:www\.)?invidious\.tube',
456 r'(?:www\.)?invidiou\.site',
457 r'(?:www\.)?invidious\.site',
458 r'(?:www\.)?invidious\.xyz',
459 r'(?:www\.)?invidious\.nixnet\.xyz',
460 r'(?:www\.)?invidious\.drycat\.fr',
461 r'(?:www\.)?tube\.poal\.co',
462 r'(?:www\.)?tube\.connect\.cafe',
463 r'(?:www\.)?vid\.wxzm\.sx',
464 r'(?:www\.)?vid\.mint\.lgbt',
465 r'(?:www\.)?yewtu\.be',
466 r'(?:www\.)?yt\.elukerio\.org',
467 r'(?:www\.)?yt\.lelux\.fi',
468 r'(?:www\.)?invidious\.ggc-project\.de',
469 r'(?:www\.)?yt\.maisputain\.ovh',
470 r'(?:www\.)?invidious\.toot\.koeln',
471 r'(?:www\.)?invidious\.fdn\.fr',
472 r'(?:www\.)?watch\.nettohikari\.com',
473 r'(?:www\.)?kgg2m7yk5aybusll\.onion',
474 r'(?:www\.)?qklhadlycap4cnod\.onion',
475 r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',
476 r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',
477 r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',
478 r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',
479 r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',
480 r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',
481 )
cb7dfeea 482 _VALID_URL = r"""(?x)^
c5e8d7af 483 (
edb53e2d 484 (?:https?://|//) # http(s):// or protocol-independent URL
bc2ca1bb 485 (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com|
486 (?:www\.)?deturl\.com/www\.youtube\.com|
487 (?:www\.)?pwnyoutube\.com|
488 (?:www\.)?hooktube\.com|
489 (?:www\.)?yourepeat\.com|
490 tube\.majestyc\.net|
491 %(invidious)s|
492 youtube\.googleapis\.com)/ # the various hostnames, with wildcard subdomains
c5e8d7af
PH
493 (?:.*?\#/)? # handle anchor (#/) redirect urls
494 (?: # the various things that can precede the ID:
ac7553d0 495 (?:(?:v|embed|e)/(?!videoseries)) # v/ or embed/ or e/
c5e8d7af 496 |(?: # or the v= param in all its forms
f7000f3a 497 (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
c5e8d7af 498 (?:\?|\#!?) # the params delimiter ? or # or #!
040ac686 499 (?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
c5e8d7af
PH
500 v=
501 )
f4b05232 502 ))
cbaed4bb
S
503 |(?:
504 youtu\.be| # just youtu.be/xxxx
6d4fc66b
S
505 vid\.plus| # or vid.plus/xxxx
506 zwearz\.com/watch| # or zwearz.com/watch/xxxx
bc2ca1bb 507 %(invidious)s
cbaed4bb 508 )/
edb53e2d 509 |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
f4b05232 510 )
c5e8d7af 511 )? # all until now is optional -> you can pass the naked ID
8bdd16b4 512 (?P<id>[0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
d0ba5587
S
513 (?!.*?\blist=
514 (?:
515 %(playlist_id)s| # combined list/video URLs are handled by the playlist IE
516 WL # WL are handled by the watch later IE
517 )
518 )
c5e8d7af 519 (?(1).+)? # if we found the ID, everything can follow
bc2ca1bb 520 $""" % {
521 'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE,
522 'invidious': '|'.join(_INVIDIOUS_SITES),
523 }
e40c758c 524 _PLAYER_INFO_RE = (
cc2db878 525 r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player',
526 r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',
545cc85d 527 r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$',
e40c758c 528 )
2c62dc26 529 _formats = {
c2d3cb4c 530 '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
531 '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
532 '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
533 '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
534 '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
535 '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
536 '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
537 '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
3834d3e3 538 # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
c2d3cb4c 539 '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
540 '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
541 '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
542 '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
543 '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
544 '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
e1a0bfdf 545 '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
c2d3cb4c 546 '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
547 '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
e1a0bfdf 548
549
550 # 3D videos
c2d3cb4c 551 '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
552 '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
553 '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
554 '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
e1a0bfdf 555 '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
556 '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
557 '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
836a086c 558
96fb5605 559 # Apple HTTP Live Streaming
11f12195 560 '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
c2d3cb4c 561 '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
562 '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
563 '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
564 '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
565 '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
e1a0bfdf 566 '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
567 '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
2c62dc26
PH
568
569 # DASH mp4 video
d23028a8
S
570 '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
571 '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
572 '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
573 '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
574 '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
067aa17e 575 '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
d23028a8
S
576 '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
577 '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
578 '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
579 '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
580 '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
581 '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},
836a086c 582
f6f1fc92 583 # Dash mp4 audio
d23028a8
S
584 '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
585 '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
586 '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
587 '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
588 '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
589 '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
590 '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},
836a086c
AZ
591
592 # Dash webm
d23028a8
S
593 '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
594 '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
595 '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
596 '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
597 '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
598 '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
599 '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
600 '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
601 '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
602 '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
603 '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
604 '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
605 '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
606 '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
607 '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
4c6b4764 608 # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
d23028a8
S
609 '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
610 '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
611 '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
612 '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
613 '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
614 '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
2c62dc26
PH
615
616 # Dash webm audio
d23028a8
S
617 '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
618 '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},
ce6b9a2d 619
0857baad 620 # Dash webm audio with opus inside
d23028a8
S
621 '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
622 '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
623 '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},
0857baad 624
ce6b9a2d
PH
625 # RTMP (unnamed)
626 '_rtmp': {'protocol': 'rtmp'},
b85eae0f
S
627
628 # av01 video only formats sometimes served with "unknown" codecs
629 '394': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
630 '395': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
631 '396': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
632 '397': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
c5e8d7af 633 }
29f7c58a 634 _SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')
836a086c 635
fd5c4aab
S
636 _GEO_BYPASS = False
637
78caa52a 638 IE_NAME = 'youtube'
2eb88d95
PH
639 _TESTS = [
640 {
2d3d2997 641 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
4bc3a23e
PH
642 'info_dict': {
643 'id': 'BaW_jenozKc',
644 'ext': 'mp4',
3867038a 645 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
4bc3a23e
PH
646 'uploader': 'Philipp Hagemeister',
647 'uploader_id': 'phihag',
ec85ded8 648 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
dd4c4492
S
649 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
650 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
4bc3a23e 651 'upload_date': '20121002',
3867038a 652 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
4bc3a23e 653 'categories': ['Science & Technology'],
3867038a 654 'tags': ['youtube-dl'],
556dbe7f 655 'duration': 10,
dbdaaa23 656 'view_count': int,
3e7c1224
PH
657 'like_count': int,
658 'dislike_count': int,
7c80519c 659 'start_time': 1,
297a564b 660 'end_time': 9,
2eb88d95 661 }
0e853ca4 662 },
fccd3771 663 {
4bc3a23e
PH
664 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
665 'note': 'Embed-only video (#1746)',
666 'info_dict': {
667 'id': 'yZIXLfi8CZQ',
668 'ext': 'mp4',
669 'upload_date': '20120608',
670 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
671 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
672 'uploader': 'SET India',
94bfcd23 673 'uploader_id': 'setindia',
ec85ded8 674 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
94bfcd23 675 'age_limit': 18,
545cc85d 676 },
677 'skip': 'Private video',
fccd3771 678 },
11b56058 679 {
8bdd16b4 680 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=yZIXLfi8CZQ',
11b56058
PM
681 'note': 'Use the first video ID in the URL',
682 'info_dict': {
683 'id': 'BaW_jenozKc',
684 'ext': 'mp4',
3867038a 685 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
11b56058
PM
686 'uploader': 'Philipp Hagemeister',
687 'uploader_id': 'phihag',
ec85ded8 688 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
11b56058 689 'upload_date': '20121002',
3867038a 690 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
11b56058 691 'categories': ['Science & Technology'],
3867038a 692 'tags': ['youtube-dl'],
556dbe7f 693 'duration': 10,
dbdaaa23 694 'view_count': int,
11b56058
PM
695 'like_count': int,
696 'dislike_count': int,
34a7de29
S
697 },
698 'params': {
699 'skip_download': True,
700 },
11b56058 701 },
dd27fd17 702 {
2d3d2997 703 'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
4bc3a23e
PH
704 'note': '256k DASH audio (format 141) via DASH manifest',
705 'info_dict': {
706 'id': 'a9LDPn-MO4I',
707 'ext': 'm4a',
708 'upload_date': '20121002',
709 'uploader_id': '8KVIDEO',
ec85ded8 710 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
4bc3a23e
PH
711 'description': '',
712 'uploader': '8KVIDEO',
713 'title': 'UHDTV TEST 8K VIDEO.mp4'
4919603f 714 },
4bc3a23e
PH
715 'params': {
716 'youtube_include_dash_manifest': True,
717 'format': '141',
4919603f 718 },
de3c7fe0 719 'skip': 'format 141 not served anymore',
dd27fd17 720 },
8bdd16b4 721 # DASH manifest with encrypted signature
722 {
723 'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
724 'info_dict': {
725 'id': 'IB3lcPjvWLA',
726 'ext': 'm4a',
727 'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
728 'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
729 'duration': 244,
730 'uploader': 'AfrojackVEVO',
731 'uploader_id': 'AfrojackVEVO',
732 'upload_date': '20131011',
cc2db878 733 'abr': 129.495,
8bdd16b4 734 },
735 'params': {
736 'youtube_include_dash_manifest': True,
737 'format': '141/bestaudio[ext=m4a]',
738 },
739 },
aa79ac0c
PH
740 # Controversy video
741 {
742 'url': 'https://www.youtube.com/watch?v=T4XJQO3qol8',
743 'info_dict': {
744 'id': 'T4XJQO3qol8',
745 'ext': 'mp4',
556dbe7f 746 'duration': 219,
aa79ac0c 747 'upload_date': '20100909',
4fe54c12 748 'uploader': 'Amazing Atheist',
aa79ac0c 749 'uploader_id': 'TheAmazingAtheist',
ec85ded8 750 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheAmazingAtheist',
aa79ac0c 751 'title': 'Burning Everyone\'s Koran',
545cc85d 752 'description': 'SUBSCRIBE: http://www.youtube.com/saturninefilms \r\n\r\nEven Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html',
aa79ac0c 753 }
c522adb1 754 },
dd2d55f1 755 # Normal age-gate video (embed allowed)
c522adb1 756 {
2d3d2997 757 'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
c522adb1
JMF
758 'info_dict': {
759 'id': 'HtVdAasjOgU',
760 'ext': 'mp4',
761 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
ec85ded8 762 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
556dbe7f 763 'duration': 142,
c522adb1
JMF
764 'uploader': 'The Witcher',
765 'uploader_id': 'WitcherGame',
ec85ded8 766 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
c522adb1 767 'upload_date': '20140605',
34952f09 768 'age_limit': 18,
c522adb1
JMF
769 },
770 },
8bdd16b4 771 # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
772 # YouTube Red ad is not captured for creator
773 {
774 'url': '__2ABJjxzNo',
775 'info_dict': {
776 'id': '__2ABJjxzNo',
777 'ext': 'mp4',
778 'duration': 266,
779 'upload_date': '20100430',
780 'uploader_id': 'deadmau5',
781 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
545cc85d 782 'creator': 'deadmau5',
783 'description': 'md5:6cbcd3a92ce1bc676fc4d6ab4ace2336',
8bdd16b4 784 'uploader': 'deadmau5',
785 'title': 'Deadmau5 - Some Chords (HD)',
545cc85d 786 'alt_title': 'Some Chords',
8bdd16b4 787 },
788 'expected_warnings': [
789 'DASH manifest missing',
790 ]
791 },
067aa17e 792 # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
e52a40ab
PH
793 {
794 'url': 'lqQg6PlCWgI',
795 'info_dict': {
796 'id': 'lqQg6PlCWgI',
797 'ext': 'mp4',
556dbe7f 798 'duration': 6085,
90227264 799 'upload_date': '20150827',
cbe2bd91 800 'uploader_id': 'olympic',
ec85ded8 801 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
cbe2bd91 802 'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
be49068d 803 'uploader': 'Olympic',
cbe2bd91
PH
804 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
805 },
806 'params': {
807 'skip_download': 'requires avconv',
e52a40ab 808 }
cbe2bd91 809 },
6271f1ca
PH
810 # Non-square pixels
811 {
812 'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
813 'info_dict': {
814 'id': '_b-2C3KPAM0',
815 'ext': 'mp4',
816 'stretched_ratio': 16 / 9.,
556dbe7f 817 'duration': 85,
6271f1ca
PH
818 'upload_date': '20110310',
819 'uploader_id': 'AllenMeow',
ec85ded8 820 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
6271f1ca 821 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
eb6793ba 822 'uploader': '孫ᄋᄅ',
6271f1ca
PH
823 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
824 },
06b491eb
S
825 },
826 # url_encoded_fmt_stream_map is empty string
827 {
828 'url': 'qEJwOuvDf7I',
829 'info_dict': {
830 'id': 'qEJwOuvDf7I',
f57b7835 831 'ext': 'webm',
06b491eb
S
832 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
833 'description': '',
834 'upload_date': '20150404',
835 'uploader_id': 'spbelect',
836 'uploader': 'Наблюдатели Петербурга',
837 },
838 'params': {
839 'skip_download': 'requires avconv',
e323cf3f
S
840 },
841 'skip': 'This live event has ended.',
06b491eb 842 },
067aa17e 843 # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
da77d856
S
844 {
845 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
846 'info_dict': {
847 'id': 'FIl7x6_3R5Y',
eb6793ba 848 'ext': 'webm',
da77d856
S
849 'title': 'md5:7b81415841e02ecd4313668cde88737a',
850 'description': 'md5:116377fd2963b81ec4ce64b542173306',
556dbe7f 851 'duration': 220,
da77d856
S
852 'upload_date': '20150625',
853 'uploader_id': 'dorappi2000',
ec85ded8 854 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
da77d856 855 'uploader': 'dorappi2000',
eb6793ba 856 'formats': 'mincount:31',
da77d856 857 },
eb6793ba 858 'skip': 'not actual anymore',
2ee8f5d8 859 },
8a1a26ce
YCH
860 # DASH manifest with segment_list
861 {
862 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
863 'md5': '8ce563a1d667b599d21064e982ab9e31',
864 'info_dict': {
865 'id': 'CsmdDsKjzN8',
866 'ext': 'mp4',
17ee98e1 867 'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510
8a1a26ce
YCH
868 'uploader': 'Airtek',
869 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
870 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
871 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
872 },
873 'params': {
874 'youtube_include_dash_manifest': True,
875 'format': '135', # bestvideo
be49068d
S
876 },
877 'skip': 'This live event has ended.',
2ee8f5d8 878 },
cf7e015f
S
879 {
880 # Multifeed videos (multiple cameras), URL is for Main Camera
545cc85d 881 'url': 'https://www.youtube.com/watch?v=jvGDaLqkpTg',
cf7e015f 882 'info_dict': {
545cc85d 883 'id': 'jvGDaLqkpTg',
884 'title': 'Tom Clancy Free Weekend Rainbow Whatever',
885 'description': 'md5:e03b909557865076822aa169218d6a5d',
cf7e015f
S
886 },
887 'playlist': [{
888 'info_dict': {
545cc85d 889 'id': 'jvGDaLqkpTg',
cf7e015f 890 'ext': 'mp4',
545cc85d 891 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Main Camera)',
892 'description': 'md5:e03b909557865076822aa169218d6a5d',
893 'duration': 10643,
894 'upload_date': '20161111',
895 'uploader': 'Team PGP',
896 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
897 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
898 },
899 }, {
900 'info_dict': {
545cc85d 901 'id': '3AKt1R1aDnw',
cf7e015f 902 'ext': 'mp4',
545cc85d 903 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 2)',
904 'description': 'md5:e03b909557865076822aa169218d6a5d',
905 'duration': 10991,
906 'upload_date': '20161111',
907 'uploader': 'Team PGP',
908 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
909 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
910 },
911 }, {
912 'info_dict': {
545cc85d 913 'id': 'RtAMM00gpVc',
cf7e015f 914 'ext': 'mp4',
545cc85d 915 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 3)',
916 'description': 'md5:e03b909557865076822aa169218d6a5d',
917 'duration': 10995,
918 'upload_date': '20161111',
919 'uploader': 'Team PGP',
920 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
921 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
922 },
923 }, {
924 'info_dict': {
545cc85d 925 'id': '6N2fdlP3C5U',
cf7e015f 926 'ext': 'mp4',
545cc85d 927 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 4)',
928 'description': 'md5:e03b909557865076822aa169218d6a5d',
929 'duration': 10990,
930 'upload_date': '20161111',
931 'uploader': 'Team PGP',
932 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
933 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
934 },
935 }],
936 'params': {
937 'skip_download': True,
938 },
cbaed4bb 939 },
f9f49d87 940 {
067aa17e 941 # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
f9f49d87
S
942 'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
943 'info_dict': {
944 'id': 'gVfLd0zydlo',
945 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
946 },
947 'playlist_count': 2,
be49068d 948 'skip': 'Not multifeed anymore',
f9f49d87 949 },
cbaed4bb 950 {
2d3d2997 951 'url': 'https://vid.plus/FlRa-iH7PGw',
cbaed4bb 952 'only_matching': True,
0e49d9a6 953 },
6d4fc66b 954 {
2d3d2997 955 'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
6d4fc66b
S
956 'only_matching': True,
957 },
0e49d9a6 958 {
067aa17e 959 # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
a8776b10 960 # Also tests cut-off URL expansion in video description (see
067aa17e
S
961 # https://github.com/ytdl-org/youtube-dl/issues/1892,
962 # https://github.com/ytdl-org/youtube-dl/issues/8164)
0e49d9a6
LL
963 'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
964 'info_dict': {
965 'id': 'lsguqyKfVQg',
966 'ext': 'mp4',
967 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
eb6793ba 968 'alt_title': 'Dark Walk - Position Music',
0e49d9a6 969 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
556dbe7f 970 'duration': 133,
0e49d9a6
LL
971 'upload_date': '20151119',
972 'uploader_id': 'IronSoulElf',
ec85ded8 973 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
0e49d9a6 974 'uploader': 'IronSoulElf',
eb6793ba
S
975 'creator': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
976 'track': 'Dark Walk - Position Music',
977 'artist': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
92bc97d3 978 'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
0e49d9a6
LL
979 },
980 'params': {
981 'skip_download': True,
982 },
983 },
61f92af1 984 {
067aa17e 985 # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
61f92af1
S
986 'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
987 'only_matching': True,
988 },
313dfc45
LL
989 {
990 # Video with yt:stretch=17:0
991 'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
992 'info_dict': {
993 'id': 'Q39EVAstoRM',
994 'ext': 'mp4',
995 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
996 'description': 'md5:ee18a25c350637c8faff806845bddee9',
997 'upload_date': '20151107',
998 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
999 'uploader': 'CH GAMER DROID',
1000 },
1001 'params': {
1002 'skip_download': True,
1003 },
be49068d 1004 'skip': 'This video does not exist.',
313dfc45 1005 },
7caf9830
S
1006 {
1007 # Video licensed under Creative Commons
1008 'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
1009 'info_dict': {
1010 'id': 'M4gD1WSo5mA',
1011 'ext': 'mp4',
1012 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
1013 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
556dbe7f 1014 'duration': 721,
7caf9830
S
1015 'upload_date': '20150127',
1016 'uploader_id': 'BerkmanCenter',
ec85ded8 1017 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
556dbe7f 1018 'uploader': 'The Berkman Klein Center for Internet & Society',
7caf9830
S
1019 'license': 'Creative Commons Attribution license (reuse allowed)',
1020 },
1021 'params': {
1022 'skip_download': True,
1023 },
1024 },
fd050249
S
1025 {
1026 # Channel-like uploader_url
1027 'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
1028 'info_dict': {
1029 'id': 'eQcmzGIKrzg',
1030 'ext': 'mp4',
1031 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
545cc85d 1032 'description': 'md5:13a2503d7b5904ef4b223aa101628f39',
556dbe7f 1033 'duration': 4060,
fd050249 1034 'upload_date': '20151119',
eb6793ba 1035 'uploader': 'Bernie Sanders',
fd050249 1036 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
ec85ded8 1037 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
fd050249
S
1038 'license': 'Creative Commons Attribution license (reuse allowed)',
1039 },
1040 'params': {
1041 'skip_download': True,
1042 },
1043 },
040ac686
S
1044 {
1045 'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
1046 'only_matching': True,
7f29cf54
S
1047 },
1048 {
067aa17e 1049 # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
7f29cf54
S
1050 'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
1051 'only_matching': True,
6496ccb4
S
1052 },
1053 {
1054 # Rental video preview
1055 'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
1056 'info_dict': {
1057 'id': 'uGpuVWrhIzE',
1058 'ext': 'mp4',
1059 'title': 'Piku - Trailer',
1060 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
1061 'upload_date': '20150811',
1062 'uploader': 'FlixMatrix',
1063 'uploader_id': 'FlixMatrixKaravan',
ec85ded8 1064 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
6496ccb4
S
1065 'license': 'Standard YouTube License',
1066 },
1067 'params': {
1068 'skip_download': True,
1069 },
eb6793ba 1070 'skip': 'This video is not available.',
022a5d66 1071 },
12afdc2a
S
1072 {
1073 # YouTube Red video with episode data
1074 'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
1075 'info_dict': {
1076 'id': 'iqKdEhx-dD4',
1077 'ext': 'mp4',
1078 'title': 'Isolation - Mind Field (Ep 1)',
545cc85d 1079 'description': 'md5:f540112edec5d09fc8cc752d3d4ba3cd',
556dbe7f 1080 'duration': 2085,
12afdc2a
S
1081 'upload_date': '20170118',
1082 'uploader': 'Vsauce',
1083 'uploader_id': 'Vsauce',
1084 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
12afdc2a
S
1085 'series': 'Mind Field',
1086 'season_number': 1,
1087 'episode_number': 1,
1088 },
1089 'params': {
1090 'skip_download': True,
1091 },
1092 'expected_warnings': [
1093 'Skipping DASH manifest',
1094 ],
1095 },
c7121fa7
S
1096 {
1097 # The following content has been identified by the YouTube community
1098 # as inappropriate or offensive to some audiences.
1099 'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
1100 'info_dict': {
1101 'id': '6SJNVb0GnPI',
1102 'ext': 'mp4',
1103 'title': 'Race Differences in Intelligence',
1104 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
1105 'duration': 965,
1106 'upload_date': '20140124',
1107 'uploader': 'New Century Foundation',
1108 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
1109 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
c7121fa7
S
1110 },
1111 'params': {
1112 'skip_download': True,
1113 },
545cc85d 1114 'skip': 'This video has been removed for violating YouTube\'s policy on hate speech.',
c7121fa7 1115 },
022a5d66
S
1116 {
1117 # itag 212
1118 'url': '1t24XAntNCY',
1119 'only_matching': True,
fd5c4aab
S
1120 },
1121 {
1122 # geo restricted to JP
1123 'url': 'sJL6WA-aGkQ',
1124 'only_matching': True,
1125 },
cd5a74a2
S
1126 {
1127 'url': 'https://invidio.us/watch?v=BaW_jenozKc',
1128 'only_matching': True,
1129 },
bc2ca1bb 1130 {
1131 'url': 'https://redirect.invidious.io/watch?v=BaW_jenozKc',
1132 'only_matching': True,
1133 },
1134 {
1135 # from https://nitter.pussthecat.org/YouTube/status/1360363141947944964#m
1136 'url': 'https://redirect.invidious.io/Yh0AhrY9GjA',
1137 'only_matching': True,
1138 },
825cd268
RA
1139 {
1140 # DRM protected
1141 'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
1142 'only_matching': True,
4fe54c12
S
1143 },
1144 {
1145 # Video with unsupported adaptive stream type formats
1146 'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
1147 'info_dict': {
1148 'id': 'Z4Vy8R84T1U',
1149 'ext': 'mp4',
1150 'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
1151 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
1152 'duration': 433,
1153 'upload_date': '20130923',
1154 'uploader': 'Amelia Putri Harwita',
1155 'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',
1156 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',
1157 'formats': 'maxcount:10',
1158 },
1159 'params': {
1160 'skip_download': True,
1161 'youtube_include_dash_manifest': False,
1162 },
5429d6a9 1163 'skip': 'not actual anymore',
5caabd3c 1164 },
1165 {
822b9d9c 1166 # Youtube Music Auto-generated description
5caabd3c 1167 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1168 'info_dict': {
1169 'id': 'MgNrAu2pzNs',
1170 'ext': 'mp4',
1171 'title': 'Voyeur Girl',
1172 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
1173 'upload_date': '20190312',
5429d6a9
S
1174 'uploader': 'Stephen - Topic',
1175 'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',
5caabd3c 1176 'artist': 'Stephen',
1177 'track': 'Voyeur Girl',
1178 'album': 'it\'s too much love to know my dear',
1179 'release_date': '20190313',
1180 'release_year': 2019,
1181 },
1182 'params': {
1183 'skip_download': True,
1184 },
1185 },
66b48727
RA
1186 {
1187 'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',
1188 'only_matching': True,
1189 },
011e75e6
S
1190 {
1191 # invalid -> valid video id redirection
1192 'url': 'DJztXj2GPfl',
1193 'info_dict': {
1194 'id': 'DJztXj2GPfk',
1195 'ext': 'mp4',
1196 'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',
1197 'description': 'md5:bf577a41da97918e94fa9798d9228825',
1198 'upload_date': '20090125',
1199 'uploader': 'Prochorowka',
1200 'uploader_id': 'Prochorowka',
1201 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',
1202 'artist': 'Panjabi MC',
1203 'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',
1204 'album': 'Beware of the Boys (Mundian To Bach Ke)',
1205 },
1206 'params': {
1207 'skip_download': True,
1208 },
545cc85d 1209 'skip': 'Video unavailable',
ea74e00b
DP
1210 },
1211 {
1212 # empty description results in an empty string
1213 'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',
1214 'info_dict': {
1215 'id': 'x41yOUIvK2k',
1216 'ext': 'mp4',
1217 'title': 'IMG 3456',
1218 'description': '',
1219 'upload_date': '20170613',
1220 'uploader_id': 'ElevageOrVert',
1221 'uploader': 'ElevageOrVert',
1222 },
1223 'params': {
1224 'skip_download': True,
1225 },
1226 },
a0566bbf 1227 {
29f7c58a 1228 # with '};' inside yt initial data (see [1])
1229 # see [2] for an example with '};' inside ytInitialPlayerResponse
1230 # 1. https://github.com/ytdl-org/youtube-dl/issues/27093
1231 # 2. https://github.com/ytdl-org/youtube-dl/issues/27216
a0566bbf 1232 'url': 'https://www.youtube.com/watch?v=CHqg6qOn4no',
1233 'info_dict': {
1234 'id': 'CHqg6qOn4no',
1235 'ext': 'mp4',
1236 'title': 'Part 77 Sort a list of simple types in c#',
1237 'description': 'md5:b8746fa52e10cdbf47997903f13b20dc',
1238 'upload_date': '20130831',
1239 'uploader_id': 'kudvenkat',
1240 'uploader': 'kudvenkat',
1241 },
1242 'params': {
1243 'skip_download': True,
1244 },
1245 },
29f7c58a 1246 {
1247 # another example of '};' in ytInitialData
1248 'url': 'https://www.youtube.com/watch?v=gVfgbahppCY',
1249 'only_matching': True,
1250 },
1251 {
1252 'url': 'https://www.youtube.com/watch_popup?v=63RmMXCd_bQ',
1253 'only_matching': True,
1254 },
{
    # https://github.com/ytdl-org/youtube-dl/pull/28094
    'url': 'OtqTfy26tG0',
    'info_dict': {
        'id': 'OtqTfy26tG0',
        'ext': 'mp4',
        'title': 'Burn Out',
        'description': 'md5:8d07b84dcbcbfb34bc12a56d968b6131',
        'upload_date': '20141120',
        'uploader': 'The Cinematic Orchestra - Topic',
        'uploader_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',
        'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',
        'artist': 'The Cinematic Orchestra',
        'track': 'Burn Out',
        'album': 'Every Day',
        # The key was previously misspelled 'release_data', which is not a
        # field name used anywhere else in this table ('release_date' is).
        # NOTE(review): a None expectation presumably asserts the field is
        # not extracted - confirm against the test helper.
        'release_date': None,
        'release_year': None,
    },
    'params': {
        'skip_download': True,
    },
},
bc2ca1bb 1277 {
1278 # controversial video, only works with bpctr when authenticated with cookies
1279 'url': 'https://www.youtube.com/watch?v=nGC3D_FkCmg',
1280 'only_matching': True,
1281 },
f7ad7160 1282 {
1283 # restricted location, https://github.com/ytdl-org/youtube-dl/issues/28685
1284 'url': 'cBvYw8_A0vQ',
1285 'info_dict': {
1286 'id': 'cBvYw8_A0vQ',
1287 'ext': 'mp4',
1288 'title': '4K Ueno Okachimachi Street Scenes 上野御徒町歩き',
1289 'description': 'md5:ea770e474b7cd6722b4c95b833c03630',
1290 'upload_date': '20201120',
1291 'uploader': 'Walk around Japan',
1292 'uploader_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',
1293 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',
1294 },
1295 'params': {
1296 'skip_download': True,
1297 },
1298 },
2eb88d95
PH
1299 ]
1300
e0df6211
PH
def __init__(self, *args, **kwargs):
    """Initialise the base extractor and create the per-instance caches."""
    super(YoutubeIE, self).__init__(*args, **kwargs)
    # Signature functions, keyed by (player URL, signature layout id).
    self._player_cache = {}
    # Downloaded player code, keyed by player id.
    self._code_cache = {}
e0df6211 1305
60064c53
PH
def _signature_cache_id(self, example_sig):
    """Return the layout id of a signature.

    The id is the length of every dot-separated part of ``example_sig``,
    joined with dots again.
    """
    part_lengths = [compat_str(len(chunk)) for chunk in example_sig.split('.')]
    return '.'.join(part_lengths)
60064c53 1309
e40c758c
S
1310 @classmethod
1311 def _extract_player_info(cls, player_url):
1312 for player_re in cls._PLAYER_INFO_RE:
1313 id_m = re.search(player_re, player_url)
1314 if id_m:
1315 break
1316 else:
c081b35c 1317 raise ExtractorError('Cannot identify player %r' % player_url)
545cc85d 1318 return id_m.group('id')
e40c758c
S
1319
def _extract_signature_function(self, video_id, player_url, example_sig):
    """Return a callable that transforms a signature string.

    The function is looked up in the downloader's 'youtube-sigfuncs'
    cache first. On a miss the player code is downloaded (once per player
    id for this instance), parsed, and the resulting index permutation is
    stored in the cache.
    """
    player_id = self._extract_player_info(player_url)

    # Read from filesystem cache
    func_id = 'js_%s_%s' % (
        player_id, self._signature_cache_id(example_sig))
    # The id is used as a cache entry name, so it must not contain
    # path components.
    assert os.path.basename(func_id) == func_id

    stored_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
    if stored_spec is not None:
        # A cached spec is a list of source indices: replay it directly.
        return lambda s: ''.join(s[i] for i in stored_spec)

    if player_id not in self._code_cache:
        self._code_cache[player_id] = self._download_webpage(
            player_url, video_id,
            note='Downloading player ' + player_id,
            errnote='Download of %s failed' % player_url)
    sig_func = self._parse_sig_js(self._code_cache[player_id])

    # Run the function over a string whose i-th character has code point
    # i; the code points of the output are then the source indices.
    probe = ''.join(map(compat_chr, range(len(example_sig))))
    index_spec = [ord(ch) for ch in sig_func(probe)]

    self._downloader.cache.store('youtube-sigfuncs', func_id, index_spec)
    return sig_func
1346
def _print_sig_code(self, func, example_sig):
    """Report ``func`` as slicing code via ``self.to_screen``.

    ``func`` is applied to a probe string to recover the index permutation
    it performs; that permutation is rendered as a sum of ``s[...]``
    index/slice expressions guarded by the layout of ``example_sig``.
    """
    def render_slice(first, last, stride):
        head = '' if first == 0 else str(first)
        tail = (':%d' % (last + stride)) if last + stride >= 0 else ':'
        stride_part = '' if stride == 1 else (':%d' % stride)
        return 's[%s%s%s]' % (head, tail, stride_part)

    def gen_sig_code(idxs):
        stride = None
        # Quelch pyflakes warnings - run_start will be set when stride is set
        run_start = '(Never used)'
        for cur, prev in zip(idxs[1:], idxs[:-1]):
            delta = cur - prev
            if stride is not None:
                # Inside a run: only act when the run is broken.
                if delta != stride:
                    yield render_slice(run_start, prev, stride)
                    stride = None
            elif delta in (-1, 1):
                # Two neighbouring indices start a new run.
                stride = delta
                run_start = prev
            else:
                yield 's[%d]' % prev
        # Flush whatever is pending for the final index.
        if stride is None:
            yield 's[%d]' % cur
        else:
            yield render_slice(run_start, cur, stride)

    probe = ''.join(map(compat_chr, range(len(example_sig))))
    index_spec = [ord(ch) for ch in func(probe)]
    expr_code = ' + '.join(gen_sig_code(index_spec))
    layout = ', '.join(compat_str(len(p)) for p in example_sig.split('.'))
    signature_id_tuple = '(%s)' % layout
    code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
            ' return %s\n') % (signature_id_tuple, expr_code)
    self.to_screen('Extracted signature function:\n' + code)
edf3e38e 1385
e0df6211
PH
def _parse_sig_js(self, jscode):
    """Return a Python callable wrapping the player's signature function.

    The function name is located in ``jscode`` with the patterns below
    (tried through ``self._search_regex``), extracted with JSInterpreter,
    and wrapped so that it takes the signature string as its only argument.
    """
    name_patterns = (
        r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bm=(?P<sig>[a-zA-Z0-9$]{2})\(decodeURIComponent\(h\.s\)\)',
        r'\bc&&\(c=(?P<sig>[a-zA-Z0-9$]{2})\(decodeURIComponent\(c\)\)',
        r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\);[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\)',
        r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
        r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
        # Obsolete patterns
        r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
        r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
    )
    funcname = self._search_regex(
        name_patterns, jscode,
        'Initial JS player signature function name', group='sig')

    interpreter = JSInterpreter(jscode)
    js_function = interpreter.extract_function(funcname)
    # The extracted function takes an argument list, so wrap the string.
    return lambda s: js_function([s])
1409
def _decrypt_signature(self, s, video_id, player_url):
    """Turn the encrypted s field into a working signature

    ``player_url`` may be protocol-relative or site-relative; it is made
    absolute before use. The signature function is cached per
    (player URL, signature layout) on this instance.

    Raises ExtractorError when ``player_url`` is None or when anything
    fails while obtaining or running the signature function.
    """
    if player_url is None:
        raise ExtractorError('Cannot decrypt signature without player_url')

    if player_url.startswith('//'):
        player_url = 'https:' + player_url
    elif not re.match(r'https?://', player_url):
        player_url = compat_urlparse.urljoin(
            'https://www.youtube.com', player_url)

    try:
        cache_key = (player_url, self._signature_cache_id(s))
        if cache_key not in self._player_cache:
            self._player_cache[cache_key] = self._extract_signature_function(
                video_id, player_url, s)
        sig_func = self._player_cache[cache_key]
        if self._downloader.params.get('youtube_print_sig_code'):
            self._print_sig_code(sig_func, s)
        return sig_func(s)
    except Exception as e:
        # Any failure is reported with the full traceback in the message.
        tb = traceback.format_exc()
        raise ExtractorError(
            'Signature extraction failed: ' + tb, cause=e)
e0df6211 1436
def _mark_watched(self, video_id, player_response):
    """Request the playback-tracking URL found in ``player_response``.

    Does nothing when the response carries no valid
    playbackTracking/videostatsPlaybackUrl/baseUrl. The request itself is
    non-fatal.
    """
    playback_url = url_or_none(try_get(
        player_response,
        lambda x: x['playbackTracking']['videostatsPlaybackUrl']['baseUrl']))
    if not playback_url:
        return

    url_parts = compat_urlparse.urlparse(playback_url)
    qs = compat_urlparse.parse_qs(url_parts.query)

    # cpn generation algorithm is reverse engineered from base.js.
    # In fact it works even with dummy cpn.
    CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
    cpn_chars = [CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(0, 16)]
    cpn = ''.join(cpn_chars)

    qs['ver'] = ['2']
    qs['cpn'] = [cpn]
    new_query = compat_urllib_parse_urlencode(qs, True)
    playback_url = compat_urlparse.urlunparse(
        url_parts._replace(query=new_query))

    self._download_webpage(
        playback_url, video_id, 'Marking watched',
        'Unable to mark watched', fatal=False)
1461
66c9fa36
S
@staticmethod
def _extract_urls(webpage):
    """Return every YouTube embed reference found in ``webpage``.

    Three sources are scanned, in this order:
      * player embeds (iframe/embed/object tags, data-video-url
        attributes, embedSWF(...) and new SWFObject(...) calls) pointing
        at youtube.com or youtube-nocookie.com; these yield URLs,
      * lazyYT elements; these yield the data-youtube-id value,
      * the Wordpress "YouTube Video Importer" plugin; these yield the
        data-video_id value.

    Returns a list of strings; it is empty when nothing is found.
    """
    # Embedded YouTube player.
    # The embedSWF alternative used to require a literal ':' after an
    # optional '(' and so never matched an embedSWF("url", ...) call; it
    # now matches the opening parenthesis followed by optional whitespace.
    entries = [
        unescapeHTML(mobj.group('url'))
        for mobj in re.finditer(r'''(?x)
        (?:
            <iframe[^>]+?src=|
            data-video-url=|
            <embed[^>]+?src=|
            embedSWF\(\s*|
            <object[^>]+data=|
            new\s+SWFObject\(
        )
        (["\'])
        (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
            (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
        \1''', webpage)]

    # lazyYT YouTube embed
    entries.extend(
        unescapeHTML(video_id)
        for video_id in re.findall(
            r'class="lazyYT" data-youtube-id="([^"]+)"', webpage))

    # Wordpress "YouTube Video Importer" plugin
    matches = re.findall(r'''(?x)<div[^>]+
        class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
        data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
    # findall returns (q1, q2, id) tuples; the id is the last group.
    entries.extend(m[-1] for m in matches)

    return entries
1493
@staticmethod
def _extract_url(webpage):
    """Return the first entry from ``YoutubeIE._extract_urls``, or None."""
    found = YoutubeIE._extract_urls(webpage)
    if not found:
        return None
    return found[0]
1498
97665381
PH
@classmethod
def extract_id(cls, url):
    """Return the video id, i.e. group 2 of ``cls._VALID_URL`` matched on ``url``.

    Raises ExtractorError when ``url`` does not match.
    """
    match = re.match(cls._VALID_URL, url, re.VERBOSE)
    if match is None:
        raise ExtractorError('Invalid URL: %s' % url)
    return match.group(2)
1506
def _extract_chapters_from_json(self, data, video_id, duration):
    """Build a chapter list from the chaptered player bar in ``data``.

    Returns a list of dicts with 'start_time', 'end_time' and 'title', or
    None when ``data`` has no non-empty chapter list. A chapter ends where
    the next one starts; the last one ends at ``duration``. Chapters whose
    start or end time cannot be determined are left out.
    """
    chapters_list = try_get(
        data,
        lambda x: x['playerOverlays']
                   ['playerOverlayRenderer']
                   ['decoratedPlayerBarRenderer']
                   ['decoratedPlayerBarRenderer']
                   ['playerBar']
                   ['chapteredPlayerBarRenderer']
                   ['chapters'],
        list)
    if not chapters_list:
        return

    def chapter_time(chapter):
        # timeRangeStartMillis is scaled down by 1000.
        millis = try_get(
            chapter,
            lambda x: x['chapterRenderer']['timeRangeStartMillis'],
            int)
        return float_or_none(millis, scale=1000)

    total = len(chapters_list)
    chapters = []
    for idx, chapter in enumerate(chapters_list):
        start_time = chapter_time(chapter)
        if start_time is None:
            continue
        if idx + 1 < total:
            end_time = chapter_time(chapters_list[idx + 1])
        else:
            end_time = duration
        if end_time is None:
            continue
        chapters.append({
            'start_time': start_time,
            'end_time': end_time,
            'title': try_get(
                chapter,
                lambda x: x['chapterRenderer']['title']['simpleText'],
                compat_str),
        })
    return chapters
1546
def _extract_yt_initial_variable(self, webpage, regex, video_id, name):
    """Search ``webpage`` for a JSON blob with ``regex`` and parse it.

    The pattern followed by ``self._YT_INITIAL_BOUNDARY_RE`` is tried first,
    then the bare pattern.  When nothing matches, '{}' is parsed instead;
    parsing is non-fatal.
    """
    patterns = (
        r'%s\s*%s' % (regex, self._YT_INITIAL_BOUNDARY_RE),
        regex)
    raw_json = self._search_regex(patterns, webpage, name, default='{}')
    return self._parse_json(raw_json, video_id, fatal=False)
84213ea8 1551
d92f5d5a 1552 @staticmethod
1553 def parse_time_text(time_text):
1554 """
1555 Parse the comment time text
1556 time_text is in the format 'X units ago (edited)'
1557 """
1558 time_text_split = time_text.split(' ')
1559 if len(time_text_split) >= 3:
1560 return datetime_from_str('now-%s%s' % (time_text_split[0], time_text_split[1]), precision='auto')
1561
a1c5d2ca
M
@staticmethod
def _join_text_entries(runs):
    """Concatenate the non-empty 'text' values of the dict items in ``runs``.

    Items that are not dicts are ignored.  Returns None when there is
    nothing to join.
    """
    pieces = []
    for run in runs:
        if isinstance(run, dict):
            piece = try_get(run, lambda x: x['text'], compat_str)
            if piece:
                pieces.append(piece)
    return ''.join(pieces) if pieces else None
1575
1576 def _extract_comment(self, comment_renderer, parent=None):
1577 comment_id = comment_renderer.get('commentId')
1578 if not comment_id:
1579 return
1580 comment_text_runs = try_get(comment_renderer, lambda x: x['contentText']['runs']) or []
1581 text = self._join_text_entries(comment_text_runs) or ''
1582 comment_time_text = try_get(comment_renderer, lambda x: x['publishedTimeText']['runs']) or []
1583 time_text = self._join_text_entries(comment_time_text)
d92f5d5a 1584 timestamp = calendar.timegm(self.parse_time_text(time_text).timetuple())
a1c5d2ca
M
1585 author = try_get(comment_renderer, lambda x: x['authorText']['simpleText'], compat_str)
1586 author_id = try_get(comment_renderer,
1587 lambda x: x['authorEndpoint']['browseEndpoint']['browseId'], compat_str)
1588 votes = str_to_int(try_get(comment_renderer, (lambda x: x['voteCount']['simpleText'],
1589 lambda x: x['likeCount']), compat_str)) or 0
1590 author_thumbnail = try_get(comment_renderer,
1591 lambda x: x['authorThumbnail']['thumbnails'][-1]['url'], compat_str)
1592
1593 author_is_uploader = try_get(comment_renderer, lambda x: x['authorIsChannelOwner'], bool)
1594 is_liked = try_get(comment_renderer, lambda x: x['isLiked'], bool)
a1c5d2ca
M
1595 return {
1596 'id': comment_id,
1597 'text': text,
d92f5d5a 1598 'timestamp': timestamp,
a1c5d2ca
M
1599 'time_text': time_text,
1600 'like_count': votes,
1601 'is_favorited': is_liked,
1602 'author': author,
1603 'author_id': author_id,
1604 'author_thumbnail': author_thumbnail,
1605 'author_is_uploader': author_is_uploader,
1606 'parent': parent or 'root'
1607 }
1608
def _comment_entries(self, root_continuation_data, identity_token, account_syncid,
                     ytcfg, session_token_list, parent=None, comment_counts=None):
    """Generator over the comments reachable from a continuation.

    Yields comment dicts (as built by _extract_comment) and, at most once
    on the first root page, an int holding the estimated total number of
    comments.  Replies are fetched by recursing with ``parent`` set to the
    id of the comment being replied to.

    @param root_continuation_data  renderer the first continuation is taken from
    @param session_token_list      one-element list holding the xsrf token; it
                                   is updated in place whenever a response
                                   carries a new token
    @param parent                  id of the parent comment, None for the root
    @param comment_counts          shared list: [comments so far,
                                   est. total comments, current comment thread #]
    """

    def extract_thread(parent_renderer):
        contents = try_get(parent_renderer, lambda x: x['contents'], list) or []
        if not parent:
            comment_counts[2] = 0
        for content in contents:
            comment_thread_renderer = try_get(content, lambda x: x['commentThreadRenderer'])
            comment_renderer = try_get(
                comment_thread_renderer, (lambda x: x['comment']['commentRenderer'], dict)) or try_get(
                content, (lambda x: x['commentRenderer'], dict))

            if not comment_renderer:
                continue
            comment = self._extract_comment(comment_renderer, parent)
            if not comment:
                continue
            comment_counts[0] += 1
            yield comment
            # Attempt to get the replies
            comment_replies_renderer = try_get(
                comment_thread_renderer, lambda x: x['replies']['commentRepliesRenderer'], dict)

            if comment_replies_renderer:
                comment_counts[2] += 1
                comment_entries_iter = self._comment_entries(
                    comment_replies_renderer, identity_token, account_syncid, ytcfg,
                    parent=comment.get('id'), session_token_list=session_token_list,
                    comment_counts=comment_counts)

                for reply_comment in comment_entries_iter:
                    yield reply_comment

    if not comment_counts:
        # comment so far, est. total comments, current comment thread #
        comment_counts = [0, 0, 0]

    # TODO: Generalize the download code with TabIE
    context = self._extract_context(ytcfg)
    visitor_data = try_get(context, lambda x: x['client']['visitorData'], compat_str)
    continuation = YoutubeTabIE._extract_continuation(root_continuation_data)  # TODO
    first_continuation = False
    if parent is None:
        first_continuation = True

    for page_num in itertools.count(0):
        if not continuation:
            break
        headers = self._generate_api_headers(ytcfg, identity_token, account_syncid, visitor_data)
        retries = self._downloader.params.get('extractor_retries', 3)
        count = -1
        last_error = None
        # Bound up front so the check after the retry loop never sees an
        # unbound name (e.g. if the loop body does not run at all)
        response = {}

        while count < retries:
            count += 1
            if last_error:
                self.report_warning('%s. Retrying ...' % last_error)
            try:
                query = {
                    'ctoken': continuation['ctoken'],
                    'pbj': 1,
                    'type': 'next',
                }
                if parent:
                    query['action_get_comment_replies'] = 1
                else:
                    query['action_get_comments'] = 1

                comment_prog_str = '(%d/%d)' % (comment_counts[0], comment_counts[1])
                if page_num == 0:
                    if first_continuation:
                        note_prefix = 'Downloading initial comment continuation page'
                    else:
                        note_prefix = ' Downloading comment reply thread %d %s' % (comment_counts[2], comment_prog_str)
                else:
                    note_prefix = '%sDownloading comment%s page %d %s' % (
                        ' ' if parent else '',
                        ' replies' if parent else '',
                        page_num,
                        comment_prog_str)

                browse = self._download_json(
                    'https://www.youtube.com/comment_service_ajax', None,
                    '%s %s' % (note_prefix, '(retry #%d)' % count if count else ''),
                    headers=headers, query=query,
                    data=urlencode_postdata({
                        'session_token': session_token_list[0]
                    }))
            except ExtractorError as e:
                if isinstance(e.cause, compat_HTTPError) and e.cause.code in (500, 503, 404, 413):
                    if e.cause.code == 413:
                        self.report_warning('Assumed end of comments (received HTTP Error 413)')
                        return
                    # Downloading page may result in intermittent 5xx HTTP error
                    # Sometimes a 404 is also received. See: https://github.com/ytdl-org/youtube-dl/issues/28289
                    last_error = 'HTTP Error %s' % e.cause.code
                    if e.cause.code == 404:
                        last_error = last_error + ' (this API is probably deprecated)'
                    if count < retries:
                        continue
                # Not retryable, or retries exhausted
                raise
            else:
                session_token = try_get(browse, lambda x: x['xsrf_token'], compat_str)
                if session_token:
                    session_token_list[0] = session_token

                response = try_get(browse,
                                   (lambda x: x['response'],
                                    lambda x: x[1]['response'])) or {}

                if response.get('continuationContents'):
                    break

                # YouTube sometimes gives reload: now json if something went wrong (e.g. bad auth)
                if browse.get('reload'):
                    raise ExtractorError('Invalid or missing params in continuation request', expected=False)

                # TODO: not tested, merged from old extractor
                err_msg = browse.get('externalErrorMessage')
                if err_msg:
                    raise ExtractorError('YouTube said: %s' % err_msg, expected=False)

                # Youtube sometimes sends incomplete data
                # See: https://github.com/ytdl-org/youtube-dl/issues/28194
                last_error = 'Incomplete data received'
                if count >= retries:
                    self._downloader.report_error(last_error)

        if not response:
            break
        visitor_data = try_get(
            response,
            lambda x: x['responseContext']['webResponseContextExtensionData']['ytConfigData']['visitorData'],
            compat_str) or visitor_data

        known_continuation_renderers = {
            'itemSectionContinuation': extract_thread,
            'commentRepliesContinuation': extract_thread
        }

        # extract next root continuation from the results
        continuation_contents = try_get(
            response, lambda x: x['continuationContents'], dict) or {}

        for key, value in continuation_contents.items():
            if key not in known_continuation_renderers:
                continue
            continuation_renderer = value

            if first_continuation:
                first_continuation = False
                expected_comment_count_text = try_get(
                    continuation_renderer,
                    (lambda x: x['header']['commentsHeaderRenderer']['countText']['runs'][0]['text'],
                     lambda x: x['header']['commentsHeaderRenderer']['commentsCount']['runs'][0]['text']),
                    compat_str)

                # NOTE(review): str_to_int presumably returns None for text it
                # cannot parse; guard so neither the '%d' formatting here nor
                # the progress string above ever receives None.
                expected_comment_count = (
                    str_to_int(expected_comment_count_text)
                    if expected_comment_count_text else None)
                if expected_comment_count is not None:
                    comment_counts[1] = expected_comment_count
                    self.to_screen('Downloading ~%d comments' % expected_comment_count)
                    yield comment_counts[1]

                # TODO: cli arg.
                # 1/True for newest, 0/False for popular (default)
                comment_sort_index = int(True)
                sort_continuation_renderer = try_get(
                    continuation_renderer,
                    lambda x: x['header']['commentsHeaderRenderer']['sortMenu']['sortFilterSubMenuRenderer']['subMenuItems']
                    [comment_sort_index]['continuation']['reloadContinuationData'], dict)
                # If this fails, the initial continuation page
                # starts off with popular anyways.
                if sort_continuation_renderer:
                    continuation = YoutubeTabIE._build_continuation_query(
                        continuation=sort_continuation_renderer.get('continuation'),
                        ctp=sort_continuation_renderer.get('clickTrackingParams'))
                    self.to_screen('Sorting comments by %s' % ('popular' if comment_sort_index == 0 else 'newest'))
                    # Restart from the re-sorted continuation instead of
                    # consuming this page
                    break

            for entry in known_continuation_renderers[key](continuation_renderer):
                yield entry

            continuation = YoutubeTabIE._extract_continuation(continuation_renderer)  # TODO
            break
1793
def _extract_comments(self, ytcfg, video_id, contents, webpage, xsrf_token):
    """Entry for comment extraction

    Walks ``contents`` and, for each entry, feeds the first renderer of a
    known kind to _comment_entries.  Ints produced by that generator are
    taken as the estimated total; everything else is collected as a
    comment.  Returns a dict with 'comments' and 'comment_count'.
    """
    supported_renderers = (
        'itemSectionRenderer',
    )
    collected = []
    expected_total = 0
    for section in contents:
        for renderer_name, renderer in section.items():
            if renderer_name in supported_renderers:
                entries = self._comment_entries(
                    renderer,
                    identity_token=self._extract_identity_token(webpage, item_id=video_id),
                    account_syncid=self._extract_account_syncid(ytcfg),
                    ytcfg=ytcfg,
                    session_token_list=[xsrf_token])

                for item in entries:
                    if isinstance(item, int):
                        # the generator reports the estimated total as an int
                        expected_total = item
                    else:
                        collected.append(item)
                # only the first known renderer of an entry is processed
                break
    self.to_screen('Downloaded %d/%d comments' % (len(collected), expected_total))
    return {
        'comments': collected,
        'comment_count': len(collected),
    }
1824
c5e8d7af 1825 def _real_extract(self, url):
cf7e015f 1826 url, smuggled_data = unsmuggle_url(url, {})
545cc85d 1827 video_id = self._match_id(url)
1828 base_url = self.http_scheme() + '//www.youtube.com/'
b3d12425 1829 webpage_url = base_url + 'watch?v=' + video_id
1830 webpage = self._download_webpage(
cce889b9 1831 webpage_url + '&bpctr=9999999999&has_verified=1', video_id, fatal=False)
545cc85d 1832
1833 player_response = None
1834 if webpage:
1835 player_response = self._extract_yt_initial_variable(
1836 webpage, self._YT_INITIAL_PLAYER_RESPONSE_RE,
1837 video_id, 'initial player response')
f4f751af 1838
1839 ytcfg = self._extract_ytcfg(video_id, webpage)
545cc85d 1840 if not player_response:
1841 player_response = self._call_api(
f4f751af 1842 'player', {'videoId': video_id}, video_id, api_key=self._extract_api_key(ytcfg))
545cc85d 1843
1844 playability_status = player_response.get('playabilityStatus') or {}
1845 if playability_status.get('reason') == 'Sign in to confirm your age':
1846 pr = self._parse_json(try_get(compat_parse_qs(
1847 self._download_webpage(
1848 base_url + 'get_video_info', video_id,
1849 'Refetching age-gated info webpage',
1850 'unable to download video info webpage', query={
1851 'video_id': video_id,
7c60c33e 1852 'eurl': 'https://youtube.googleapis.com/v/' + video_id,
545cc85d 1853 }, fatal=False)),
1854 lambda x: x['player_response'][0],
1855 compat_str) or '{}', video_id)
1856 if pr:
1857 player_response = pr
1858
1859 trailer_video_id = try_get(
1860 playability_status,
1861 lambda x: x['errorScreen']['playerLegacyDesktopYpcTrailerRenderer']['trailerVideoId'],
1862 compat_str)
1863 if trailer_video_id:
1864 return self.url_result(
1865 trailer_video_id, self.ie_key(), trailer_video_id)
cf7e015f 1866
545cc85d 1867 def get_text(x):
1868 if not x:
c2d125d9 1869 return
f7ad7160 1870 text = x.get('simpleText')
1871 if text and isinstance(text, compat_str):
1872 return text
1873 runs = x.get('runs')
1874 if not isinstance(runs, list):
1875 return
1876 return ''.join([r['text'] for r in runs if isinstance(r.get('text'), compat_str)])
15be3eb5 1877
545cc85d 1878 search_meta = (
1879 lambda x: self._html_search_meta(x, webpage, default=None)) \
1880 if webpage else lambda x: None
dbdaaa23 1881
545cc85d 1882 video_details = player_response.get('videoDetails') or {}
37357d21 1883 microformat = try_get(
545cc85d 1884 player_response,
1885 lambda x: x['microformat']['playerMicroformatRenderer'],
1886 dict) or {}
1887 video_title = video_details.get('title') \
1888 or get_text(microformat.get('title')) \
1889 or search_meta(['og:title', 'twitter:title', 'title'])
1890 video_description = video_details.get('shortDescription')
cf7e015f 1891
8fe10494 1892 if not smuggled_data.get('force_singlefeed', False):
5e1eddb9 1893 if not self._downloader.params.get('noplaylist'):
8fe10494
S
1894 multifeed_metadata_list = try_get(
1895 player_response,
1896 lambda x: x['multicamera']['playerLegacyMulticameraRenderer']['metadataList'],
545cc85d 1897 compat_str)
8fe10494
S
1898 if multifeed_metadata_list:
1899 entries = []
1900 feed_ids = []
1901 for feed in multifeed_metadata_list.split(','):
1902 # Unquote should take place before split on comma (,) since textual
1903 # fields may contain comma as well (see
067aa17e 1904 # https://github.com/ytdl-org/youtube-dl/issues/8536)
545cc85d 1905 feed_data = compat_parse_qs(
1906 compat_urllib_parse_unquote_plus(feed))
6b09401b
S
1907
1908 def feed_entry(name):
545cc85d 1909 return try_get(
1910 feed_data, lambda x: x[name][0], compat_str)
6b09401b
S
1911
1912 feed_id = feed_entry('id')
1913 if not feed_id:
1914 continue
1915 feed_title = feed_entry('title')
1916 title = video_title
1917 if feed_title:
1918 title += ' (%s)' % feed_title
8fe10494
S
1919 entries.append({
1920 '_type': 'url_transparent',
1921 'ie_key': 'Youtube',
1922 'url': smuggle_url(
545cc85d 1923 base_url + 'watch?v=' + feed_data['id'][0],
8fe10494 1924 {'force_singlefeed': True}),
6b09401b 1925 'title': title,
8fe10494 1926 })
6b09401b 1927 feed_ids.append(feed_id)
8fe10494
S
1928 self.to_screen(
1929 'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
1930 % (', '.join(feed_ids), video_id))
545cc85d 1931 return self.playlist_result(
1932 entries, video_id, video_title, video_description)
8fe10494
S
1933 else:
1934 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
cf7e015f 1935
545cc85d 1936 formats = []
1937 itags = []
cc2db878 1938 itag_qualities = {}
545cc85d 1939 player_url = None
dca3ff4a 1940 q = qualities(['tiny', 'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres'])
545cc85d 1941 streaming_data = player_response.get('streamingData') or {}
1942 streaming_formats = streaming_data.get('formats') or []
1943 streaming_formats.extend(streaming_data.get('adaptiveFormats') or [])
1944 for fmt in streaming_formats:
1945 if fmt.get('targetDurationSec') or fmt.get('drmFamilies'):
1946 continue
321bf820 1947
cc2db878 1948 itag = str_or_none(fmt.get('itag'))
1949 quality = fmt.get('quality')
1950 if itag and quality:
1951 itag_qualities[itag] = quality
1952 # FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment
1953 # (adding `&sq=0` to the URL) and parsing emsg box to determine the
1954 # number of fragment that would subsequently requested with (`&sq=N`)
1955 if fmt.get('type') == 'FORMAT_STREAM_TYPE_OTF':
1956 continue
1957
545cc85d 1958 fmt_url = fmt.get('url')
1959 if not fmt_url:
1960 sc = compat_parse_qs(fmt.get('signatureCipher'))
1961 fmt_url = url_or_none(try_get(sc, lambda x: x['url'][0]))
1962 encrypted_sig = try_get(sc, lambda x: x['s'][0])
1963 if not (sc and fmt_url and encrypted_sig):
1964 continue
1965 if not player_url:
1966 if not webpage:
1967 continue
1968 player_url = self._search_regex(
1969 r'"(?:PLAYER_JS_URL|jsUrl)"\s*:\s*"([^"]+)"',
1970 webpage, 'player URL', fatal=False)
1971 if not player_url:
201e9eaa 1972 continue
545cc85d 1973 signature = self._decrypt_signature(sc['s'][0], video_id, player_url)
1974 sp = try_get(sc, lambda x: x['sp'][0]) or 'signature'
1975 fmt_url += '&' + sp + '=' + signature
1976
545cc85d 1977 if itag:
1978 itags.append(itag)
cc2db878 1979 tbr = float_or_none(
1980 fmt.get('averageBitrate') or fmt.get('bitrate'), 1000)
545cc85d 1981 dct = {
1982 'asr': int_or_none(fmt.get('audioSampleRate')),
1983 'filesize': int_or_none(fmt.get('contentLength')),
1984 'format_id': itag,
1985 'format_note': fmt.get('qualityLabel') or quality,
1986 'fps': int_or_none(fmt.get('fps')),
1987 'height': int_or_none(fmt.get('height')),
dca3ff4a 1988 'quality': q(quality),
cc2db878 1989 'tbr': tbr,
545cc85d 1990 'url': fmt_url,
1991 'width': fmt.get('width'),
1992 }
1993 mimetype = fmt.get('mimeType')
1994 if mimetype:
1995 mobj = re.match(
1996 r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', mimetype)
1997 if mobj:
1998 dct['ext'] = mimetype2ext(mobj.group(1))
1999 dct.update(parse_codecs(mobj.group(2)))
cc2db878 2000 no_audio = dct.get('acodec') == 'none'
2001 no_video = dct.get('vcodec') == 'none'
2002 if no_audio:
2003 dct['vbr'] = tbr
2004 if no_video:
2005 dct['abr'] = tbr
2006 if no_audio or no_video:
545cc85d 2007 dct['downloader_options'] = {
2008 # Youtube throttles chunks >~10M
2009 'http_chunk_size': 10485760,
bf1317d2 2010 }
7c60c33e 2011 if dct.get('ext'):
2012 dct['container'] = dct['ext'] + '_dash'
545cc85d 2013 formats.append(dct)
2014
2015 hls_manifest_url = streaming_data.get('hlsManifestUrl')
2016 if hls_manifest_url:
2017 for f in self._extract_m3u8_formats(
2018 hls_manifest_url, video_id, 'mp4', fatal=False):
2019 itag = self._search_regex(
2020 r'/itag/(\d+)', f['url'], 'itag', default=None)
2021 if itag:
2022 f['format_id'] = itag
2023 formats.append(f)
2024
1418a043 2025 if self._downloader.params.get('youtube_include_dash_manifest', True):
545cc85d 2026 dash_manifest_url = streaming_data.get('dashManifestUrl')
2027 if dash_manifest_url:
545cc85d 2028 for f in self._extract_mpd_formats(
2029 dash_manifest_url, video_id, fatal=False):
cc2db878 2030 itag = f['format_id']
2031 if itag in itags:
2032 continue
dca3ff4a 2033 if itag in itag_qualities:
2034 # Not actually usefull since the sorting is already done with "quality,res,fps,codec"
2035 # but kept to maintain feature parity (and code similarity) with youtube-dl
2036 # Remove if this causes any issues with sorting in future
2037 f['quality'] = q(itag_qualities[itag])
545cc85d 2038 filesize = int_or_none(self._search_regex(
2039 r'/clen/(\d+)', f.get('fragment_base_url')
2040 or f['url'], 'file size', default=None))
2041 if filesize:
2042 f['filesize'] = filesize
cc2db878 2043 formats.append(f)
bf1317d2 2044
545cc85d 2045 if not formats:
63ad4d43 2046 if not self._downloader.params.get('allow_unplayable_formats') and streaming_data.get('licenseInfos'):
545cc85d 2047 raise ExtractorError(
2048 'This video is DRM protected.', expected=True)
2049 pemr = try_get(
2050 playability_status,
2051 lambda x: x['errorScreen']['playerErrorMessageRenderer'],
2052 dict) or {}
2053 reason = get_text(pemr.get('reason')) or playability_status.get('reason')
2054 subreason = pemr.get('subreason')
2055 if subreason:
2056 subreason = clean_html(get_text(subreason))
2057 if subreason == 'The uploader has not made this video available in your country.':
2058 countries = microformat.get('availableCountries')
2059 if not countries:
2060 regions_allowed = search_meta('regionsAllowed')
2061 countries = regions_allowed.split(',') if regions_allowed else None
2062 self.raise_geo_restricted(
2063 subreason, countries)
2064 reason += '\n' + subreason
2065 if reason:
2066 raise ExtractorError(reason, expected=True)
bf1317d2 2067
545cc85d 2068 self._sort_formats(formats)
bf1317d2 2069
545cc85d 2070 keywords = video_details.get('keywords') or []
2071 if not keywords and webpage:
2072 keywords = [
2073 unescapeHTML(m.group('content'))
2074 for m in re.finditer(self._meta_regex('og:video:tag'), webpage)]
2075 for keyword in keywords:
2076 if keyword.startswith('yt:stretch='):
2077 w, h = keyword.split('=')[1].split(':')
2078 w, h = int(w), int(h)
2079 if w > 0 and h > 0:
2080 ratio = w / h
2081 for f in formats:
2082 if f.get('vcodec') != 'none':
2083 f['stretched_ratio'] = ratio
6449cd80 2084
545cc85d 2085 thumbnails = []
2086 for container in (video_details, microformat):
2087 for thumbnail in (try_get(
2088 container,
2089 lambda x: x['thumbnail']['thumbnails'], list) or []):
2090 thumbnail_url = thumbnail.get('url')
2091 if not thumbnail_url:
bf1317d2 2092 continue
1988fab7 2093 # Sometimes youtube gives a wrong thumbnail URL. See:
2094 # https://github.com/yt-dlp/yt-dlp/issues/233
2095 # https://github.com/ytdl-org/youtube-dl/issues/28023
2096 if 'maxresdefault' in thumbnail_url:
2097 thumbnail_url = thumbnail_url.split('?')[0]
545cc85d 2098 thumbnails.append({
2099 'height': int_or_none(thumbnail.get('height')),
2100 'url': thumbnail_url,
2101 'width': int_or_none(thumbnail.get('width')),
2102 })
2103 if thumbnails:
2104 break
a6211d23 2105 else:
545cc85d 2106 thumbnail = search_meta(['og:image', 'twitter:image'])
2107 if thumbnail:
2108 thumbnails = [{'url': thumbnail}]
2109
2110 category = microformat.get('category') or search_meta('genre')
2111 channel_id = video_details.get('channelId') \
2112 or microformat.get('externalChannelId') \
2113 or search_meta('channelId')
2114 duration = int_or_none(
2115 video_details.get('lengthSeconds')
2116 or microformat.get('lengthSeconds')) \
2117 or parse_duration(search_meta('duration'))
2118 is_live = video_details.get('isLive')
2119 owner_profile_url = microformat.get('ownerProfileUrl')
2120
2121 info = {
2122 'id': video_id,
2123 'title': self._live_title(video_title) if is_live else video_title,
2124 'formats': formats,
2125 'thumbnails': thumbnails,
2126 'description': video_description,
2127 'upload_date': unified_strdate(
2128 microformat.get('uploadDate')
2129 or search_meta('uploadDate')),
2130 'uploader': video_details['author'],
2131 'uploader_id': self._search_regex(r'/(?:channel|user)/([^/?&#]+)', owner_profile_url, 'uploader id') if owner_profile_url else None,
2132 'uploader_url': owner_profile_url,
2133 'channel_id': channel_id,
2134 'channel_url': 'https://www.youtube.com/channel/' + channel_id if channel_id else None,
2135 'duration': duration,
2136 'view_count': int_or_none(
2137 video_details.get('viewCount')
2138 or microformat.get('viewCount')
2139 or search_meta('interactionCount')),
2140 'average_rating': float_or_none(video_details.get('averageRating')),
2141 'age_limit': 18 if (
2142 microformat.get('isFamilySafe') is False
2143 or search_meta('isFamilyFriendly') == 'false'
2144 or search_meta('og:restrictions:age') == '18+') else 0,
2145 'webpage_url': webpage_url,
2146 'categories': [category] if category else None,
2147 'tags': keywords,
2148 'is_live': is_live,
2149 'playable_in_embed': playability_status.get('playableInEmbed'),
c224251a 2150 'was_live': video_details.get('isLiveContent'),
545cc85d 2151 }
b477fc13 2152
545cc85d 2153 pctr = try_get(
2154 player_response,
2155 lambda x: x['captions']['playerCaptionsTracklistRenderer'], dict)
2156 subtitles = {}
2157 if pctr:
2158 def process_language(container, base_url, lang_code, query):
2159 lang_subs = []
2160 for fmt in self._SUBTITLE_FORMATS:
2161 query.update({
2162 'fmt': fmt,
2163 })
2164 lang_subs.append({
2165 'ext': fmt,
2166 'url': update_url_query(base_url, query),
2167 })
2168 container[lang_code] = lang_subs
7e72694b 2169
545cc85d 2170 for caption_track in (pctr.get('captionTracks') or []):
2171 base_url = caption_track.get('baseUrl')
2172 if not base_url:
2173 continue
2174 if caption_track.get('kind') != 'asr':
2175 lang_code = caption_track.get('languageCode')
2176 if not lang_code:
2177 continue
2178 process_language(
2179 subtitles, base_url, lang_code, {})
2180 continue
2181 automatic_captions = {}
2182 for translation_language in (pctr.get('translationLanguages') or []):
2183 translation_language_code = translation_language.get('languageCode')
2184 if not translation_language_code:
2185 continue
2186 process_language(
2187 automatic_captions, base_url, translation_language_code,
2188 {'tlang': translation_language_code})
2189 info['automatic_captions'] = automatic_captions
2190 info['subtitles'] = subtitles
7e72694b 2191
545cc85d 2192 parsed_url = compat_urllib_parse_urlparse(url)
2193 for component in [parsed_url.fragment, parsed_url.query]:
2194 query = compat_parse_qs(component)
2195 for k, v in query.items():
2196 for d_k, s_ks in [('start', ('start', 't')), ('end', ('end',))]:
2197 d_k += '_time'
2198 if d_k not in info and k in s_ks:
2199 info[d_k] = parse_duration(query[k][0])
822b9d9c
RA
2200
2201 # Youtube Music Auto-generated description
822b9d9c 2202 if video_description:
38d70284 2203 mobj = re.search(r'(?s)(?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?.+\nAuto-generated by YouTube\.\s*$', video_description)
822b9d9c 2204 if mobj:
822b9d9c
RA
2205 release_year = mobj.group('release_year')
2206 release_date = mobj.group('release_date')
2207 if release_date:
2208 release_date = release_date.replace('-', '')
2209 if not release_year:
545cc85d 2210 release_year = release_date[:4]
2211 info.update({
2212 'album': mobj.group('album'.strip()),
2213 'artist': mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·')),
2214 'track': mobj.group('track').strip(),
2215 'release_date': release_date,
cc2db878 2216 'release_year': int_or_none(release_year),
545cc85d 2217 })
7e72694b 2218
545cc85d 2219 initial_data = None
2220 if webpage:
2221 initial_data = self._extract_yt_initial_variable(
2222 webpage, self._YT_INITIAL_DATA_RE, video_id,
2223 'yt initial data')
2224 if not initial_data:
2225 initial_data = self._call_api(
f4f751af 2226 'next', {'videoId': video_id}, video_id, fatal=False, api_key=self._extract_api_key(ytcfg))
545cc85d 2227
2228 if not is_live:
2229 try:
2230 # This will error if there is no livechat
2231 initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
2232 info['subtitles']['live_chat'] = [{
394dcd44 2233 'url': 'https://www.youtube.com/watch?v=%s' % video_id, # url is needed to set cookies
545cc85d 2234 'video_id': video_id,
2235 'ext': 'json',
2236 'protocol': 'youtube_live_chat_replay',
2237 }]
2238 except (KeyError, IndexError, TypeError):
2239 pass
2240
2241 if initial_data:
2242 chapters = self._extract_chapters_from_json(
2243 initial_data, video_id, duration)
2244 if not chapters:
2245 for engagment_pannel in (initial_data.get('engagementPanels') or []):
2246 contents = try_get(
2247 engagment_pannel, lambda x: x['engagementPanelSectionListRenderer']['content']['macroMarkersListRenderer']['contents'],
2248 list)
2249 if not contents:
2250 continue
2251
2252 def chapter_time(mmlir):
2253 return parse_duration(
2254 get_text(mmlir.get('timeDescription')))
2255
2256 chapters = []
2257 for next_num, content in enumerate(contents, start=1):
2258 mmlir = content.get('macroMarkersListItemRenderer') or {}
2259 start_time = chapter_time(mmlir)
2260 end_time = chapter_time(try_get(
2261 contents, lambda x: x[next_num]['macroMarkersListItemRenderer'])) \
2262 if next_num < len(contents) else duration
2263 if start_time is None or end_time is None:
2264 continue
2265 chapters.append({
2266 'start_time': start_time,
2267 'end_time': end_time,
2268 'title': get_text(mmlir.get('title')),
2269 })
2270 if chapters:
2271 break
2272 if chapters:
2273 info['chapters'] = chapters
2274
2275 contents = try_get(
2276 initial_data,
2277 lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'],
2278 list) or []
2279 for content in contents:
2280 vpir = content.get('videoPrimaryInfoRenderer')
2281 if vpir:
2282 stl = vpir.get('superTitleLink')
2283 if stl:
2284 stl = get_text(stl)
2285 if try_get(
2286 vpir,
2287 lambda x: x['superTitleIcon']['iconType']) == 'LOCATION_PIN':
2288 info['location'] = stl
2289 else:
2290 mobj = re.search(r'(.+?)\s*S(\d+)\s*•\s*E(\d+)', stl)
2291 if mobj:
2292 info.update({
2293 'series': mobj.group(1),
2294 'season_number': int(mobj.group(2)),
2295 'episode_number': int(mobj.group(3)),
2296 })
2297 for tlb in (try_get(
2298 vpir,
2299 lambda x: x['videoActions']['menuRenderer']['topLevelButtons'],
2300 list) or []):
2301 tbr = tlb.get('toggleButtonRenderer') or {}
2302 for getter, regex in [(
2303 lambda x: x['defaultText']['accessibility']['accessibilityData'],
2304 r'(?P<count>[\d,]+)\s*(?P<type>(?:dis)?like)'), ([
2305 lambda x: x['accessibility'],
2306 lambda x: x['accessibilityData']['accessibilityData'],
2307 ], r'(?P<type>(?:dis)?like) this video along with (?P<count>[\d,]+) other people')]:
2308 label = (try_get(tbr, getter, dict) or {}).get('label')
2309 if label:
2310 mobj = re.match(regex, label)
2311 if mobj:
2312 info[mobj.group('type') + '_count'] = str_to_int(mobj.group('count'))
2313 break
2314 sbr_tooltip = try_get(
2315 vpir, lambda x: x['sentimentBar']['sentimentBarRenderer']['tooltip'])
2316 if sbr_tooltip:
2317 like_count, dislike_count = sbr_tooltip.split(' / ')
2318 info.update({
2319 'like_count': str_to_int(like_count),
2320 'dislike_count': str_to_int(dislike_count),
2321 })
2322 vsir = content.get('videoSecondaryInfoRenderer')
2323 if vsir:
2324 info['channel'] = get_text(try_get(
2325 vsir,
2326 lambda x: x['owner']['videoOwnerRenderer']['title'],
cce889b9 2327 dict))
545cc85d 2328 rows = try_get(
2329 vsir,
2330 lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows'],
2331 list) or []
2332 multiple_songs = False
2333 for row in rows:
2334 if try_get(row, lambda x: x['metadataRowRenderer']['hasDividerLine']) is True:
2335 multiple_songs = True
2336 break
2337 for row in rows:
2338 mrr = row.get('metadataRowRenderer') or {}
2339 mrr_title = mrr.get('title')
2340 if not mrr_title:
2341 continue
2342 mrr_title = get_text(mrr['title'])
2343 mrr_contents_text = get_text(mrr['contents'][0])
2344 if mrr_title == 'License':
2345 info['license'] = mrr_contents_text
2346 elif not multiple_songs:
2347 if mrr_title == 'Album':
2348 info['album'] = mrr_contents_text
2349 elif mrr_title == 'Artist':
2350 info['artist'] = mrr_contents_text
2351 elif mrr_title == 'Song':
2352 info['track'] = mrr_contents_text
2353
2354 fallbacks = {
2355 'channel': 'uploader',
2356 'channel_id': 'uploader_id',
2357 'channel_url': 'uploader_url',
2358 }
2359 for to, frm in fallbacks.items():
2360 if not info.get(to):
2361 info[to] = info.get(frm)
2362
2363 for s_k, d_k in [('artist', 'creator'), ('track', 'alt_title')]:
2364 v = info.get(s_k)
2365 if v:
2366 info[d_k] = v
b84071c0 2367
c224251a
M
2368 is_private = bool_or_none(video_details.get('isPrivate'))
2369 is_unlisted = bool_or_none(microformat.get('isUnlisted'))
2370 is_membersonly = None
b28f8d24 2371 is_premium = None
c224251a
M
2372 if initial_data and is_private is not None:
2373 is_membersonly = False
b28f8d24 2374 is_premium = False
c224251a
M
2375 contents = try_get(initial_data, lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'], list)
2376 for content in contents or []:
2377 badges = try_get(content, lambda x: x['videoPrimaryInfoRenderer']['badges'], list)
2378 for badge in badges or []:
2379 label = try_get(badge, lambda x: x['metadataBadgeRenderer']['label']) or ''
2380 if label.lower() == 'members only':
2381 is_membersonly = True
2382 break
b28f8d24
M
2383 elif label.lower() == 'premium':
2384 is_premium = True
2385 break
2386 if is_membersonly or is_premium:
c224251a
M
2387 break
2388
2389 # TODO: Add this for playlists
2390 info['availability'] = self._availability(
2391 is_private=is_private,
b28f8d24 2392 needs_premium=is_premium,
c224251a
M
2393 needs_subscription=is_membersonly,
2394 needs_auth=info['age_limit'] >= 18,
2395 is_unlisted=None if is_private is None else is_unlisted)
2396
06167fbb 2397 # get xsrf for annotations or comments
2398 get_annotations = self._downloader.params.get('writeannotations', False)
2399 get_comments = self._downloader.params.get('getcomments', False)
2400 if get_annotations or get_comments:
29f7c58a 2401 xsrf_token = None
545cc85d 2402 ytcfg = self._extract_ytcfg(video_id, webpage)
29f7c58a 2403 if ytcfg:
2404 xsrf_token = try_get(ytcfg, lambda x: x['XSRF_TOKEN'], compat_str)
2405 if not xsrf_token:
2406 xsrf_token = self._search_regex(
2407 r'([\'"])XSRF_TOKEN\1\s*:\s*([\'"])(?P<xsrf_token>(?:(?!\2).)+)\2',
8a784c74 2408 webpage, 'xsrf token', group='xsrf_token', fatal=False)
06167fbb 2409
2410 # annotations
06167fbb 2411 if get_annotations:
64b6a4e9
RA
2412 invideo_url = try_get(
2413 player_response, lambda x: x['annotations'][0]['playerAnnotationsUrlsRenderer']['invideoUrl'], compat_str)
2414 if xsrf_token and invideo_url:
29f7c58a 2415 xsrf_field_name = None
2416 if ytcfg:
2417 xsrf_field_name = try_get(ytcfg, lambda x: x['XSRF_FIELD_NAME'], compat_str)
2418 if not xsrf_field_name:
2419 xsrf_field_name = self._search_regex(
2420 r'([\'"])XSRF_FIELD_NAME\1\s*:\s*([\'"])(?P<xsrf_field_name>\w+)\2',
8a784c74 2421 webpage, 'xsrf field name',
29f7c58a 2422 group='xsrf_field_name', default='session_token')
8a784c74 2423 info['annotations'] = self._download_webpage(
64b6a4e9
RA
2424 self._proto_relative_url(invideo_url),
2425 video_id, note='Downloading annotations',
2426 errnote='Unable to download video annotations', fatal=False,
2427 data=urlencode_postdata({xsrf_field_name: xsrf_token}))
7e72694b 2428
277d6ff5 2429 if get_comments:
a1c5d2ca 2430 info['__post_extractor'] = lambda: self._extract_comments(ytcfg, video_id, contents, webpage, xsrf_token)
4ea3be0a 2431
545cc85d 2432 self.mark_watched(video_id, player_response)
d77ab8e2 2433
545cc85d 2434 return info
c5e8d7af 2435
5f6a1245 2436
8bdd16b4 2437class YoutubeTabIE(YoutubeBaseInfoExtractor):
    # Description string for this extractor.
    IE_DESC = 'YouTube.com tab'
    # Verbose-mode ((?x)) URL pattern.  After the scheme, an optional
    # subdomain and one of the hosts youtube.com / youtubekids.com /
    # invidio.us, the path has to be one of:
    #   * a `channel/`, `c/` or `user/` prefix,
    #   * a "not_channel" form: `feed/`, `hashtag/`, or a `playlist?` /
    #     `watch?` query that carries `list=`,
    #   * a direct path whose first word is not one of the alternatives in
    #     YoutubeBaseInfoExtractor._RESERVED_NAMES (substituted for `%s`).
    # The `id` group captures whatever follows, up to the next `/`, `?`,
    # `#` or `&`.
    _VALID_URL = r'''(?x)
                    https?://
                        (?:\w+\.)?
                        (?:
                            youtube(?:kids)?\.com|
                            invidio\.us
                        )/
                        (?:
                            (?:channel|c|user)/|
                            (?P<not_channel>
                                feed/|hashtag/|
                                (?:playlist|watch)\?.*?\blist=
                            )|
                            (?!(?:%s)\b) # Direct URLs
                        )
                        (?P<id>[^/?\#&]+)
                    ''' % YoutubeBaseInfoExtractor._RESERVED_NAMES
    # Name string for this extractor.
    IE_NAME = 'youtube:tab'
2457
81127aa5 2458 _TESTS = [{
8bdd16b4 2459 # playlists, multipage
2460 'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid',
2461 'playlist_mincount': 94,
2462 'info_dict': {
2463 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
2464 'title': 'Игорь Клейнер - Playlists',
2465 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
deaec5af 2466 'uploader': 'Игорь Клейнер',
2467 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
8bdd16b4 2468 },
2469 }, {
2470 # playlists, multipage, different order
2471 'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
2472 'playlist_mincount': 94,
2473 'info_dict': {
2474 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
2475 'title': 'Игорь Клейнер - Playlists',
2476 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
deaec5af 2477 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
2478 'uploader': 'Игорь Клейнер',
8bdd16b4 2479 },
2480 }, {
2481 # playlists, singlepage
2482 'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
2483 'playlist_mincount': 4,
2484 'info_dict': {
2485 'id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
2486 'title': 'ThirstForScience - Playlists',
2487 'description': 'md5:609399d937ea957b0f53cbffb747a14c',
deaec5af 2488 'uploader': 'ThirstForScience',
2489 'uploader_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
8bdd16b4 2490 }
2491 }, {
2492 'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',
2493 'only_matching': True,
2494 }, {
2495 # basic, single video playlist
0e30a7b9 2496 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
81127aa5 2497 'info_dict': {
0e30a7b9 2498 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
2499 'uploader': 'Sergey M.',
2500 'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
3867038a 2501 'title': 'youtube-dl public playlist',
81127aa5 2502 },
0e30a7b9 2503 'playlist_count': 1,
9291475f 2504 }, {
8bdd16b4 2505 # empty playlist
0e30a7b9 2506 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
9291475f 2507 'info_dict': {
0e30a7b9 2508 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
2509 'uploader': 'Sergey M.',
2510 'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
3867038a 2511 'title': 'youtube-dl empty playlist',
9291475f
PH
2512 },
2513 'playlist_count': 0,
2514 }, {
8bdd16b4 2515 # Home tab
2516 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/featured',
9291475f 2517 'info_dict': {
8bdd16b4 2518 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2519 'title': 'lex will - Home',
2520 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 2521 'uploader': 'lex will',
2522 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 2523 },
8bdd16b4 2524 'playlist_mincount': 2,
9291475f 2525 }, {
8bdd16b4 2526 # Videos tab
2527 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos',
9291475f 2528 'info_dict': {
8bdd16b4 2529 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2530 'title': 'lex will - Videos',
2531 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 2532 'uploader': 'lex will',
2533 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 2534 },
8bdd16b4 2535 'playlist_mincount': 975,
9291475f 2536 }, {
8bdd16b4 2537 # Videos tab, sorted by popular
2538 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos?view=0&sort=p&flow=grid',
9291475f 2539 'info_dict': {
8bdd16b4 2540 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2541 'title': 'lex will - Videos',
2542 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 2543 'uploader': 'lex will',
2544 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 2545 },
8bdd16b4 2546 'playlist_mincount': 199,
9291475f 2547 }, {
8bdd16b4 2548 # Playlists tab
2549 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/playlists',
9291475f 2550 'info_dict': {
8bdd16b4 2551 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2552 'title': 'lex will - Playlists',
2553 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 2554 'uploader': 'lex will',
2555 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 2556 },
8bdd16b4 2557 'playlist_mincount': 17,
ac7553d0 2558 }, {
8bdd16b4 2559 # Community tab
2560 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community',
ac7553d0 2561 'info_dict': {
8bdd16b4 2562 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2563 'title': 'lex will - Community',
2564 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 2565 'uploader': 'lex will',
2566 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
8bdd16b4 2567 },
2568 'playlist_mincount': 18,
87dadd45 2569 }, {
8bdd16b4 2570 # Channels tab
2571 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels',
87dadd45 2572 'info_dict': {
8bdd16b4 2573 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2574 'title': 'lex will - Channels',
2575 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 2576 'uploader': 'lex will',
2577 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
8bdd16b4 2578 },
deaec5af 2579 'playlist_mincount': 12,
6b08cdf6 2580 }, {
a0566bbf 2581 'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',
8bdd16b4 2582 'only_matching': True,
2583 }, {
a0566bbf 2584 'url': 'https://www.youtubekids.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
8bdd16b4 2585 'only_matching': True,
2586 }, {
a0566bbf 2587 'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
8bdd16b4 2588 'only_matching': True,
2589 }, {
2590 'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
2591 'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
2592 'info_dict': {
2593 'title': '29C3: Not my department',
2594 'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
2595 'uploader': 'Christiaan008',
2596 'uploader_id': 'UCEPzS1rYsrkqzSLNp76nrcg',
deaec5af 2597 'description': 'md5:a14dc1a8ef8307a9807fe136a0660268',
8bdd16b4 2598 },
2599 'playlist_count': 96,
2600 }, {
2601 'note': 'Large playlist',
2602 'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
6b08cdf6 2603 'info_dict': {
8bdd16b4 2604 'title': 'Uploads from Cauchemar',
2605 'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
2606 'uploader': 'Cauchemar',
2607 'uploader_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',
13a75688 2608 },
8bdd16b4 2609 'playlist_mincount': 1123,
2610 }, {
2611 # even larger playlist, 8832 videos
2612 'url': 'http://www.youtube.com/user/NASAgovVideo/videos',
2613 'only_matching': True,
4b7df0d3
JMF
2614 }, {
2615 'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
2616 'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
2617 'info_dict': {
acf757f4
PH
2618 'title': 'Uploads from Interstellar Movie',
2619 'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
13a75688 2620 'uploader': 'Interstellar Movie',
8bdd16b4 2621 'uploader_id': 'UCXw-G3eDE9trcvY2sBMM_aA',
4b7df0d3 2622 },
481cc733 2623 'playlist_mincount': 21,
8bdd16b4 2624 }, {
2625 # https://github.com/ytdl-org/youtube-dl/issues/21844
2626 'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
2627 'info_dict': {
2628 'title': 'Data Analysis with Dr Mike Pound',
2629 'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
2630 'uploader_id': 'UC9-y-6csu5WGm29I7JiwpnA',
2631 'uploader': 'Computerphile',
deaec5af 2632 'description': 'md5:7f567c574d13d3f8c0954d9ffee4e487',
8bdd16b4 2633 },
2634 'playlist_mincount': 11,
2635 }, {
a0566bbf 2636 'url': 'https://invidio.us/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
8bdd16b4 2637 'only_matching': True,
dacb3a86
S
2638 }, {
2639 # Playlist URL that does not actually serve a playlist
2640 'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
2641 'info_dict': {
2642 'id': 'FqZTN594JQw',
2643 'ext': 'webm',
2644 'title': "Smiley's People 01 detective, Adventure Series, Action",
2645 'uploader': 'STREEM',
2646 'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
ec85ded8 2647 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
dacb3a86
S
2648 'upload_date': '20150526',
2649 'license': 'Standard YouTube License',
2650 'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
2651 'categories': ['People & Blogs'],
2652 'tags': list,
dbdaaa23 2653 'view_count': int,
dacb3a86
S
2654 'like_count': int,
2655 'dislike_count': int,
2656 },
2657 'params': {
2658 'skip_download': True,
2659 },
13a75688 2660 'skip': 'This video is not available.',
dacb3a86 2661 'add_ie': [YoutubeIE.ie_key()],
481cc733 2662 }, {
8bdd16b4 2663 'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',
c0345b82 2664 'only_matching': True,
66b48727 2665 }, {
8bdd16b4 2666 'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
66b48727 2667 'only_matching': True,
a0566bbf 2668 }, {
2669 'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',
2670 'info_dict': {
2671 'id': '9Auq9mYxFEE',
2672 'ext': 'mp4',
deaec5af 2673 'title': compat_str,
a0566bbf 2674 'uploader': 'Sky News',
2675 'uploader_id': 'skynews',
2676 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/skynews',
2677 'upload_date': '20191102',
deaec5af 2678 'description': 'md5:85ddd75d888674631aaf9599a9a0b0ae',
a0566bbf 2679 'categories': ['News & Politics'],
2680 'tags': list,
2681 'like_count': int,
2682 'dislike_count': int,
2683 },
2684 'params': {
2685 'skip_download': True,
2686 },
2687 }, {
2688 'url': 'https://www.youtube.com/user/TheYoungTurks/live',
2689 'info_dict': {
2690 'id': 'a48o2S1cPoo',
2691 'ext': 'mp4',
2692 'title': 'The Young Turks - Live Main Show',
2693 'uploader': 'The Young Turks',
2694 'uploader_id': 'TheYoungTurks',
2695 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
2696 'upload_date': '20150715',
2697 'license': 'Standard YouTube License',
2698 'description': 'md5:438179573adcdff3c97ebb1ee632b891',
2699 'categories': ['News & Politics'],
2700 'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
2701 'like_count': int,
2702 'dislike_count': int,
2703 },
2704 'params': {
2705 'skip_download': True,
2706 },
2707 'only_matching': True,
2708 }, {
2709 'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
2710 'only_matching': True,
2711 }, {
2712 'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
2713 'only_matching': True,
3d3dddc9 2714 }, {
2715 'url': 'https://www.youtube.com/feed/trending',
2716 'only_matching': True,
2717 }, {
2718 # needs auth
2719 'url': 'https://www.youtube.com/feed/library',
2720 'only_matching': True,
2721 }, {
2722 # needs auth
2723 'url': 'https://www.youtube.com/feed/history',
2724 'only_matching': True,
2725 }, {
2726 # needs auth
2727 'url': 'https://www.youtube.com/feed/subscriptions',
2728 'only_matching': True,
2729 }, {
2730 # needs auth
2731 'url': 'https://www.youtube.com/feed/watch_later',
2732 'only_matching': True,
2733 }, {
2734 # no longer available?
2735 'url': 'https://www.youtube.com/feed/recommended',
2736 'only_matching': True,
29f7c58a 2737 }, {
2738 # inline playlist with not always working continuations
2739 'url': 'https://www.youtube.com/watch?v=UC6u0Tct-Fo&list=PL36D642111D65BE7C',
2740 'only_matching': True,
2741 }, {
2742 'url': 'https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8',
2743 'only_matching': True,
2744 }, {
2745 'url': 'https://www.youtube.com/course',
2746 'only_matching': True,
2747 }, {
2748 'url': 'https://www.youtube.com/zsecurity',
2749 'only_matching': True,
2750 }, {
2751 'url': 'http://www.youtube.com/NASAgovVideo/videos',
2752 'only_matching': True,
2753 }, {
2754 'url': 'https://www.youtube.com/TheYoungTurks/live',
2755 'only_matching': True,
39ed931e 2756 }, {
2757 'url': 'https://www.youtube.com/hashtag/cctv9',
2758 'info_dict': {
2759 'id': 'cctv9',
2760 'title': '#cctv9',
2761 },
2762 'playlist_mincount': 350,
29f7c58a 2763 }]
2764
2765 @classmethod
2766 def suitable(cls, url):
2767 return False if YoutubeIE.suitable(url) else super(
2768 YoutubeTabIE, cls).suitable(url)
8bdd16b4 2769
2770 def _extract_channel_id(self, webpage):
2771 channel_id = self._html_search_meta(
2772 'channelId', webpage, 'channel id', default=None)
2773 if channel_id:
2774 return channel_id
2775 channel_url = self._html_search_meta(
2776 ('og:url', 'al:ios:url', 'al:android:url', 'al:web:url',
2777 'twitter:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad',
2778 'twitter:app:url:googleplay'), webpage, 'channel url')
2779 return self._search_regex(
2780 r'https?://(?:www\.)?youtube\.com/channel/([^/?#&])+',
2781 channel_url, 'channel id')
15f6397c 2782
8bdd16b4 2783 @staticmethod
cd7c66cf 2784 def _extract_basic_item_renderer(item):
2785 # Modified from _extract_grid_item_renderer
2786 known_renderers = (
e3c07697 2787 'playlistRenderer', 'videoRenderer', 'channelRenderer',
cd7c66cf 2788 'gridPlaylistRenderer', 'gridVideoRenderer', 'gridChannelRenderer'
2789 )
2790 for key, renderer in item.items():
2791 if key not in known_renderers:
2792 continue
2793 return renderer
8bdd16b4 2794
8bdd16b4 2795 def _grid_entries(self, grid_renderer):
2796 for item in grid_renderer['items']:
2797 if not isinstance(item, dict):
39b62db1 2798 continue
cd7c66cf 2799 renderer = self._extract_basic_item_renderer(item)
8bdd16b4 2800 if not isinstance(renderer, dict):
2801 continue
2802 title = try_get(
2803 renderer, lambda x: x['title']['runs'][0]['text'], compat_str)
2804 # playlist
2805 playlist_id = renderer.get('playlistId')
2806 if playlist_id:
2807 yield self.url_result(
2808 'https://www.youtube.com/playlist?list=%s' % playlist_id,
2809 ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
2810 video_title=title)
2811 # video
2812 video_id = renderer.get('videoId')
2813 if video_id:
2814 yield self._extract_video(renderer)
2815 # channel
2816 channel_id = renderer.get('channelId')
2817 if channel_id:
2818 title = try_get(
2819 renderer, lambda x: x['title']['simpleText'], compat_str)
2820 yield self.url_result(
2821 'https://www.youtube.com/channel/%s' % channel_id,
2822 ie=YoutubeTabIE.ie_key(), video_title=title)
2823
3d3dddc9 2824 def _shelf_entries_from_content(self, shelf_renderer):
2825 content = shelf_renderer.get('content')
2826 if not isinstance(content, dict):
8bdd16b4 2827 return
cd7c66cf 2828 renderer = content.get('gridRenderer') or content.get('expandedShelfContentsRenderer')
3d3dddc9 2829 if renderer:
2830 # TODO: add support for nested playlists so each shelf is processed
2831 # as separate playlist
2832 # TODO: this includes only first N items
2833 for entry in self._grid_entries(renderer):
2834 yield entry
2835 renderer = content.get('horizontalListRenderer')
2836 if renderer:
2837 # TODO
2838 pass
8bdd16b4 2839
29f7c58a 2840 def _shelf_entries(self, shelf_renderer, skip_channels=False):
8bdd16b4 2841 ep = try_get(
2842 shelf_renderer, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
2843 compat_str)
2844 shelf_url = urljoin('https://www.youtube.com', ep)
3d3dddc9 2845 if shelf_url:
29f7c58a 2846 # Skipping links to another channels, note that checking for
2847 # endpoint.commandMetadata.webCommandMetadata.webPageTypwebPageType == WEB_PAGE_TYPE_CHANNEL
2848 # will not work
2849 if skip_channels and '/channels?' in shelf_url:
2850 return
3d3dddc9 2851 title = try_get(
2852 shelf_renderer, lambda x: x['title']['runs'][0]['text'], compat_str)
2853 yield self.url_result(shelf_url, video_title=title)
2854 # Shelf may not contain shelf URL, fallback to extraction from content
2855 for entry in self._shelf_entries_from_content(shelf_renderer):
2856 yield entry
c5e8d7af 2857
8bdd16b4 2858 def _playlist_entries(self, video_list_renderer):
2859 for content in video_list_renderer['contents']:
2860 if not isinstance(content, dict):
2861 continue
2862 renderer = content.get('playlistVideoRenderer') or content.get('playlistPanelVideoRenderer')
2863 if not isinstance(renderer, dict):
2864 continue
2865 video_id = renderer.get('videoId')
2866 if not video_id:
2867 continue
2868 yield self._extract_video(renderer)
07aeced6 2869
3462ffa8 2870 def _rich_entries(self, rich_grid_renderer):
2871 renderer = try_get(
70d5c17b 2872 rich_grid_renderer, lambda x: x['content']['videoRenderer'], dict) or {}
3462ffa8 2873 video_id = renderer.get('videoId')
2874 if not video_id:
2875 return
2876 yield self._extract_video(renderer)
2877
8bdd16b4 2878 def _video_entry(self, video_renderer):
2879 video_id = video_renderer.get('videoId')
2880 if video_id:
2881 return self._extract_video(video_renderer)
dacb3a86 2882
8bdd16b4 2883 def _post_thread_entries(self, post_thread_renderer):
2884 post_renderer = try_get(
2885 post_thread_renderer, lambda x: x['post']['backstagePostRenderer'], dict)
2886 if not post_renderer:
2887 return
2888 # video attachment
2889 video_renderer = try_get(
2890 post_renderer, lambda x: x['backstageAttachment']['videoRenderer'], dict)
2891 video_id = None
2892 if video_renderer:
2893 entry = self._video_entry(video_renderer)
2894 if entry:
2895 yield entry
2896 # inline video links
2897 runs = try_get(post_renderer, lambda x: x['contentText']['runs'], list) or []
2898 for run in runs:
2899 if not isinstance(run, dict):
2900 continue
2901 ep_url = try_get(
2902 run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'], compat_str)
2903 if not ep_url:
2904 continue
2905 if not YoutubeIE.suitable(ep_url):
2906 continue
2907 ep_video_id = YoutubeIE._match_id(ep_url)
2908 if video_id == ep_video_id:
2909 continue
2910 yield self.url_result(ep_url, ie=YoutubeIE.ie_key(), video_id=video_id)
dacb3a86 2911
8bdd16b4 2912 def _post_thread_continuation_entries(self, post_thread_continuation):
2913 contents = post_thread_continuation.get('contents')
2914 if not isinstance(contents, list):
2915 return
2916 for content in contents:
2917 renderer = content.get('backstagePostThreadRenderer')
2918 if not isinstance(renderer, dict):
2919 continue
2920 for entry in self._post_thread_entries(renderer):
2921 yield entry
07aeced6 2922
39ed931e 2923 r''' # unused
2924 def _rich_grid_entries(self, contents):
2925 for content in contents:
2926 video_renderer = try_get(content, lambda x: x['richItemRenderer']['content']['videoRenderer'], dict)
2927 if video_renderer:
2928 entry = self._video_entry(video_renderer)
2929 if entry:
2930 yield entry
2931 '''
2932
29f7c58a 2933 @staticmethod
2934 def _build_continuation_query(continuation, ctp=None):
2935 query = {
2936 'ctoken': continuation,
2937 'continuation': continuation,
2938 }
2939 if ctp:
2940 query['itct'] = ctp
2941 return query
2942
8bdd16b4 2943 @staticmethod
2944 def _extract_next_continuation_data(renderer):
2945 next_continuation = try_get(
2946 renderer, lambda x: x['continuations'][0]['nextContinuationData'], dict)
2947 if not next_continuation:
2948 return
2949 continuation = next_continuation.get('continuation')
2950 if not continuation:
2951 return
2952 ctp = next_continuation.get('clickTrackingParams')
29f7c58a 2953 return YoutubeTabIE._build_continuation_query(continuation, ctp)
c5e8d7af 2954
8bdd16b4 2955 @classmethod
2956 def _extract_continuation(cls, renderer):
2957 next_continuation = cls._extract_next_continuation_data(renderer)
2958 if next_continuation:
2959 return next_continuation
cc2db878 2960 contents = []
2961 for key in ('contents', 'items'):
2962 contents.extend(try_get(renderer, lambda x: x[key], list) or [])
8bdd16b4 2963 for content in contents:
2964 if not isinstance(content, dict):
2965 continue
2966 continuation_ep = try_get(
2967 content, lambda x: x['continuationItemRenderer']['continuationEndpoint'],
2968 dict)
2969 if not continuation_ep:
2970 continue
2971 continuation = try_get(
2972 continuation_ep, lambda x: x['continuationCommand']['token'], compat_str)
2973 if not continuation:
2974 continue
2975 ctp = continuation_ep.get('clickTrackingParams')
29f7c58a 2976 return YoutubeTabIE._build_continuation_query(continuation, ctp)
448830ce 2977
f4f751af 2978 def _entries(self, tab, item_id, identity_token, account_syncid, ytcfg):
3462ffa8 2979
70d5c17b 2980 def extract_entries(parent_renderer): # this needs to called again for continuation to work with feeds
2981 contents = try_get(parent_renderer, lambda x: x['contents'], list) or []
2982 for content in contents:
2983 if not isinstance(content, dict):
8bdd16b4 2984 continue
70d5c17b 2985 is_renderer = try_get(content, lambda x: x['itemSectionRenderer'], dict)
3462ffa8 2986 if not is_renderer:
70d5c17b 2987 renderer = content.get('richItemRenderer')
3462ffa8 2988 if renderer:
2989 for entry in self._rich_entries(renderer):
2990 yield entry
2991 continuation_list[0] = self._extract_continuation(parent_renderer)
8bdd16b4 2992 continue
3462ffa8 2993 isr_contents = try_get(is_renderer, lambda x: x['contents'], list) or []
2994 for isr_content in isr_contents:
2995 if not isinstance(isr_content, dict):
2996 continue
69184e41 2997
2998 known_renderers = {
2999 'playlistVideoListRenderer': self._playlist_entries,
3000 'gridRenderer': self._grid_entries,
3001 'shelfRenderer': lambda x: self._shelf_entries(x, tab.get('title') != 'Channels'),
3002 'backstagePostThreadRenderer': self._post_thread_entries,
3003 'videoRenderer': lambda x: [self._video_entry(x)],
3004 }
3005 for key, renderer in isr_content.items():
3006 if key not in known_renderers:
3007 continue
3008 for entry in known_renderers[key](renderer):
3009 if entry:
3010 yield entry
3462ffa8 3011 continuation_list[0] = self._extract_continuation(renderer)
69184e41 3012 break
70d5c17b 3013
3462ffa8 3014 if not continuation_list[0]:
3015 continuation_list[0] = self._extract_continuation(is_renderer)
70d5c17b 3016
3017 if not continuation_list[0]:
3018 continuation_list[0] = self._extract_continuation(parent_renderer)
3462ffa8 3019
3020 continuation_list = [None] # Python 2 doesnot support nonlocal
29f7c58a 3021 tab_content = try_get(tab, lambda x: x['content'], dict)
3022 if not tab_content:
3023 return
3462ffa8 3024 parent_renderer = (
29f7c58a 3025 try_get(tab_content, lambda x: x['sectionListRenderer'], dict)
3026 or try_get(tab_content, lambda x: x['richGridRenderer'], dict) or {})
70d5c17b 3027 for entry in extract_entries(parent_renderer):
3028 yield entry
3462ffa8 3029 continuation = continuation_list[0]
f4f751af 3030 context = self._extract_context(ytcfg)
3031 visitor_data = try_get(context, lambda x: x['client']['visitorData'], compat_str)
d069eca7 3032
8bdd16b4 3033 for page_num in itertools.count(1):
3034 if not continuation:
3035 break
f4f751af 3036 headers = self._generate_api_headers(ytcfg, identity_token, account_syncid, visitor_data)
62bff2c1 3037 retries = self._downloader.params.get('extractor_retries', 3)
3038 count = -1
3039 last_error = None
3040 while count < retries:
3041 count += 1
3042 if last_error:
3043 self.report_warning('%s. Retrying ...' % last_error)
29f7c58a 3044 try:
a5c56234 3045 response = self._call_api(
d92f5d5a 3046 ep='browse', fatal=True, headers=headers,
a5c56234
M
3047 video_id='%s page %s' % (item_id, page_num),
3048 query={
3049 'continuation': continuation['continuation'],
3050 'clickTracking': {'clickTrackingParams': continuation['itct']},
3051 },
f4f751af 3052 context=context,
3053 api_key=self._extract_api_key(ytcfg),
a5c56234 3054 note='Downloading API JSON%s' % (' (retry #%d)' % count if count else ''))
29f7c58a 3055 except ExtractorError as e:
62bff2c1 3056 if isinstance(e.cause, compat_HTTPError) and e.cause.code in (500, 503, 404):
3057 # Downloading page may result in intermittent 5xx HTTP error
3058 # Sometimes a 404 is also recieved. See: https://github.com/ytdl-org/youtube-dl/issues/28289
3059 last_error = 'HTTP Error %s' % e.cause.code
3060 if count < retries:
29f7c58a 3061 continue
3062 raise
62bff2c1 3063 else:
62bff2c1 3064 # Youtube sometimes sends incomplete data
3065 # See: https://github.com/ytdl-org/youtube-dl/issues/28194
26fe8ffe 3066 if dict_get(response,
3067 ('continuationContents', 'onResponseReceivedActions', 'onResponseReceivedEndpoints')):
62bff2c1 3068 break
f3eaa8dd
M
3069
3070 # Youtube may send alerts if there was an issue with the continuation page
3071 self._extract_alerts(response, expected=False)
3072
3073 last_error = 'Incomplete data received'
c705177d 3074 if count >= retries:
3075 self._downloader.report_error(last_error)
a5c56234
M
3076
3077 if not response:
8bdd16b4 3078 break
f4f751af 3079 visitor_data = try_get(
3080 response, lambda x: x['responseContext']['visitorData'], compat_str) or visitor_data
ebf1b291 3081
69184e41 3082 known_continuation_renderers = {
3083 'playlistVideoListContinuation': self._playlist_entries,
3084 'gridContinuation': self._grid_entries,
3085 'itemSectionContinuation': self._post_thread_continuation_entries,
3086 'sectionListContinuation': extract_entries, # for feeds
3087 }
8bdd16b4 3088 continuation_contents = try_get(
69184e41 3089 response, lambda x: x['continuationContents'], dict) or {}
3090 continuation_renderer = None
3091 for key, value in continuation_contents.items():
3092 if key not in known_continuation_renderers:
3462ffa8 3093 continue
69184e41 3094 continuation_renderer = value
3095 continuation_list = [None]
3096 for entry in known_continuation_renderers[key](continuation_renderer):
3097 yield entry
3098 continuation = continuation_list[0] or self._extract_continuation(continuation_renderer)
3099 break
3100 if continuation_renderer:
3101 continue
c5e8d7af 3102
a1b535bd 3103 known_renderers = {
3104 'gridPlaylistRenderer': (self._grid_entries, 'items'),
3105 'gridVideoRenderer': (self._grid_entries, 'items'),
3106 'playlistVideoRenderer': (self._playlist_entries, 'contents'),
cd7c66cf 3107 'itemSectionRenderer': (extract_entries, 'contents'), # for feeds
9ba5705a 3108 'richItemRenderer': (extract_entries, 'contents'), # for hashtag
26fe8ffe 3109 'backstagePostThreadRenderer': (self._post_thread_continuation_entries, 'contents')
a1b535bd 3110 }
cce889b9 3111 on_response_received = dict_get(response, ('onResponseReceivedActions', 'onResponseReceivedEndpoints'))
8bdd16b4 3112 continuation_items = try_get(
cce889b9 3113 on_response_received, lambda x: x[0]['appendContinuationItemsAction']['continuationItems'], list)
a1b535bd 3114 continuation_item = try_get(continuation_items, lambda x: x[0], dict) or {}
3115 video_items_renderer = None
3116 for key, value in continuation_item.items():
3117 if key not in known_renderers:
8bdd16b4 3118 continue
a1b535bd 3119 video_items_renderer = {known_renderers[key][1]: continuation_items}
9ba5705a 3120 continuation_list = [None]
a1b535bd 3121 for entry in known_renderers[key][0](video_items_renderer):
3122 yield entry
9ba5705a 3123 continuation = continuation_list[0] or self._extract_continuation(video_items_renderer)
a1b535bd 3124 break
3125 if video_items_renderer:
3126 continue
8bdd16b4 3127 break
9558dcec 3128
8bdd16b4 3129 @staticmethod
3130 def _extract_selected_tab(tabs):
3131 for tab in tabs:
3132 if try_get(tab, lambda x: x['tabRenderer']['selected'], bool):
3133 return tab['tabRenderer']
2b3c2546 3134 else:
8bdd16b4 3135 raise ExtractorError('Unable to find selected tab')
b82f815f 3136
@staticmethod
def _extract_uploader(data):
    """Collect uploader name, id and URL from the playlist sidebar of *data*.

    Every sidebar item carrying a ``playlistSidebarSecondaryInfoRenderer``
    with a video owner overwrites the previously collected values.  Keys
    whose value is None are left out of the returned dict.
    """
    info = {}
    sidebar_items = try_get(
        data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list) or []
    for entry in sidebar_items:
        secondary = (
            entry.get('playlistSidebarSecondaryInfoRenderer')
            if isinstance(entry, dict) else None)
        if not isinstance(secondary, dict):
            continue
        owner = try_get(
            secondary, lambda x: x['videoOwner']['videoOwnerRenderer']['title']['runs'][0], dict)
        if not owner:
            continue
        base_url = try_get(
            owner, lambda x: x['navigationEndpoint']['browseEndpoint']['canonicalBaseUrl'], compat_str)
        info['uploader'] = owner.get('text')
        info['uploader_id'] = try_get(
            owner, lambda x: x['navigationEndpoint']['browseEndpoint']['browseId'], compat_str)
        info['uploader_url'] = urljoin('https://www.youtube.com/', base_url)
    return dict((key, value) for key, value in info.items() if value is not None)
8bdd16b4 3159
def _extract_from_tabs(self, item_id, webpage, data, tabs):
    """Build a playlist result from a tabbed browse page.

    Metadata is read from ``channelMetadataRenderer`` when present, falling
    back to ``playlistMetadataRenderer``.  Entries come from the selected
    tab via ``self._entries``.

    item_id -- id matched from the URL; used as playlist id when the
               metadata does not provide a channel id
    webpage -- downloaded page, used for the identity token and ytcfg
    data    -- parsed initial data of the page
    tabs    -- list of tab dicts; one of them must be selected
               (ExtractorError is raised otherwise)
    """
    playlist_id = title = description = channel_url = channel_name = channel_id = None
    # Two distinct lists: the names must not alias one mutable object
    thumbnails_list = []
    tags = []

    selected_tab = self._extract_selected_tab(tabs)
    renderer = try_get(
        data, lambda x: x['metadata']['channelMetadataRenderer'], dict)
    if renderer:
        channel_name = renderer.get('title')
        channel_url = renderer.get('channelUrl')
        channel_id = renderer.get('externalId')
    else:
        renderer = try_get(
            data, lambda x: x['metadata']['playlistMetadataRenderer'], dict)

    if renderer:
        title = renderer.get('title')
        description = renderer.get('description', '')
        playlist_id = channel_id
        # `keywords` may be missing, null or not a string; a dict.get()
        # default only covers the "missing" case, so go through try_get
        tags = try_get(renderer, lambda x: x['keywords'].split(), list) or []
        thumbnails_list = (
            try_get(renderer, lambda x: x['avatar']['thumbnails'], list)
            or try_get(
                data,
                lambda x: x['sidebar']['playlistSidebarRenderer']['items'][0]['playlistSidebarPrimaryInfoRenderer']['thumbnailRenderer']['playlistVideoThumbnailRenderer']['thumbnail']['thumbnails'],
                list)
            or [])

    thumbnails = []
    for t in thumbnails_list:
        if not isinstance(t, dict):
            continue
        thumbnail_url = url_or_none(t.get('url'))
        if not thumbnail_url:
            continue
        thumbnails.append({
            'url': thumbnail_url,
            'width': int_or_none(t.get('width')),
            'height': int_or_none(t.get('height')),
        })
    if playlist_id is None:
        playlist_id = item_id
    if title is None:
        # Hashtag pages carry their title in the header; otherwise use the id
        title = (
            try_get(data, lambda x: x['header']['hashtagHeaderRenderer']['hashtag']['simpleText'])
            or playlist_id)
    title += format_field(selected_tab, 'title', ' - %s')

    metadata = {
        'playlist_id': playlist_id,
        'playlist_title': title,
        'playlist_description': description,
        'uploader': channel_name,
        'uploader_id': channel_id,
        'uploader_url': channel_url,
        'thumbnails': thumbnails,
        'tags': tags,
    }
    if not channel_id:
        # No channel metadata: try the playlist sidebar for the owner
        metadata.update(self._extract_uploader(data))
    metadata.update({
        'channel': metadata['uploader'],
        'channel_id': metadata['uploader_id'],
        'channel_url': metadata['uploader_url']})
    return self.playlist_result(
        self._entries(
            selected_tab, playlist_id,
            self._extract_identity_token(webpage, item_id),
            self._extract_account_syncid(data),
            self._extract_ytcfg(item_id, webpage)),
        **metadata)
73c4ac2c 3231
def _extract_mix_playlist(self, playlist, playlist_id):
    """Yield the videos of a mix playlist, fetching further pages as needed.

    Each page is requested through the watch URL of the last video seen.
    Iteration stops when a page has no entries, when a page has nothing
    after the last video already seen, or when the very first video shows
    up again.
    """
    first_id = last_id = None
    page_num = 0
    while True:
        page_num += 1
        videos = list(self._playlist_entries(playlist))
        if not videos:
            return

        # Skip everything up to and including the last video of the previous page
        start = 0
        for index, candidate in enumerate(videos):
            if candidate['id'] == last_id:
                start = index + 1
                break
        if start >= len(videos):
            return

        for video in videos[start:]:
            if video['id'] == first_id:
                self.to_screen('First video %s found again; Assuming end of Mix' % first_id)
                return
            yield video

        if not first_id:
            first_id = videos[0]['id']
        last_id = videos[-1]['id']

        next_url = 'https://www.youtube.com/watch?list=%s&v=%s' % (playlist_id, last_id)
        _, data = self._extract_webpage(
            next_url, '%s page %d' % (playlist_id, page_num))
        playlist = try_get(
            data, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
3254
def _extract_from_playlist(self, item_id, url, data, playlist):
    """Handle a playlist embedded in a watch page.

    When the playlist points at a URL different from *url*, the work is
    handed over to YoutubeTabIE through a url_result; otherwise the
    playlist is treated as a mix and extracted page by page.
    """
    playlist_id = playlist.get('playlistId') or item_id
    title = playlist.get('title')
    if not title:
        title = try_get(data, lambda x: x['titleText']['simpleText'], compat_str)

    # Delegating everything except mix playlists to regular tab-based playlist URL
    endpoint_url = try_get(
        playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        compat_str)
    playlist_url = urljoin(url, endpoint_url)
    if not playlist_url or playlist_url == url:
        return self.playlist_result(
            self._extract_mix_playlist(playlist, playlist_id),
            playlist_id=playlist_id, playlist_title=title)

    return self.url_result(
        playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
        video_title=title)
c5e8d7af 3272
f3eaa8dd
M
def _extract_alerts(self, data, expected=False):
    """Report the alerts found in *data* and raise on errors.

    Alerts whose type is "error" (case-insensitive) are errors, all others
    are warnings.  Every warning and every error but the last is reported
    through the downloader as a warning; the last error is raised as an
    ExtractorError carrying *expected*.
    """

    def _real_extract_alerts():
        """Yield one (type, message) pair per alert that has a type and text."""
        for alert_dict in try_get(data, lambda x: x['alerts'], list) or []:
            if not isinstance(alert_dict, dict):
                continue
            for alert in alert_dict.values():
                # Malformed entries must not abort the whole extraction
                if not isinstance(alert, dict):
                    continue
                alert_type = alert.get('type')
                if not alert_type:
                    continue
                # The text is either a plain `simpleText` or a list of runs.
                # Yield exactly once per alert, so that a single error is not
                # first reported as a warning and then raised as well.
                message = try_get(alert, lambda x: x['text']['simpleText'], compat_str)
                if not message:
                    message = ''.join(
                        try_get(run, lambda x: x['text'], compat_str) or ''
                        for run in try_get(alert, lambda x: x['text']['runs'], list) or [])
                if message:
                    yield alert_type, message

    errors = []
    warnings = []
    for alert_type, alert_message in _real_extract_alerts():
        if alert_type.lower() == 'error':
            errors.append([alert_type, alert_message])
        else:
            warnings.append([alert_type, alert_message])

    # Only the last error is raised; earlier ones are downgraded to warnings
    for alert_type, alert_message in (warnings + errors[:-1]):
        self._downloader.report_warning('YouTube said: %s - %s' % (alert_type, alert_message))
    if errors:
        raise ExtractorError('YouTube said: %s' % errors[-1][1], expected=expected)
02ced43c 3303
def _extract_webpage(self, url, item_id):
    """Download *url* and return ``(webpage, data)``.

    *data* is the initial data extracted from the page.  The download is
    repeated, up to the ``extractor_retries`` downloader parameter
    (default 3), while the data has neither ``contents`` nor
    ``currentVideoEndpoint``.  Alerts in the data are processed on every
    attempt.  When all attempts yield incomplete data, an error is reported
    through the downloader and the last page and data are returned.
    """
    retries = self._downloader.params.get('extractor_retries', 3)
    count = -1
    last_error = 'Incomplete yt initial data received'
    while count < retries:
        count += 1
        # Sometimes youtube returns a webpage with incomplete ytInitialData
        # See: https://github.com/yt-dlp/yt-dlp/issues/116
        if count:
            self.report_warning('%s. Retrying ...' % last_error)
        webpage = self._download_webpage(
            url, item_id,
            'Downloading webpage%s' % (' (retry #%d)' % count if count else ''))
        data = self._extract_yt_initial_data(item_id, webpage)
        self._extract_alerts(data, expected=True)
        if data.get('contents') or data.get('currentVideoEndpoint'):
            break
        # Out of attempts: report, then fall through and return what we have
        if count >= retries:
            self._downloader.report_error(last_error)
    return webpage, data
3324
def _real_extract(self, url):
    """Dispatch a tab, playlist or watch URL to the matching extraction path.

    The host is normalised to www.youtube.com first.  Bare channel/user
    URLs are redirected to their "/videos" tab, watch URLs without a video
    id fall back to the playlist page, and --no-playlist short-circuits to
    the single video.  Raises ExtractorError when the page is neither a tab
    page, a playlist nor a video.
    """
    item_id = self._match_id(url)
    parsed_url = compat_urlparse.urlparse(url)
    url = compat_urlparse.urlunparse(parsed_url._replace(netloc='www.youtube.com'))

    # This is not matched in a channel page with a tab selected
    match = re.match(r'(?P<pre>%s)(?P<post>/?(?![^#?]).*$)' % self._VALID_URL, url)
    groups = match.groupdict() if match else {}
    not_channel = groups.get('not_channel')
    if groups and not not_channel:
        self._downloader.report_warning(
            'A channel/user page was given. All the channel\'s videos will be downloaded. '
            'To download only the videos in the home page, add a "/featured" to the URL')
        url = '%s/videos%s' % (groups.get('pre'), groups.get('post') or '')

    # Handle both video/playlist URLs
    query = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
    video_id = query.get('v', [None])[0]
    playlist_id = query.get('list', [None])[0]

    if not video_id and (not_channel or '').startswith('watch'):
        if not playlist_id:
            # If there is neither video or playlist ids,
            # youtube redirects to home page, which is undesirable
            raise ExtractorError('Unable to recognize tab page')
        self._downloader.report_warning('A video URL was given without video ID. Trying to download playlist %s' % playlist_id)
        url = 'https://www.youtube.com/playlist?list=%s' % playlist_id

    if video_id and playlist_id:
        if self._downloader.params.get('noplaylist'):
            self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
            return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)
        self.to_screen('Downloading playlist %s; add --no-playlist to just download video %s' % (playlist_id, video_id))

    webpage, data = self._extract_webpage(url, item_id)

    # 1. Regular tabbed page (channel, playlist, feed, ...)
    tabs = try_get(
        data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
    if tabs:
        return self._extract_from_tabs(item_id, webpage, data, tabs)

    # 2. Watch page with an attached playlist
    playlist = try_get(
        data, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
    if playlist:
        return self._extract_from_playlist(item_id, url, data, playlist)

    # 3. Plain video
    endpoint_video_id = try_get(
        data, lambda x: x['currentVideoEndpoint']['watchEndpoint']['videoId'],
        compat_str)
    video_id = endpoint_video_id or video_id
    if not video_id:
        raise ExtractorError('Unable to recognize tab page')

    self._downloader.report_warning('Unable to recognize playlist. Downloading just video %s' % video_id)
    return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)
c5e8d7af 3378
c5e8d7af 3379
class YoutubePlaylistIE(InfoExtractor):
    """Match bare playlist ids and ``list=`` URLs and delegate to YoutubeTabIE."""

    IE_NAME = 'youtube:playlist'
    IE_DESC = 'YouTube.com playlists'
    _VALID_URL = r'''(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        (?:
                            (?:
                                youtube(?:kids)?\.com|
                                invidio\.us
                            )
                            /.*?\?.*?\blist=
                        )?
                        (?P<id>%(playlist_id)s)
                     )''' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TESTS = [{
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
            'uploader': 'Wickydoo',
            'uploader_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
        },
        'playlist_mincount': 29,
    }, {
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        },
        'playlist_count': 2,
        'skip': 'This playlist is private',
    }, {
        'note': 'embedded',
        'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA15',
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
            'uploader': 'milan',
            'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
        }
    }, {
        'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
        'playlist_mincount': 982,
        'info_dict': {
            'title': '2018 Chinese New Singles (11/6 updated)',
            'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
            'uploader': 'LBK',
            'uploader_id': 'UC21nz3_MesPLqtDqwdvnoxA',
        }
    }, {
        'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
        'only_matching': True,
    }, {
        # music album playlist
        'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        # Anything YoutubeTabIE can handle is left to it
        if YoutubeTabIE.suitable(url):
            return False
        return super(YoutubePlaylistIE, cls).suitable(url)

    def _real_extract(self, url):
        playlist_id = self._match_id(url)
        # Keep the original query string; a bare id has none, so build one
        query = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query) or {'list': playlist_id}
        playlist_url = update_url_query('https://www.youtube.com/playlist', query)
        return self.url_result(
            playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
3454
3455
class YoutubeYtBeIE(InfoExtractor):
    """Rewrite youtu.be links that carry a ``list=`` parameter into watch URLs."""

    IE_DESC = 'youtu.be'
    _VALID_URL = r'https?://youtu\.be/(?P<id>[0-9A-Za-z_-]{11})/*?.*?\blist=(?P<playlist_id>%(playlist_id)s)' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TESTS = [{
        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
        'info_dict': {
            'id': 'yeWKywCrFtk',
            'ext': 'mp4',
            'title': 'Small Scale Baler and Braiding Rugs',
            'uploader': 'Backus-Page House Museum',
            'uploader_id': 'backuspagemuseum',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
            'upload_date': '20161008',
            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
            'categories': ['Nonprofits & Activism'],
            'tags': list,
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
    }, {
        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        video_id, playlist_id = re.match(self._VALID_URL, url).group('id', 'playlist_id')
        watch_url = update_url_query('https://www.youtube.com/watch', {
            'v': video_id,
            'list': playlist_id,
            'feature': 'youtu.be',
        })
        return self.url_result(
            watch_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
8bdd16b4 3494
3495
class YoutubeYtUserIE(InfoExtractor):
    """Expand the ``ytuser:<name>`` shorthand into a /user/ URL for YoutubeTabIE."""

    IE_DESC = 'YouTube.com user videos, URL or "ytuser" keyword'
    _VALID_URL = r'ytuser:(?P<id>.+)'
    _TESTS = [{
        'url': 'ytuser:phihag',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        user_id = self._match_id(url)
        user_url = 'https://www.youtube.com/user/%s' % user_id
        return self.url_result(
            user_url, ie=YoutubeTabIE.ie_key(), video_id=user_id)
9558dcec 3509
b05654f0 3510
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    """Map the ``:ytfav`` shorthand to the "LL" playlist handled by YoutubeTabIE."""

    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube.com liked videos, ":ytfav" for short (requires authentication)'
    _LOGIN_REQUIRED = True
    _VALID_URL = r':ytfav(?:ou?rite)?s?'
    _TESTS = [{
        'url': ':ytfav',
        'only_matching': True,
    }, {
        'url': ':ytfavorites',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        liked_url = 'https://www.youtube.com/playlist?list=LL'
        return self.url_result(liked_url, ie=YoutubeTabIE.ie_key())
3528
3529
class YoutubeSearchIE(SearchInfoExtractor, YoutubeBaseInfoExtractor):
    """Search extractor behind the ``ytsearch`` keyword."""

    IE_NAME = 'youtube:search'
    IE_DESC = 'YouTube.com searches, "ytsearch" keyword'
    # there doesn't appear to be a real limit, for example if you search for
    # 'python' you get more than 8.000.000 results
    _MAX_RESULTS = float('inf')
    _SEARCH_KEY = 'ytsearch'
    _SEARCH_PARAMS = None
    _TESTS = []

    def _entries(self, query, n):
        """Yield up to *n* videos for *query*, following continuation tokens."""
        request = {'query': query}
        if self._SEARCH_PARAMS:
            request['params'] = self._SEARCH_PARAMS

        yielded = 0
        page_num = 0
        while True:
            page_num += 1
            response = self._call_api(
                ep='search', video_id='query "%s"' % query, fatal=False,
                note='Downloading page %s' % page_num, query=request)
            if not response:
                return

            # First page and continuation pages keep their results in different places
            sections = try_get(
                response,
                (lambda x: x['contents']['twoColumnSearchResultsRenderer']['primaryContents']['sectionListRenderer']['contents'],
                 lambda x: x['onResponseReceivedCommands'][0]['appendContinuationItemsAction']['continuationItems']),
                list)
            if not sections:
                return

            # Youtube sometimes adds promoted content to searches,
            # changing the index location of videos and token.
            # So we search through all entries till we find them.
            token = None
            for section in sections:
                if token is None:
                    token = try_get(
                        section,
                        lambda x: x['continuationItemRenderer']['continuationEndpoint']['continuationCommand']['token'],
                        compat_str)

                items = try_get(
                    section,
                    lambda x: x['itemSectionRenderer']['contents'],
                    list) or []
                for item in items:
                    video = item.get('videoRenderer') if isinstance(item, dict) else None
                    if not isinstance(video, dict) or not video.get('videoId'):
                        continue

                    yield self._extract_video(video)
                    yielded += 1
                    if yielded == n:
                        return

            if not token:
                return
            request['continuation'] = token

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query"""
        entries = self._entries(query, n)
        return self.playlist_result(entries, query)
75dff0ee 3598
c9ae7b95 3599
class YoutubeSearchDateIE(YoutubeSearchIE):
    """Variant of the search extractor behind the ``ytsearchdate`` keyword."""

    IE_NAME = '%s:date' % YoutubeSearchIE.IE_NAME
    IE_DESC = 'YouTube.com searches, newest videos first, "ytsearchdate" keyword'
    _SEARCH_KEY = 'ytsearchdate'
    _SEARCH_PARAMS = 'CAI%3D'
75dff0ee 3605
c9ae7b95 3606
class YoutubeSearchURLIE(YoutubeSearchIE):
    """Run a search taken from a youtube.com/results URL."""

    IE_NAME = '%s_url' % YoutubeSearchIE.IE_NAME
    IE_DESC = 'YouTube.com search URLs'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?:[^&]+)(?:[&]|$)'
    # _MAX_RESULTS = 100
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'title': 'youtube-dl test video',
        }
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    @classmethod
    def _make_valid_url(cls):
        # Use the explicit URL pattern as is
        return cls._VALID_URL

    def _real_extract(self, url):
        params = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
        terms = params.get('search_query') or params.get('q')
        query = terms[0]
        # The `sp` URL parameter is forwarded as the search params
        self._SEARCH_PARAMS = params.get('sp', ('',))[0]
        return self._get_n_results(query, self._MAX_RESULTS)
3462ffa8 3632
3633
class YoutubeFeedsInfoExtractor(YoutubeTabIE):
    """
    Base class for feed extractors
    Subclasses must define the _FEED_NAME property.
    """
    _TESTS = []
    _LOGIN_REQUIRED = True

    @property
    def IE_NAME(self):
        # Derived from the feed name supplied by the subclass
        return 'youtube:%s' % self._FEED_NAME

    def _real_initialize(self):
        self._login()

    def _real_extract(self, url):
        feed_url = 'https://www.youtube.com/feed/%s' % self._FEED_NAME
        return self.url_result(feed_url, ie=YoutubeTabIE.ie_key())
25f14e9f
S
3653
3654
class YoutubeWatchLaterIE(InfoExtractor):
    """Map the ``:ytwatchlater`` shorthand to the "WL" playlist handled by YoutubeTabIE."""

    IE_NAME = 'youtube:watchlater'
    IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
    _VALID_URL = r':ytwatchlater'
    _TESTS = [{
        'url': ':ytwatchlater',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        watch_later_url = 'https://www.youtube.com/playlist?list=WL'
        return self.url_result(watch_later_url, ie=YoutubeTabIE.ie_key())
3462ffa8 3667
3668
25f14e9f
S
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the "recommended" feed (``:ytrec`` or the bare home page URL)."""

    _FEED_NAME = 'recommended'
    IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/?(?:[?#]|$)|:ytrec(?:ommended)?'
    _TESTS = [{
        'url': ':ytrec',
        'only_matching': True,
    }, {
        'url': ':ytrecommended',
        'only_matching': True,
    }, {
        'url': 'https://youtube.com',
        'only_matching': True,
    }]
1ed5b5c9 3683
1ed5b5c9 3684
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the "subscriptions" feed (``:ytsubs``)."""

    _FEED_NAME = 'subscriptions'
    IE_DESC = 'YouTube.com subscriptions feed, ":ytsubs" for short (requires authentication)'
    _VALID_URL = r':ytsub(?:scription)?s?'
    _TESTS = [{
        'url': ':ytsubs',
        'only_matching': True,
    }, {
        'url': ':ytsubscriptions',
        'only_matching': True,
    }]
1ed5b5c9 3696
1ed5b5c9 3697
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the "history" feed (``:ythis`` / ``:ythistory``)."""

    _FEED_NAME = 'history'
    IE_DESC = 'Youtube watch history, ":ythis" for short (requires authentication)'
    _VALID_URL = r':ythis(?:tory)?'
    _TESTS = [{
        'url': ':ythistory',
        'only_matching': True,
    }]
1ed5b5c9
JMF
3706
3707
15870e90
PH
class YoutubeTruncatedURLIE(InfoExtractor):
    """Catch watch URLs that lost their video id and explain how to quote them."""

    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # Such URLs are always a user error; nothing can be extracted from them
        hint = (
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like youtube-dl '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply youtube-dl BaW_jenozKc .')
        raise ExtractorError(hint, expected=True)
772fd5cc
PH
3755
3756
class YoutubeTruncatedIDIE(InfoExtractor):
    """Catch watch URLs whose ``v=`` value is 1 to 10 characters long and report them."""

    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        video_id = self._match_id(url)
        message = 'Incomplete YouTube ID %s. URL %s looks truncated.' % (video_id, url)
        raise ExtractorError(message, expected=True)