]> jfr.im git - yt-dlp.git/blob - yt_dlp/extractor/youtube.py
[crunchyroll:playlist] Force http
[yt-dlp.git] / yt_dlp / extractor / youtube.py
1 # coding: utf-8
2
3 from __future__ import unicode_literals
4
5 import base64
6 import calendar
7 import copy
8 import datetime
9 import hashlib
10 import itertools
11 import json
12 import os.path
13 import random
14 import re
15 import time
16 import traceback
17
18 from .common import InfoExtractor, SearchInfoExtractor
19 from ..compat import (
20 compat_chr,
21 compat_HTTPError,
22 compat_parse_qs,
23 compat_str,
24 compat_urllib_parse_unquote_plus,
25 compat_urllib_parse_urlencode,
26 compat_urllib_parse_urlparse,
27 compat_urlparse,
28 )
29 from ..jsinterp import JSInterpreter
30 from ..utils import (
31 bool_or_none,
32 bytes_to_intlist,
33 clean_html,
34 dict_get,
35 datetime_from_str,
36 error_to_compat_str,
37 ExtractorError,
38 format_field,
39 float_or_none,
40 int_or_none,
41 intlist_to_bytes,
42 mimetype2ext,
43 parse_codecs,
44 parse_count,
45 parse_duration,
46 qualities,
47 remove_start,
48 smuggle_url,
49 str_or_none,
50 str_to_int,
51 try_get,
52 unescapeHTML,
53 unified_strdate,
54 unsmuggle_url,
55 update_url_query,
56 url_or_none,
57 urlencode_postdata,
58 urljoin,
59 variadic
60 )
61
62
def parse_qs(url):
    """Return the query string of *url* parsed into a dict of value lists."""
    parsed_url = compat_urlparse.urlparse(url)
    return compat_urlparse.parse_qs(parsed_url.query)
65
66
class YoutubeBaseInfoExtractor(InfoExtractor):
    """Provide base functions for Youtube extractors"""
    # Legacy Google account sign-in endpoints. Username/password login is
    # broken (see _login); these are retained for reference only.
    _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
    _TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge'

    _LOOKUP_URL = 'https://accounts.google.com/_/signin/sl/lookup'
    _CHALLENGE_URL = 'https://accounts.google.com/_/signin/sl/challenge'
    _TFA_URL = 'https://accounts.google.com/_/signin/challenge?hl=en&TL={0}'

    # Path segments on youtube.com that can never be a channel/user name
    # (used to disambiguate URLs in subclass _VALID_URLs).
    _RESERVED_NAMES = (
        r'channel|c|user|browse|playlist|watch|w|v|embed|e|watch_popup|shorts|'
        r'movies|results|shared|hashtag|trending|feed|feeds|oembed|get_video_info|'
        r'storefront|oops|index|account|reporthistory|t/terms|about|upload|signin|logout')

    _NETRC_MACHINE = 'youtube'
    # If True it will raise an error if no login info is provided
    _LOGIN_REQUIRED = False

    # Playlist IDs: a known prefix followed by >= 10 id characters, or one of
    # the special list names (mix, watch later, liked, ...).
    _PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)'
86
87 def _login(self):
88 """
89 Attempt to log in to YouTube.
90 True is returned if successful or skipped.
91 False is returned if login failed.
92
93 If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.
94 """
95
96 def warn(message):
97 self.report_warning(message)
98
99 # username+password login is broken
100 if self._LOGIN_REQUIRED and self.get_param('cookiefile') is None:
101 self.raise_login_required(
102 'Login details are needed to download this content', method='cookies')
103 username, password = self._get_login_info()
104 if username:
105 warn('Logging in using username and password is broken. %s' % self._LOGIN_HINTS['cookies'])
106 return
107
108 # Everything below this is broken!
109 r'''
110 # No authentication to be performed
111 if username is None:
112 if self._LOGIN_REQUIRED and self.get_param('cookiefile') is None:
113 raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
114 # if self.get_param('cookiefile'): # TODO remove 'and False' later - too many people using outdated cookies and open issues, remind them.
115 # self.to_screen('[Cookies] Reminder - Make sure to always use up to date cookies!')
116 return True
117
118 login_page = self._download_webpage(
119 self._LOGIN_URL, None,
120 note='Downloading login page',
121 errnote='unable to fetch login page', fatal=False)
122 if login_page is False:
123 return
124
125 login_form = self._hidden_inputs(login_page)
126
127 def req(url, f_req, note, errnote):
128 data = login_form.copy()
129 data.update({
130 'pstMsg': 1,
131 'checkConnection': 'youtube',
132 'checkedDomains': 'youtube',
133 'hl': 'en',
134 'deviceinfo': '[null,null,null,[],null,"US",null,null,[],"GlifWebSignIn",null,[null,null,[]]]',
135 'f.req': json.dumps(f_req),
136 'flowName': 'GlifWebSignIn',
137 'flowEntry': 'ServiceLogin',
138 # TODO: reverse actual botguard identifier generation algo
139 'bgRequest': '["identifier",""]',
140 })
141 return self._download_json(
142 url, None, note=note, errnote=errnote,
143 transform_source=lambda s: re.sub(r'^[^[]*', '', s),
144 fatal=False,
145 data=urlencode_postdata(data), headers={
146 'Content-Type': 'application/x-www-form-urlencoded;charset=utf-8',
147 'Google-Accounts-XSRF': 1,
148 })
149
150 lookup_req = [
151 username,
152 None, [], None, 'US', None, None, 2, False, True,
153 [
154 None, None,
155 [2, 1, None, 1,
156 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn',
157 None, [], 4],
158 1, [None, None, []], None, None, None, True
159 ],
160 username,
161 ]
162
163 lookup_results = req(
164 self._LOOKUP_URL, lookup_req,
165 'Looking up account info', 'Unable to look up account info')
166
167 if lookup_results is False:
168 return False
169
170 user_hash = try_get(lookup_results, lambda x: x[0][2], compat_str)
171 if not user_hash:
172 warn('Unable to extract user hash')
173 return False
174
175 challenge_req = [
176 user_hash,
177 None, 1, None, [1, None, None, None, [password, None, True]],
178 [
179 None, None, [2, 1, None, 1, 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn', None, [], 4],
180 1, [None, None, []], None, None, None, True
181 ]]
182
183 challenge_results = req(
184 self._CHALLENGE_URL, challenge_req,
185 'Logging in', 'Unable to log in')
186
187 if challenge_results is False:
188 return
189
190 login_res = try_get(challenge_results, lambda x: x[0][5], list)
191 if login_res:
192 login_msg = try_get(login_res, lambda x: x[5], compat_str)
193 warn(
194 'Unable to login: %s' % 'Invalid password'
195 if login_msg == 'INCORRECT_ANSWER_ENTERED' else login_msg)
196 return False
197
198 res = try_get(challenge_results, lambda x: x[0][-1], list)
199 if not res:
200 warn('Unable to extract result entry')
201 return False
202
203 login_challenge = try_get(res, lambda x: x[0][0], list)
204 if login_challenge:
205 challenge_str = try_get(login_challenge, lambda x: x[2], compat_str)
206 if challenge_str == 'TWO_STEP_VERIFICATION':
207 # SEND_SUCCESS - TFA code has been successfully sent to phone
208 # QUOTA_EXCEEDED - reached the limit of TFA codes
209 status = try_get(login_challenge, lambda x: x[5], compat_str)
210 if status == 'QUOTA_EXCEEDED':
211 warn('Exceeded the limit of TFA codes, try later')
212 return False
213
214 tl = try_get(challenge_results, lambda x: x[1][2], compat_str)
215 if not tl:
216 warn('Unable to extract TL')
217 return False
218
219 tfa_code = self._get_tfa_info('2-step verification code')
220
221 if not tfa_code:
222 warn(
223 'Two-factor authentication required. Provide it either interactively or with --twofactor <code>'
224 '(Note that only TOTP (Google Authenticator App) codes work at this time.)')
225 return False
226
227 tfa_code = remove_start(tfa_code, 'G-')
228
229 tfa_req = [
230 user_hash, None, 2, None,
231 [
232 9, None, None, None, None, None, None, None,
233 [None, tfa_code, True, 2]
234 ]]
235
236 tfa_results = req(
237 self._TFA_URL.format(tl), tfa_req,
238 'Submitting TFA code', 'Unable to submit TFA code')
239
240 if tfa_results is False:
241 return False
242
243 tfa_res = try_get(tfa_results, lambda x: x[0][5], list)
244 if tfa_res:
245 tfa_msg = try_get(tfa_res, lambda x: x[5], compat_str)
246 warn(
247 'Unable to finish TFA: %s' % 'Invalid TFA code'
248 if tfa_msg == 'INCORRECT_ANSWER_ENTERED' else tfa_msg)
249 return False
250
251 check_cookie_url = try_get(
252 tfa_results, lambda x: x[0][-1][2], compat_str)
253 else:
254 CHALLENGES = {
255 'LOGIN_CHALLENGE': "This device isn't recognized. For your security, Google wants to make sure it's really you.",
256 'USERNAME_RECOVERY': 'Please provide additional information to aid in the recovery process.',
257 'REAUTH': "There is something unusual about your activity. For your security, Google wants to make sure it's really you.",
258 }
259 challenge = CHALLENGES.get(
260 challenge_str,
261 '%s returned error %s.' % (self.IE_NAME, challenge_str))
262 warn('%s\nGo to https://accounts.google.com/, login and solve a challenge.' % challenge)
263 return False
264 else:
265 check_cookie_url = try_get(res, lambda x: x[2], compat_str)
266
267 if not check_cookie_url:
268 warn('Unable to extract CheckCookie URL')
269 return False
270
271 check_cookie_results = self._download_webpage(
272 check_cookie_url, None, 'Checking cookie', fatal=False)
273
274 if check_cookie_results is False:
275 return False
276
277 if 'https://myaccount.google.com/' not in check_cookie_results:
278 warn('Unable to log in')
279 return False
280
281 return True
282 '''
283
284 def _initialize_consent(self):
285 cookies = self._get_cookies('https://www.youtube.com/')
286 if cookies.get('__Secure-3PSID'):
287 return
288 consent_id = None
289 consent = cookies.get('CONSENT')
290 if consent:
291 if 'YES' in consent.value:
292 return
293 consent_id = self._search_regex(
294 r'PENDING\+(\d+)', consent.value, 'consent', default=None)
295 if not consent_id:
296 consent_id = random.randint(100, 999)
297 self._set_cookie('.youtube.com', 'CONSENT', 'YES+cb.20210328-17-p0.en+FX+%s' % consent_id)
298
299 def _real_initialize(self):
300 self._initialize_consent()
301 if self._downloader is None:
302 return
303 if not self._login():
304 return
305
    # Regexes locating the JSON blobs YouTube embeds in watch/browse pages.
    _YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+?})\s*;'
    _YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*=\s*({.+?})\s*;'
    # Tokens that terminate the embedded JSON blob in the page source.
    _YT_INITIAL_BOUNDARY_RE = r'(?:var\s+meta|</script|\n)'

    # Built-in innertube client configurations, used as fallbacks when values
    # cannot be extracted from the page's own ytcfg (see _get_default_ytcfg).
    _YT_DEFAULT_YTCFGS = {
        'WEB': {
            'INNERTUBE_API_VERSION': 'v1',
            'INNERTUBE_CLIENT_NAME': 'WEB',
            'INNERTUBE_CLIENT_VERSION': '2.20210622.10.00',
            'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
            'INNERTUBE_CONTEXT': {
                'client': {
                    'clientName': 'WEB',
                    'clientVersion': '2.20210622.10.00',
                    'hl': 'en',
                }
            },
            'INNERTUBE_CONTEXT_CLIENT_NAME': 1
        },
        'WEB_REMIX': {
            'INNERTUBE_API_VERSION': 'v1',
            'INNERTUBE_CLIENT_NAME': 'WEB_REMIX',
            'INNERTUBE_CLIENT_VERSION': '1.20210621.00.00',
            'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
            'INNERTUBE_CONTEXT': {
                'client': {
                    'clientName': 'WEB_REMIX',
                    'clientVersion': '1.20210621.00.00',
                    'hl': 'en',
                }
            },
            'INNERTUBE_CONTEXT_CLIENT_NAME': 67
        },
        'WEB_EMBEDDED_PLAYER': {
            'INNERTUBE_API_VERSION': 'v1',
            'INNERTUBE_CLIENT_NAME': 'WEB_EMBEDDED_PLAYER',
            'INNERTUBE_CLIENT_VERSION': '1.20210620.0.1',
            'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
            'INNERTUBE_CONTEXT': {
                'client': {
                    'clientName': 'WEB_EMBEDDED_PLAYER',
                    'clientVersion': '1.20210620.0.1',
                    'hl': 'en',
                }
            },
            'INNERTUBE_CONTEXT_CLIENT_NAME': 56
        },
        'ANDROID': {
            'INNERTUBE_API_VERSION': 'v1',
            'INNERTUBE_CLIENT_NAME': 'ANDROID',
            'INNERTUBE_CLIENT_VERSION': '16.20',
            'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
            'INNERTUBE_CONTEXT': {
                'client': {
                    'clientName': 'ANDROID',
                    'clientVersion': '16.20',
                    'hl': 'en',
                }
            },
            'INNERTUBE_CONTEXT_CLIENT_NAME': 3
        },
        'ANDROID_EMBEDDED_PLAYER': {
            'INNERTUBE_API_VERSION': 'v1',
            'INNERTUBE_CLIENT_NAME': 'ANDROID_EMBEDDED_PLAYER',
            'INNERTUBE_CLIENT_VERSION': '16.20',
            'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
            'INNERTUBE_CONTEXT': {
                'client': {
                    'clientName': 'ANDROID_EMBEDDED_PLAYER',
                    'clientVersion': '16.20',
                    'hl': 'en',
                }
            },
            'INNERTUBE_CONTEXT_CLIENT_NAME': 55
        },
        'ANDROID_MUSIC': {
            'INNERTUBE_API_VERSION': 'v1',
            'INNERTUBE_CLIENT_NAME': 'ANDROID_MUSIC',
            'INNERTUBE_CLIENT_VERSION': '4.32',
            'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
            'INNERTUBE_CONTEXT': {
                'client': {
                    'clientName': 'ANDROID_MUSIC',
                    'clientVersion': '4.32',
                    'hl': 'en',
                }
            },
            'INNERTUBE_CONTEXT_CLIENT_NAME': 21
        }
    }

    # API hostname per innertube client; clients not listed use the WEB host
    # (see _get_innertube_host).
    _YT_DEFAULT_INNERTUBE_HOSTS = {
        'DIRECT': 'youtubei.googleapis.com',
        'WEB': 'www.youtube.com',
        'WEB_REMIX': 'music.youtube.com',
        'ANDROID_MUSIC': 'music.youtube.com'
    }
403
404 def _get_default_ytcfg(self, client='WEB'):
405 if client in self._YT_DEFAULT_YTCFGS:
406 return copy.deepcopy(self._YT_DEFAULT_YTCFGS[client])
407 self.write_debug(f'INNERTUBE default client {client} does not exist - falling back to WEB client.')
408 return copy.deepcopy(self._YT_DEFAULT_YTCFGS['WEB'])
409
410 def _get_innertube_host(self, client='WEB'):
411 return dict_get(self._YT_DEFAULT_INNERTUBE_HOSTS, (client, 'WEB'))
412
413 def _ytcfg_get_safe(self, ytcfg, getter, expected_type=None, default_client='WEB'):
414 # try_get but with fallback to default ytcfg client values when present
415 _func = lambda y: try_get(y, getter, expected_type)
416 return _func(ytcfg) or _func(self._get_default_ytcfg(default_client))
417
418 def _extract_client_name(self, ytcfg, default_client='WEB'):
419 return self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_CLIENT_NAME'], compat_str, default_client)
420
421 @staticmethod
422 def _extract_session_index(ytcfg):
423 return int_or_none(try_get(ytcfg, lambda x: x['SESSION_INDEX']))
424
425 def _extract_client_version(self, ytcfg, default_client='WEB'):
426 return self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_CLIENT_VERSION'], compat_str, default_client)
427
428 def _extract_api_key(self, ytcfg=None, default_client='WEB'):
429 return self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_API_KEY'], compat_str, default_client)
430
431 def _extract_context(self, ytcfg=None, default_client='WEB'):
432 _get_context = lambda y: try_get(y, lambda x: x['INNERTUBE_CONTEXT'], dict)
433 context = _get_context(ytcfg)
434 if context:
435 return context
436
437 context = _get_context(self._get_default_ytcfg(default_client))
438 if not ytcfg:
439 return context
440
441 # Recreate the client context (required)
442 context['client'].update({
443 'clientVersion': self._extract_client_version(ytcfg, default_client),
444 'clientName': self._extract_client_name(ytcfg, default_client),
445 })
446 visitor_data = try_get(ytcfg, lambda x: x['VISITOR_DATA'], compat_str)
447 if visitor_data:
448 context['client']['visitorData'] = visitor_data
449 return context
450
451 def _generate_sapisidhash_header(self, origin='https://www.youtube.com'):
452 # Sometimes SAPISID cookie isn't present but __Secure-3PAPISID is.
453 # See: https://github.com/yt-dlp/yt-dlp/issues/393
454 yt_cookies = self._get_cookies('https://www.youtube.com')
455 sapisid_cookie = dict_get(
456 yt_cookies, ('__Secure-3PAPISID', 'SAPISID'))
457 if sapisid_cookie is None:
458 return
459 time_now = round(time.time())
460 # SAPISID cookie is required if not already present
461 if not yt_cookies.get('SAPISID'):
462 self._set_cookie(
463 '.youtube.com', 'SAPISID', sapisid_cookie.value, secure=True, expire_time=time_now + 3600)
464 # SAPISIDHASH algorithm from https://stackoverflow.com/a/32065323
465 sapisidhash = hashlib.sha1(
466 f'{time_now} {sapisid_cookie.value} {origin}'.encode('utf-8')).hexdigest()
467 return f'SAPISIDHASH {time_now}_{sapisidhash}'
468
469 def _call_api(self, ep, query, video_id, fatal=True, headers=None,
470 note='Downloading API JSON', errnote='Unable to download API page',
471 context=None, api_key=None, api_hostname=None, default_client='WEB'):
472
473 data = {'context': context} if context else {'context': self._extract_context(default_client=default_client)}
474 data.update(query)
475 real_headers = self._generate_api_headers(client=default_client)
476 real_headers.update({'content-type': 'application/json'})
477 if headers:
478 real_headers.update(headers)
479 return self._download_json(
480 'https://%s/youtubei/v1/%s' % (api_hostname or self._get_innertube_host(default_client), ep),
481 video_id=video_id, fatal=fatal, note=note, errnote=errnote,
482 data=json.dumps(data).encode('utf8'), headers=real_headers,
483 query={'key': api_key or self._extract_api_key()})
484
485 def _extract_yt_initial_data(self, video_id, webpage):
486 return self._parse_json(
487 self._search_regex(
488 (r'%s\s*%s' % (self._YT_INITIAL_DATA_RE, self._YT_INITIAL_BOUNDARY_RE),
489 self._YT_INITIAL_DATA_RE), webpage, 'yt initial data'),
490 video_id)
491
492 def _extract_identity_token(self, webpage, item_id):
493 ytcfg = self._extract_ytcfg(item_id, webpage)
494 if ytcfg:
495 token = try_get(ytcfg, lambda x: x['ID_TOKEN'], compat_str)
496 if token:
497 return token
498 return self._search_regex(
499 r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
500 'identity token', default=None)
501
502 @staticmethod
503 def _extract_account_syncid(*args):
504 """
505 Extract syncId required to download private playlists of secondary channels
506 @params response and/or ytcfg
507 """
508 for data in args:
509 # ytcfg includes channel_syncid if on secondary channel
510 delegated_sid = try_get(data, lambda x: x['DELEGATED_SESSION_ID'], compat_str)
511 if delegated_sid:
512 return delegated_sid
513 sync_ids = (try_get(
514 data, (lambda x: x['responseContext']['mainAppWebResponseContext']['datasyncId'],
515 lambda x: x['DATASYNC_ID']), compat_str) or '').split("||")
516 if len(sync_ids) >= 2 and sync_ids[1]:
517 # datasyncid is of the form "channel_syncid||user_syncid" for secondary channel
518 # and just "user_syncid||" for primary channel. We only want the channel_syncid
519 return sync_ids[0]
520
521 def _extract_ytcfg(self, video_id, webpage):
522 if not webpage:
523 return {}
524 return self._parse_json(
525 self._search_regex(
526 r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg',
527 default='{}'), video_id, fatal=False) or {}
528
529 def _generate_api_headers(self, ytcfg=None, identity_token=None, account_syncid=None,
530 visitor_data=None, api_hostname=None, client='WEB', session_index=None):
531 origin = 'https://' + (api_hostname if api_hostname else self._get_innertube_host(client))
532 headers = {
533 'X-YouTube-Client-Name': compat_str(
534 self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_CONTEXT_CLIENT_NAME'], default_client=client)),
535 'X-YouTube-Client-Version': self._extract_client_version(ytcfg, client),
536 'Origin': origin
537 }
538 if not visitor_data and ytcfg:
539 visitor_data = try_get(
540 self._extract_context(ytcfg, client), lambda x: x['client']['visitorData'], compat_str)
541 if identity_token:
542 headers['X-Youtube-Identity-Token'] = identity_token
543 if account_syncid:
544 headers['X-Goog-PageId'] = account_syncid
545 if session_index is None and ytcfg:
546 session_index = self._extract_session_index(ytcfg)
547 if account_syncid or session_index is not None:
548 headers['X-Goog-AuthUser'] = session_index if session_index is not None else 0
549 if visitor_data:
550 headers['X-Goog-Visitor-Id'] = visitor_data
551 auth = self._generate_sapisidhash_header(origin)
552 if auth is not None:
553 headers['Authorization'] = auth
554 headers['X-Origin'] = origin
555 return headers
556
557 @staticmethod
558 def _build_api_continuation_query(continuation, ctp=None):
559 query = {
560 'continuation': continuation
561 }
562 # TODO: Inconsistency with clickTrackingParams.
563 # Currently we have a fixed ctp contained within context (from ytcfg)
564 # and a ctp in root query for continuation.
565 if ctp:
566 query['clickTracking'] = {'clickTrackingParams': ctp}
567 return query
568
569 @classmethod
570 def _extract_next_continuation_data(cls, renderer):
571 next_continuation = try_get(
572 renderer, (lambda x: x['continuations'][0]['nextContinuationData'],
573 lambda x: x['continuation']['reloadContinuationData']), dict)
574 if not next_continuation:
575 return
576 continuation = next_continuation.get('continuation')
577 if not continuation:
578 return
579 ctp = next_continuation.get('clickTrackingParams')
580 return cls._build_api_continuation_query(continuation, ctp)
581
582 @classmethod
583 def _extract_continuation_ep_data(cls, continuation_ep: dict):
584 if isinstance(continuation_ep, dict):
585 continuation = try_get(
586 continuation_ep, lambda x: x['continuationCommand']['token'], compat_str)
587 if not continuation:
588 return
589 ctp = continuation_ep.get('clickTrackingParams')
590 return cls._build_api_continuation_query(continuation, ctp)
591
592 @classmethod
593 def _extract_continuation(cls, renderer):
594 next_continuation = cls._extract_next_continuation_data(renderer)
595 if next_continuation:
596 return next_continuation
597
598 contents = []
599 for key in ('contents', 'items'):
600 contents.extend(try_get(renderer, lambda x: x[key], list) or [])
601
602 for content in contents:
603 if not isinstance(content, dict):
604 continue
605 continuation_ep = try_get(
606 content, (lambda x: x['continuationItemRenderer']['continuationEndpoint'],
607 lambda x: x['continuationItemRenderer']['button']['buttonRenderer']['command']),
608 dict)
609 continuation = cls._extract_continuation_ep_data(continuation_ep)
610 if continuation:
611 return continuation
612
613 @classmethod
614 def _extract_alerts(cls, data):
615 for alert_dict in try_get(data, lambda x: x['alerts'], list) or []:
616 if not isinstance(alert_dict, dict):
617 continue
618 for alert in alert_dict.values():
619 alert_type = alert.get('type')
620 if not alert_type:
621 continue
622 message = cls._get_text(alert.get('text'))
623 if message:
624 yield alert_type, message
625
626 def _report_alerts(self, alerts, expected=True):
627 errors = []
628 warnings = []
629 for alert_type, alert_message in alerts:
630 if alert_type.lower() == 'error':
631 errors.append([alert_type, alert_message])
632 else:
633 warnings.append([alert_type, alert_message])
634
635 for alert_type, alert_message in (warnings + errors[:-1]):
636 self.report_warning('YouTube said: %s - %s' % (alert_type, alert_message))
637 if errors:
638 raise ExtractorError('YouTube said: %s' % errors[-1][1], expected=expected)
639
640 def _extract_and_report_alerts(self, data, *args, **kwargs):
641 return self._report_alerts(self._extract_alerts(data), *args, **kwargs)
642
643 def _extract_badges(self, renderer: dict):
644 badges = set()
645 for badge in try_get(renderer, lambda x: x['badges'], list) or []:
646 label = try_get(badge, lambda x: x['metadataBadgeRenderer']['label'], compat_str)
647 if label:
648 badges.add(label.lower())
649 return badges
650
651 @staticmethod
652 def _get_text(data, getter=None, max_runs=None):
653 for get in variadic(getter):
654 d = try_get(data, get) if get is not None else data
655 text = try_get(d, lambda x: x['simpleText'], compat_str)
656 if text:
657 return text
658 runs = try_get(d, lambda x: x['runs'], list) or []
659 if not runs and isinstance(d, list):
660 runs = d
661
662 def get_runs(runs):
663 for run in runs[:min(len(runs), max_runs or len(runs))]:
664 yield try_get(run, lambda x: x['text'], compat_str) or ''
665
666 text = ''.join(get_runs(runs))
667 if text:
668 return text
669
    def _extract_response(self, item_id, query, note='Downloading API JSON', headers=None,
                          ytcfg=None, check_get_keys=None, ep='browse', fatal=True, api_hostname=None,
                          default_client='WEB'):
        """
        Call the innertube API via _call_api, retrying on intermittent HTTP
        errors (500/503/404) and on responses missing all of *check_get_keys*.

        Returns the parsed JSON response, or None when a non-fatal failure
        exhausted the retries ('extractor_retries' param, default 3).
        Raises ExtractorError when fatal and retries are exhausted, or when
        the response carries an error alert.
        """
        response = None
        last_error = None
        count = -1
        retries = self.get_param('extractor_retries', 3)
        if check_get_keys is None:
            check_get_keys = []
        while count < retries:
            count += 1
            if last_error:
                self.report_warning('%s. Retrying ...' % last_error)
            try:
                response = self._call_api(
                    ep=ep, fatal=True, headers=headers,
                    video_id=item_id, query=query,
                    context=self._extract_context(ytcfg, default_client),
                    api_key=self._extract_api_key(ytcfg, default_client),
                    api_hostname=api_hostname, default_client=default_client,
                    note='%s%s' % (note, ' (retry #%d)' % count if count else ''))
            except ExtractorError as e:
                if isinstance(e.cause, compat_HTTPError) and e.cause.code in (500, 503, 404):
                    # Downloading page may result in intermittent 5xx HTTP error
                    # Sometimes a 404 is also recieved. See: https://github.com/ytdl-org/youtube-dl/issues/28289
                    last_error = 'HTTP Error %s' % e.cause.code
                    if count < retries:
                        continue
                # Non-retryable error, or retries exhausted
                if fatal:
                    raise
                else:
                    self.report_warning(error_to_compat_str(e))
                    return

            else:
                # Youtube may send alerts if there was an issue with the continuation page
                try:
                    self._extract_and_report_alerts(response, expected=False)
                except ExtractorError as e:
                    if fatal:
                        raise
                    self.report_warning(error_to_compat_str(e))
                    return
                # Accept the response once any of the expected keys is present
                if not check_get_keys or dict_get(response, check_get_keys):
                    break
                # Youtube sometimes sends incomplete data
                # See: https://github.com/ytdl-org/youtube-dl/issues/28194
                last_error = 'Incomplete data received'
                if count >= retries:
                    if fatal:
                        raise ExtractorError(last_error)
                    else:
                        self.report_warning(last_error)
                        return
        return response
725
726 @staticmethod
727 def is_music_url(url):
728 return re.match(r'https?://music\.youtube\.com/', url) is not None
729
730 def _extract_video(self, renderer):
731 video_id = renderer.get('videoId')
732 title = self._get_text(renderer.get('title'))
733 description = self._get_text(renderer.get('descriptionSnippet'))
734 duration = parse_duration(self._get_text(renderer.get('lengthText')))
735 view_count_text = self._get_text(renderer.get('viewCountText')) or ''
736 view_count = str_to_int(self._search_regex(
737 r'^([\d,]+)', re.sub(r'\s', '', view_count_text),
738 'view count', default=None))
739
740 uploader = self._get_text(renderer, (lambda x: x['ownerText'], lambda x: x['shortBylineText']))
741
742 return {
743 '_type': 'url',
744 'ie_key': YoutubeIE.ie_key(),
745 'id': video_id,
746 'url': video_id,
747 'title': title,
748 'description': description,
749 'duration': duration,
750 'view_count': view_count,
751 'uploader': uploader,
752 }
753
754
class YoutubeIE(YoutubeBaseInfoExtractor):
    IE_DESC = 'YouTube.com'
    # Hostname regexes for Invidious (alternative YouTube frontend) instances,
    # interpolated into _VALID_URL below.
    _INVIDIOUS_SITES = (
        # invidious-redirect websites
        r'(?:www\.)?redirect\.invidious\.io',
        r'(?:(?:www|dev)\.)?invidio\.us',
        # Invidious instances taken from https://github.com/iv-org/documentation/blob/master/Invidious-Instances.md
        r'(?:www\.)?invidious\.pussthecat\.org',
        r'(?:www\.)?invidious\.zee\.li',
        r'(?:www\.)?invidious\.ethibox\.fr',
        r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',
        # youtube-dl invidious instances list
        r'(?:(?:www|no)\.)?invidiou\.sh',
        r'(?:(?:www|fi)\.)?invidious\.snopyta\.org',
        r'(?:www\.)?invidious\.kabi\.tk',
        r'(?:www\.)?invidious\.mastodon\.host',
        r'(?:www\.)?invidious\.zapashcanon\.fr',
        r'(?:www\.)?(?:invidious(?:-us)?|piped)\.kavin\.rocks',
        r'(?:www\.)?invidious\.tinfoil-hat\.net',
        r'(?:www\.)?invidious\.himiko\.cloud',
        r'(?:www\.)?invidious\.reallyancient\.tech',
        r'(?:www\.)?invidious\.tube',
        r'(?:www\.)?invidiou\.site',
        r'(?:www\.)?invidious\.site',
        r'(?:www\.)?invidious\.xyz',
        r'(?:www\.)?invidious\.nixnet\.xyz',
        r'(?:www\.)?invidious\.048596\.xyz',
        r'(?:www\.)?invidious\.drycat\.fr',
        r'(?:www\.)?inv\.skyn3t\.in',
        r'(?:www\.)?tube\.poal\.co',
        r'(?:www\.)?tube\.connect\.cafe',
        r'(?:www\.)?vid\.wxzm\.sx',
        r'(?:www\.)?vid\.mint\.lgbt',
        r'(?:www\.)?vid\.puffyan\.us',
        r'(?:www\.)?yewtu\.be',
        r'(?:www\.)?yt\.elukerio\.org',
        r'(?:www\.)?yt\.lelux\.fi',
        r'(?:www\.)?invidious\.ggc-project\.de',
        r'(?:www\.)?yt\.maisputain\.ovh',
        r'(?:www\.)?ytprivate\.com',
        r'(?:www\.)?invidious\.13ad\.de',
        r'(?:www\.)?invidious\.toot\.koeln',
        r'(?:www\.)?invidious\.fdn\.fr',
        r'(?:www\.)?watch\.nettohikari\.com',
        r'(?:www\.)?invidious\.namazso\.eu',
        r'(?:www\.)?invidious\.silkky\.cloud',
        r'(?:www\.)?invidious\.exonip\.de',
        r'(?:www\.)?invidious\.riverside\.rocks',
        r'(?:www\.)?invidious\.blamefran\.net',
        r'(?:www\.)?invidious\.moomoo\.de',
        r'(?:www\.)?ytb\.trom\.tf',
        r'(?:www\.)?yt\.cyberhost\.uk',
        r'(?:www\.)?kgg2m7yk5aybusll\.onion',
        r'(?:www\.)?qklhadlycap4cnod\.onion',
        r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',
        r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',
        r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',
        r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',
        r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',
        r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',
        r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',
        r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',
        r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',
        r'(?:www\.)?hpniueoejy4opn7bc4ftgazyqjoeqwlvh2uiku2xqku6zpoa4bf5ruid\.onion',
    )
    # Accepts full watch/embed/short URLs on youtube.com and many mirrors, or
    # a bare 11-character video ID; group 'id' captures the video ID.
    _VALID_URL = r"""(?x)^
                     (
                         (?:https?://|//)                                    # http(s):// or protocol-independent URL
                         (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com|
                            (?:www\.)?deturl\.com/www\.youtube\.com|
                            (?:www\.)?pwnyoutube\.com|
                            (?:www\.)?hooktube\.com|
                            (?:www\.)?yourepeat\.com|
                            tube\.majestyc\.net|
                            %(invidious)s|
                            youtube\.googleapis\.com)/                        # the various hostnames, with wildcard subdomains
                         (?:.*?\#/)?                                          # handle anchor (#/) redirect urls
                         (?:                                                  # the various things that can precede the ID:
                             (?:(?:v|embed|e)/(?!videoseries))                # v/ or embed/ or e/
                             |(?:                                             # or the v= param in all its forms
                                 (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)?  # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
                                 (?:\?|\#!?)                                  # the params delimiter ? or # or #!
                                 (?:.*?[&;])??                                # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
                                 v=
                             )
                         ))
                         |(?:
                            youtu\.be|                                        # just youtu.be/xxxx
                            vid\.plus|                                        # or vid.plus/xxxx
                            zwearz\.com/watch|                                # or zwearz.com/watch/xxxx
                            %(invidious)s
                         )/
                         |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
                         )
                     )?                                                       # all until now is optional -> you can pass the naked ID
                     (?P<id>[0-9A-Za-z_-]{11})                                # here is it! the YouTube video ID
                     (?(1).+)?                                                # if we found the ID, everything can follow
                     (?:\#|$)""" % {
        'invidious': '|'.join(_INVIDIOUS_SITES),
    }
    # Patterns extracting the player-JS id from a player URL (used for
    # signature-function caching); group 'id' captures the player id.
    _PLAYER_INFO_RE = (
        r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player',
        r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',
        r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$',
    )
    # Static metadata for known itags, keyed by the itag number as a string.
    # Used to supplement/override values parsed from the player response
    # (container, resolution, codecs, bitrate). '_rtmp' is a pseudo-entry
    # applied to RTMP streams rather than a real itag.
    _formats = {
        '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
        '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
        '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
        '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
        '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
        '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
        '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
        '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
        # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
        '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
        '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
        '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
        '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
        '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
        '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
        '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
        '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
        '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},

        # 3D videos (deprioritized via negative 'preference')
        '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
        '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
        '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
        '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
        '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
        '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
        '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},

        # Apple HTTP Live Streaming
        '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
        '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
        '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
        '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
        '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
        '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
        '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
        '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},

        # DASH mp4 video
        '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
        '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
        '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
        '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
        '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
        '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'},  # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
        '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
        '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
        '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
        '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
        '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
        '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},

        # Dash mp4 audio
        '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
        '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
        '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
        '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
        '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
        '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
        '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},

        # Dash webm
        '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
        '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
        '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
        '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
        '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
        '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
        '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
        '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
        '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
        '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
        '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
        '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},

        # Dash webm audio
        '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
        '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},

        # Dash webm audio with opus inside
        '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
        '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
        '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},

        # RTMP (unnamed)
        '_rtmp': {'protocol': 'rtmp'},

        # av01 video only formats sometimes served with "unknown" codecs
        '394': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
        '395': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
        '396': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
        '397': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
    }
    # Subtitle serialization formats requested from the timedtext endpoint
    _SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')

    # Player-response reason strings that indicate an age-gated video
    _AGE_GATE_REASONS = (
        'Sign in to confirm your age',
        'This video may be inappropriate for some users.',
        'Sorry, this content is age-restricted.')

    # Do not attempt X-Forwarded-For based geo bypass for single videos
    _GEO_BYPASS = False

    IE_NAME = 'youtube'
975 _TESTS = [
976 {
977 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
978 'info_dict': {
979 'id': 'BaW_jenozKc',
980 'ext': 'mp4',
981 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
982 'uploader': 'Philipp Hagemeister',
983 'uploader_id': 'phihag',
984 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
985 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
986 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
987 'upload_date': '20121002',
988 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
989 'categories': ['Science & Technology'],
990 'tags': ['youtube-dl'],
991 'duration': 10,
992 'view_count': int,
993 'like_count': int,
994 'dislike_count': int,
995 'start_time': 1,
996 'end_time': 9,
997 }
998 },
999 {
1000 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
1001 'note': 'Embed-only video (#1746)',
1002 'info_dict': {
1003 'id': 'yZIXLfi8CZQ',
1004 'ext': 'mp4',
1005 'upload_date': '20120608',
1006 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
1007 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
1008 'uploader': 'SET India',
1009 'uploader_id': 'setindia',
1010 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
1011 'age_limit': 18,
1012 },
1013 'skip': 'Private video',
1014 },
1015 {
1016 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=yZIXLfi8CZQ',
1017 'note': 'Use the first video ID in the URL',
1018 'info_dict': {
1019 'id': 'BaW_jenozKc',
1020 'ext': 'mp4',
1021 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
1022 'uploader': 'Philipp Hagemeister',
1023 'uploader_id': 'phihag',
1024 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
1025 'upload_date': '20121002',
1026 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
1027 'categories': ['Science & Technology'],
1028 'tags': ['youtube-dl'],
1029 'duration': 10,
1030 'view_count': int,
1031 'like_count': int,
1032 'dislike_count': int,
1033 },
1034 'params': {
1035 'skip_download': True,
1036 },
1037 },
1038 {
1039 'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
1040 'note': '256k DASH audio (format 141) via DASH manifest',
1041 'info_dict': {
1042 'id': 'a9LDPn-MO4I',
1043 'ext': 'm4a',
1044 'upload_date': '20121002',
1045 'uploader_id': '8KVIDEO',
1046 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
1047 'description': '',
1048 'uploader': '8KVIDEO',
1049 'title': 'UHDTV TEST 8K VIDEO.mp4'
1050 },
1051 'params': {
1052 'youtube_include_dash_manifest': True,
1053 'format': '141',
1054 },
1055 'skip': 'format 141 not served anymore',
1056 },
1057 # DASH manifest with encrypted signature
1058 {
1059 'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
1060 'info_dict': {
1061 'id': 'IB3lcPjvWLA',
1062 'ext': 'm4a',
1063 'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
1064 'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
1065 'duration': 244,
1066 'uploader': 'AfrojackVEVO',
1067 'uploader_id': 'AfrojackVEVO',
1068 'upload_date': '20131011',
1069 'abr': 129.495,
1070 },
1071 'params': {
1072 'youtube_include_dash_manifest': True,
1073 'format': '141/bestaudio[ext=m4a]',
1074 },
1075 },
1076 # Controversy video
1077 {
1078 'url': 'https://www.youtube.com/watch?v=T4XJQO3qol8',
1079 'info_dict': {
1080 'id': 'T4XJQO3qol8',
1081 'ext': 'mp4',
1082 'duration': 219,
1083 'upload_date': '20100909',
1084 'uploader': 'Amazing Atheist',
1085 'uploader_id': 'TheAmazingAtheist',
1086 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheAmazingAtheist',
1087 'title': 'Burning Everyone\'s Koran',
1088 'description': 'SUBSCRIBE: http://www.youtube.com/saturninefilms \r\n\r\nEven Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html',
1089 }
1090 },
1091 # Normal age-gate video (embed allowed)
1092 {
1093 'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
1094 'info_dict': {
1095 'id': 'HtVdAasjOgU',
1096 'ext': 'mp4',
1097 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
1098 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
1099 'duration': 142,
1100 'uploader': 'The Witcher',
1101 'uploader_id': 'WitcherGame',
1102 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
1103 'upload_date': '20140605',
1104 'age_limit': 18,
1105 },
1106 },
1107 # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
1108 # YouTube Red ad is not captured for creator
1109 {
1110 'url': '__2ABJjxzNo',
1111 'info_dict': {
1112 'id': '__2ABJjxzNo',
1113 'ext': 'mp4',
1114 'duration': 266,
1115 'upload_date': '20100430',
1116 'uploader_id': 'deadmau5',
1117 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
1118 'creator': 'deadmau5',
1119 'description': 'md5:6cbcd3a92ce1bc676fc4d6ab4ace2336',
1120 'uploader': 'deadmau5',
1121 'title': 'Deadmau5 - Some Chords (HD)',
1122 'alt_title': 'Some Chords',
1123 },
1124 'expected_warnings': [
1125 'DASH manifest missing',
1126 ]
1127 },
1128 # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
1129 {
1130 'url': 'lqQg6PlCWgI',
1131 'info_dict': {
1132 'id': 'lqQg6PlCWgI',
1133 'ext': 'mp4',
1134 'duration': 6085,
1135 'upload_date': '20150827',
1136 'uploader_id': 'olympic',
1137 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
1138 'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
1139 'uploader': 'Olympic',
1140 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
1141 },
1142 'params': {
1143 'skip_download': 'requires avconv',
1144 }
1145 },
1146 # Non-square pixels
1147 {
1148 'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
1149 'info_dict': {
1150 'id': '_b-2C3KPAM0',
1151 'ext': 'mp4',
1152 'stretched_ratio': 16 / 9.,
1153 'duration': 85,
1154 'upload_date': '20110310',
1155 'uploader_id': 'AllenMeow',
1156 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
1157 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
1158 'uploader': '孫ᄋᄅ',
1159 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
1160 },
1161 },
1162 # url_encoded_fmt_stream_map is empty string
1163 {
1164 'url': 'qEJwOuvDf7I',
1165 'info_dict': {
1166 'id': 'qEJwOuvDf7I',
1167 'ext': 'webm',
1168 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
1169 'description': '',
1170 'upload_date': '20150404',
1171 'uploader_id': 'spbelect',
1172 'uploader': 'Наблюдатели Петербурга',
1173 },
1174 'params': {
1175 'skip_download': 'requires avconv',
1176 },
1177 'skip': 'This live event has ended.',
1178 },
1179 # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
1180 {
1181 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
1182 'info_dict': {
1183 'id': 'FIl7x6_3R5Y',
1184 'ext': 'webm',
1185 'title': 'md5:7b81415841e02ecd4313668cde88737a',
1186 'description': 'md5:116377fd2963b81ec4ce64b542173306',
1187 'duration': 220,
1188 'upload_date': '20150625',
1189 'uploader_id': 'dorappi2000',
1190 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
1191 'uploader': 'dorappi2000',
1192 'formats': 'mincount:31',
1193 },
1194 'skip': 'not actual anymore',
1195 },
1196 # DASH manifest with segment_list
1197 {
1198 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
1199 'md5': '8ce563a1d667b599d21064e982ab9e31',
1200 'info_dict': {
1201 'id': 'CsmdDsKjzN8',
1202 'ext': 'mp4',
1203 'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510
1204 'uploader': 'Airtek',
1205 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
1206 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
1207 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
1208 },
1209 'params': {
1210 'youtube_include_dash_manifest': True,
1211 'format': '135', # bestvideo
1212 },
1213 'skip': 'This live event has ended.',
1214 },
1215 {
1216 # Multifeed videos (multiple cameras), URL is for Main Camera
1217 'url': 'https://www.youtube.com/watch?v=jvGDaLqkpTg',
1218 'info_dict': {
1219 'id': 'jvGDaLqkpTg',
1220 'title': 'Tom Clancy Free Weekend Rainbow Whatever',
1221 'description': 'md5:e03b909557865076822aa169218d6a5d',
1222 },
1223 'playlist': [{
1224 'info_dict': {
1225 'id': 'jvGDaLqkpTg',
1226 'ext': 'mp4',
1227 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Main Camera)',
1228 'description': 'md5:e03b909557865076822aa169218d6a5d',
1229 'duration': 10643,
1230 'upload_date': '20161111',
1231 'uploader': 'Team PGP',
1232 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1233 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
1234 },
1235 }, {
1236 'info_dict': {
1237 'id': '3AKt1R1aDnw',
1238 'ext': 'mp4',
1239 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 2)',
1240 'description': 'md5:e03b909557865076822aa169218d6a5d',
1241 'duration': 10991,
1242 'upload_date': '20161111',
1243 'uploader': 'Team PGP',
1244 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1245 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
1246 },
1247 }, {
1248 'info_dict': {
1249 'id': 'RtAMM00gpVc',
1250 'ext': 'mp4',
1251 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 3)',
1252 'description': 'md5:e03b909557865076822aa169218d6a5d',
1253 'duration': 10995,
1254 'upload_date': '20161111',
1255 'uploader': 'Team PGP',
1256 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1257 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
1258 },
1259 }, {
1260 'info_dict': {
1261 'id': '6N2fdlP3C5U',
1262 'ext': 'mp4',
1263 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 4)',
1264 'description': 'md5:e03b909557865076822aa169218d6a5d',
1265 'duration': 10990,
1266 'upload_date': '20161111',
1267 'uploader': 'Team PGP',
1268 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1269 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
1270 },
1271 }],
1272 'params': {
1273 'skip_download': True,
1274 },
1275 },
1276 {
1277 # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
1278 'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
1279 'info_dict': {
1280 'id': 'gVfLd0zydlo',
1281 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
1282 },
1283 'playlist_count': 2,
1284 'skip': 'Not multifeed anymore',
1285 },
1286 {
1287 'url': 'https://vid.plus/FlRa-iH7PGw',
1288 'only_matching': True,
1289 },
1290 {
1291 'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
1292 'only_matching': True,
1293 },
1294 {
1295 # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
1296 # Also tests cut-off URL expansion in video description (see
1297 # https://github.com/ytdl-org/youtube-dl/issues/1892,
1298 # https://github.com/ytdl-org/youtube-dl/issues/8164)
1299 'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
1300 'info_dict': {
1301 'id': 'lsguqyKfVQg',
1302 'ext': 'mp4',
1303 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
1304 'alt_title': 'Dark Walk - Position Music',
1305 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
1306 'duration': 133,
1307 'upload_date': '20151119',
1308 'uploader_id': 'IronSoulElf',
1309 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
1310 'uploader': 'IronSoulElf',
1311 'creator': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
1312 'track': 'Dark Walk - Position Music',
1313 'artist': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
1314 'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
1315 },
1316 'params': {
1317 'skip_download': True,
1318 },
1319 },
1320 {
1321 # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
1322 'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
1323 'only_matching': True,
1324 },
1325 {
1326 # Video with yt:stretch=17:0
1327 'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
1328 'info_dict': {
1329 'id': 'Q39EVAstoRM',
1330 'ext': 'mp4',
1331 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
1332 'description': 'md5:ee18a25c350637c8faff806845bddee9',
1333 'upload_date': '20151107',
1334 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
1335 'uploader': 'CH GAMER DROID',
1336 },
1337 'params': {
1338 'skip_download': True,
1339 },
1340 'skip': 'This video does not exist.',
1341 },
1342 {
1343 # Video with incomplete 'yt:stretch=16:'
1344 'url': 'https://www.youtube.com/watch?v=FRhJzUSJbGI',
1345 'only_matching': True,
1346 },
1347 {
1348 # Video licensed under Creative Commons
1349 'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
1350 'info_dict': {
1351 'id': 'M4gD1WSo5mA',
1352 'ext': 'mp4',
1353 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
1354 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
1355 'duration': 721,
1356 'upload_date': '20150127',
1357 'uploader_id': 'BerkmanCenter',
1358 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
1359 'uploader': 'The Berkman Klein Center for Internet & Society',
1360 'license': 'Creative Commons Attribution license (reuse allowed)',
1361 },
1362 'params': {
1363 'skip_download': True,
1364 },
1365 },
1366 {
1367 # Channel-like uploader_url
1368 'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
1369 'info_dict': {
1370 'id': 'eQcmzGIKrzg',
1371 'ext': 'mp4',
1372 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
1373 'description': 'md5:13a2503d7b5904ef4b223aa101628f39',
1374 'duration': 4060,
1375 'upload_date': '20151119',
1376 'uploader': 'Bernie Sanders',
1377 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
1378 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
1379 'license': 'Creative Commons Attribution license (reuse allowed)',
1380 },
1381 'params': {
1382 'skip_download': True,
1383 },
1384 },
1385 {
1386 'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
1387 'only_matching': True,
1388 },
1389 {
1390 # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
1391 'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
1392 'only_matching': True,
1393 },
1394 {
1395 # Rental video preview
1396 'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
1397 'info_dict': {
1398 'id': 'uGpuVWrhIzE',
1399 'ext': 'mp4',
1400 'title': 'Piku - Trailer',
1401 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
1402 'upload_date': '20150811',
1403 'uploader': 'FlixMatrix',
1404 'uploader_id': 'FlixMatrixKaravan',
1405 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
1406 'license': 'Standard YouTube License',
1407 },
1408 'params': {
1409 'skip_download': True,
1410 },
1411 'skip': 'This video is not available.',
1412 },
1413 {
1414 # YouTube Red video with episode data
1415 'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
1416 'info_dict': {
1417 'id': 'iqKdEhx-dD4',
1418 'ext': 'mp4',
1419 'title': 'Isolation - Mind Field (Ep 1)',
1420 'description': 'md5:f540112edec5d09fc8cc752d3d4ba3cd',
1421 'duration': 2085,
1422 'upload_date': '20170118',
1423 'uploader': 'Vsauce',
1424 'uploader_id': 'Vsauce',
1425 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
1426 'series': 'Mind Field',
1427 'season_number': 1,
1428 'episode_number': 1,
1429 },
1430 'params': {
1431 'skip_download': True,
1432 },
1433 'expected_warnings': [
1434 'Skipping DASH manifest',
1435 ],
1436 },
1437 {
1438 # The following content has been identified by the YouTube community
1439 # as inappropriate or offensive to some audiences.
1440 'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
1441 'info_dict': {
1442 'id': '6SJNVb0GnPI',
1443 'ext': 'mp4',
1444 'title': 'Race Differences in Intelligence',
1445 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
1446 'duration': 965,
1447 'upload_date': '20140124',
1448 'uploader': 'New Century Foundation',
1449 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
1450 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
1451 },
1452 'params': {
1453 'skip_download': True,
1454 },
1455 'skip': 'This video has been removed for violating YouTube\'s policy on hate speech.',
1456 },
1457 {
1458 # itag 212
1459 'url': '1t24XAntNCY',
1460 'only_matching': True,
1461 },
1462 {
1463 # geo restricted to JP
1464 'url': 'sJL6WA-aGkQ',
1465 'only_matching': True,
1466 },
1467 {
1468 'url': 'https://invidio.us/watch?v=BaW_jenozKc',
1469 'only_matching': True,
1470 },
1471 {
1472 'url': 'https://redirect.invidious.io/watch?v=BaW_jenozKc',
1473 'only_matching': True,
1474 },
1475 {
1476 # from https://nitter.pussthecat.org/YouTube/status/1360363141947944964#m
1477 'url': 'https://redirect.invidious.io/Yh0AhrY9GjA',
1478 'only_matching': True,
1479 },
1480 {
1481 # DRM protected
1482 'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
1483 'only_matching': True,
1484 },
1485 {
1486 # Video with unsupported adaptive stream type formats
1487 'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
1488 'info_dict': {
1489 'id': 'Z4Vy8R84T1U',
1490 'ext': 'mp4',
1491 'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
1492 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
1493 'duration': 433,
1494 'upload_date': '20130923',
1495 'uploader': 'Amelia Putri Harwita',
1496 'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',
1497 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',
1498 'formats': 'maxcount:10',
1499 },
1500 'params': {
1501 'skip_download': True,
1502 'youtube_include_dash_manifest': False,
1503 },
1504 'skip': 'not actual anymore',
1505 },
1506 {
1507 # Youtube Music Auto-generated description
1508 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1509 'info_dict': {
1510 'id': 'MgNrAu2pzNs',
1511 'ext': 'mp4',
1512 'title': 'Voyeur Girl',
1513 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
1514 'upload_date': '20190312',
1515 'uploader': 'Stephen - Topic',
1516 'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',
1517 'artist': 'Stephen',
1518 'track': 'Voyeur Girl',
1519 'album': 'it\'s too much love to know my dear',
1520 'release_date': '20190313',
1521 'release_year': 2019,
1522 },
1523 'params': {
1524 'skip_download': True,
1525 },
1526 },
1527 {
1528 'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',
1529 'only_matching': True,
1530 },
1531 {
1532 # invalid -> valid video id redirection
1533 'url': 'DJztXj2GPfl',
1534 'info_dict': {
1535 'id': 'DJztXj2GPfk',
1536 'ext': 'mp4',
1537 'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',
1538 'description': 'md5:bf577a41da97918e94fa9798d9228825',
1539 'upload_date': '20090125',
1540 'uploader': 'Prochorowka',
1541 'uploader_id': 'Prochorowka',
1542 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',
1543 'artist': 'Panjabi MC',
1544 'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',
1545 'album': 'Beware of the Boys (Mundian To Bach Ke)',
1546 },
1547 'params': {
1548 'skip_download': True,
1549 },
1550 'skip': 'Video unavailable',
1551 },
1552 {
1553 # empty description results in an empty string
1554 'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',
1555 'info_dict': {
1556 'id': 'x41yOUIvK2k',
1557 'ext': 'mp4',
1558 'title': 'IMG 3456',
1559 'description': '',
1560 'upload_date': '20170613',
1561 'uploader_id': 'ElevageOrVert',
1562 'uploader': 'ElevageOrVert',
1563 },
1564 'params': {
1565 'skip_download': True,
1566 },
1567 },
1568 {
1569 # with '};' inside yt initial data (see [1])
1570 # see [2] for an example with '};' inside ytInitialPlayerResponse
1571 # 1. https://github.com/ytdl-org/youtube-dl/issues/27093
1572 # 2. https://github.com/ytdl-org/youtube-dl/issues/27216
1573 'url': 'https://www.youtube.com/watch?v=CHqg6qOn4no',
1574 'info_dict': {
1575 'id': 'CHqg6qOn4no',
1576 'ext': 'mp4',
1577 'title': 'Part 77 Sort a list of simple types in c#',
1578 'description': 'md5:b8746fa52e10cdbf47997903f13b20dc',
1579 'upload_date': '20130831',
1580 'uploader_id': 'kudvenkat',
1581 'uploader': 'kudvenkat',
1582 },
1583 'params': {
1584 'skip_download': True,
1585 },
1586 },
1587 {
1588 # another example of '};' in ytInitialData
1589 'url': 'https://www.youtube.com/watch?v=gVfgbahppCY',
1590 'only_matching': True,
1591 },
1592 {
1593 'url': 'https://www.youtube.com/watch_popup?v=63RmMXCd_bQ',
1594 'only_matching': True,
1595 },
1596 {
1597 # https://github.com/ytdl-org/youtube-dl/pull/28094
1598 'url': 'OtqTfy26tG0',
1599 'info_dict': {
1600 'id': 'OtqTfy26tG0',
1601 'ext': 'mp4',
1602 'title': 'Burn Out',
1603 'description': 'md5:8d07b84dcbcbfb34bc12a56d968b6131',
1604 'upload_date': '20141120',
1605 'uploader': 'The Cinematic Orchestra - Topic',
1606 'uploader_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',
1607 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',
1608 'artist': 'The Cinematic Orchestra',
1609 'track': 'Burn Out',
1610 'album': 'Every Day',
1611 'release_data': None,
1612 'release_year': None,
1613 },
1614 'params': {
1615 'skip_download': True,
1616 },
1617 },
1618 {
1619 # controversial video, only works with bpctr when authenticated with cookies
1620 'url': 'https://www.youtube.com/watch?v=nGC3D_FkCmg',
1621 'only_matching': True,
1622 },
1623 {
1624 # restricted location, https://github.com/ytdl-org/youtube-dl/issues/28685
1625 'url': 'cBvYw8_A0vQ',
1626 'info_dict': {
1627 'id': 'cBvYw8_A0vQ',
1628 'ext': 'mp4',
1629 'title': '4K Ueno Okachimachi Street Scenes 上野御徒町歩き',
1630 'description': 'md5:ea770e474b7cd6722b4c95b833c03630',
1631 'upload_date': '20201120',
1632 'uploader': 'Walk around Japan',
1633 'uploader_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',
1634 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',
1635 },
1636 'params': {
1637 'skip_download': True,
1638 },
1639 }, {
1640 # Has multiple audio streams
1641 'url': 'WaOKSUlf4TM',
1642 'only_matching': True
1643 }, {
1644 # Requires Premium: has format 141 when requested using YTM url
1645 'url': 'https://music.youtube.com/watch?v=XclachpHxis',
1646 'only_matching': True
1647 }, {
1648 # multiple subtitles with same lang_code
1649 'url': 'https://www.youtube.com/watch?v=wsQiKKfKxug',
1650 'only_matching': True,
1651 }, {
1652 # Force use android client fallback
1653 'url': 'https://www.youtube.com/watch?v=YOelRv7fMxY',
1654 'info_dict': {
1655 'id': 'YOelRv7fMxY',
1656 'title': 'Digging a Secret Tunnel from my Workshop',
1657 'ext': '3gp',
1658 'upload_date': '20210624',
1659 'channel_id': 'UCp68_FLety0O-n9QU6phsgw',
1660 'uploader': 'colinfurze',
1661 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCp68_FLety0O-n9QU6phsgw',
1662 'description': 'md5:ecb672623246d98c6c562eed6ae798c3'
1663 },
1664 'params': {
1665 'format': '17', # 3gp format available on android
1666 'extractor_args': {'youtube': {'player_client': ['android']}},
1667 },
1668 },
1669 {
1670 # Skip download of additional client configs (remix client config in this case)
1671 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1672 'only_matching': True,
1673 'params': {
1674 'extractor_args': {'youtube': {'player_skip': ['configs']}},
1675 },
1676 }
1677 ]
1678
1679 @classmethod
1680 def suitable(cls, url):
1681 # Hack for lazy extractors until more generic solution is implemented
1682 # (see #28780)
1683 from .youtube import parse_qs
1684 qs = parse_qs(url)
1685 if qs.get('list', [None])[0]:
1686 return False
1687 return super(YoutubeIE, cls).suitable(url)
1688
1689 def __init__(self, *args, **kwargs):
1690 super(YoutubeIE, self).__init__(*args, **kwargs)
1691 self._code_cache = {}
1692 self._player_cache = {}
1693
1694 def _extract_player_url(self, ytcfg=None, webpage=None):
1695 player_url = try_get(ytcfg, (lambda x: x['PLAYER_JS_URL']), str)
1696 if not player_url:
1697 player_url = self._search_regex(
1698 r'"(?:PLAYER_JS_URL|jsUrl)"\s*:\s*"([^"]+)"',
1699 webpage, 'player URL', fatal=False)
1700 if player_url.startswith('//'):
1701 player_url = 'https:' + player_url
1702 elif not re.match(r'https?://', player_url):
1703 player_url = compat_urlparse.urljoin(
1704 'https://www.youtube.com', player_url)
1705 return player_url
1706
1707 def _signature_cache_id(self, example_sig):
1708 """ Return a string representation of a signature """
1709 return '.'.join(compat_str(len(part)) for part in example_sig.split('.'))
1710
1711 @classmethod
1712 def _extract_player_info(cls, player_url):
1713 for player_re in cls._PLAYER_INFO_RE:
1714 id_m = re.search(player_re, player_url)
1715 if id_m:
1716 break
1717 else:
1718 raise ExtractorError('Cannot identify player %r' % player_url)
1719 return id_m.group('id')
1720
1721 def _load_player(self, video_id, player_url, fatal=True) -> bool:
1722 player_id = self._extract_player_info(player_url)
1723 if player_id not in self._code_cache:
1724 self._code_cache[player_id] = self._download_webpage(
1725 player_url, video_id, fatal=fatal,
1726 note='Downloading player ' + player_id,
1727 errnote='Download of %s failed' % player_url)
1728 return player_id in self._code_cache
1729
    def _extract_signature_function(self, video_id, player_url, example_sig):
        """Return a callable that descrambles an encrypted signature string.

        Results are cached on disk, keyed by player id plus the "shape"
        (dot-separated part lengths) of *example_sig*. Returns None when the
        player JS cannot be loaded (with fatal=False downloads).
        """
        player_id = self._extract_player_info(player_url)

        # Read from filesystem cache
        func_id = 'js_%s_%s' % (
            player_id, self._signature_cache_id(example_sig))
        assert os.path.basename(func_id) == func_id

        cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
        if cache_spec is not None:
            # Cached spec is a character-permutation table: output char i
            # comes from input position cache_spec[i]
            return lambda s: ''.join(s[i] for i in cache_spec)

        if self._load_player(video_id, player_url):
            code = self._code_cache[player_id]
            res = self._parse_sig_js(code)

            # Derive the permutation table by running the extracted JS
            # function on chr(0)..chr(n-1), so future runs can skip the JS
            test_string = ''.join(map(compat_chr, range(len(example_sig))))
            cache_res = res(test_string)
            cache_spec = [ord(c) for c in cache_res]

            self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec)
            return res
1752
    def _print_sig_code(self, func, example_sig):
        """Print Python source equivalent to the extracted signature function.

        Runs *func* on a known test string, recovers the character permutation
        it performs, and renders it as slice/index expressions (debug aid for
        the youtube_print_sig_code option).
        """
        def gen_sig_code(idxs):
            def _genslice(start, end, step):
                # Render a contiguous run of indices as a Python slice
                starts = '' if start == 0 else str(start)
                ends = (':%d' % (end + step)) if end + step >= 0 else ':'
                steps = '' if step == 1 else (':%d' % step)
                return 's[%s%s%s]' % (starts, ends, steps)

            step = None
            # Quelch pyflakes warnings - start will be set when step is set
            start = '(Never used)'
            # Run-length-encode the index list: consecutive +/-1 steps become
            # slices, isolated indices become single subscripts
            for i, prev in zip(idxs[1:], idxs[:-1]):
                if step is not None:
                    if i - prev == step:
                        continue
                    yield _genslice(start, prev, step)
                    step = None
                    continue
                if i - prev in [-1, 1]:
                    step = i - prev
                    start = prev
                    continue
                else:
                    yield 's[%d]' % prev
            if step is None:
                yield 's[%d]' % i
            else:
                yield _genslice(start, i, step)

        test_string = ''.join(map(compat_chr, range(len(example_sig))))
        cache_res = func(test_string)
        cache_spec = [ord(c) for c in cache_res]
        expr_code = ' + '.join(gen_sig_code(cache_spec))
        signature_id_tuple = '(%s)' % (
            ', '.join(compat_str(len(p)) for p in example_sig.split('.')))
        code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
                '    return %s\n') % (signature_id_tuple, expr_code)
        self.to_screen('Extracted signature function:\n' + code)
1791
    def _parse_sig_js(self, jscode):
        """Locate the signature-descrambling function in the player JS and
        wrap it as a Python callable via JSInterpreter.

        The patterns below match the many historical shapes of the call site
        in base.js; the named group 'sig' captures the function name.
        """
        funcname = self._search_regex(
            (r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\bm=(?P<sig>[a-zA-Z0-9$]{2})\(decodeURIComponent\(h\.s\)\)',
             r'\bc&&\(c=(?P<sig>[a-zA-Z0-9$]{2})\(decodeURIComponent\(c\)\)',
             r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\);[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\)',
             r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
             r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
             # Obsolete patterns
             r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
             r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\('),
            jscode, 'Initial JS player signature function name', group='sig')

        jsi = JSInterpreter(jscode)
        initial_function = jsi.extract_function(funcname)
        # The JS function takes its arguments as a list
        return lambda s: initial_function([s])
1815
1816 def _decrypt_signature(self, s, video_id, player_url):
1817 """Turn the encrypted s field into a working signature"""
1818
1819 if player_url is None:
1820 raise ExtractorError('Cannot decrypt signature without player_url')
1821
1822 try:
1823 player_id = (player_url, self._signature_cache_id(s))
1824 if player_id not in self._player_cache:
1825 func = self._extract_signature_function(
1826 video_id, player_url, s
1827 )
1828 self._player_cache[player_id] = func
1829 func = self._player_cache[player_id]
1830 if self.get_param('youtube_print_sig_code'):
1831 self._print_sig_code(func, s)
1832 return func(s)
1833 except Exception as e:
1834 tb = traceback.format_exc()
1835 raise ExtractorError(
1836 'Signature extraction failed: ' + tb, cause=e)
1837
1838 def _extract_signature_timestamp(self, video_id, player_url, ytcfg=None, fatal=False):
1839 """
1840 Extract signatureTimestamp (sts)
1841 Required to tell API what sig/player version is in use.
1842 """
1843 sts = None
1844 if isinstance(ytcfg, dict):
1845 sts = int_or_none(ytcfg.get('STS'))
1846
1847 if not sts:
1848 # Attempt to extract from player
1849 if player_url is None:
1850 error_msg = 'Cannot extract signature timestamp without player_url.'
1851 if fatal:
1852 raise ExtractorError(error_msg)
1853 self.report_warning(error_msg)
1854 return
1855 if self._load_player(video_id, player_url, fatal=fatal):
1856 player_id = self._extract_player_info(player_url)
1857 code = self._code_cache[player_id]
1858 sts = int_or_none(self._search_regex(
1859 r'(?:signatureTimestamp|sts)\s*:\s*(?P<sts>[0-9]{5})', code,
1860 'JS player signature timestamp', group='sts', fatal=fatal))
1861 return sts
1862
1863 def _mark_watched(self, video_id, player_response):
1864 playback_url = url_or_none(try_get(
1865 player_response,
1866 lambda x: x['playbackTracking']['videostatsPlaybackUrl']['baseUrl']))
1867 if not playback_url:
1868 return
1869 parsed_playback_url = compat_urlparse.urlparse(playback_url)
1870 qs = compat_urlparse.parse_qs(parsed_playback_url.query)
1871
1872 # cpn generation algorithm is reverse engineered from base.js.
1873 # In fact it works even with dummy cpn.
1874 CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
1875 cpn = ''.join((CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(0, 16)))
1876
1877 qs.update({
1878 'ver': ['2'],
1879 'cpn': [cpn],
1880 })
1881 playback_url = compat_urlparse.urlunparse(
1882 parsed_playback_url._replace(query=compat_urllib_parse_urlencode(qs, True)))
1883
1884 self._download_webpage(
1885 playback_url, video_id, 'Marking watched',
1886 'Unable to mark watched', fatal=False)
1887
    @staticmethod
    def _extract_urls(webpage):
        """Return YouTube embed URLs/ids found in *webpage*.

        Covers iframe/embed/object players, lazyYT embeds (ids, not URLs),
        and the Wordpress "YouTube Video Importer" plugin markup.
        """
        # Embedded YouTube player
        entries = [
            unescapeHTML(mobj.group('url'))
            for mobj in re.finditer(r'''(?x)
                (?:
                    <iframe[^>]+?src=|
                    data-video-url=|
                    <embed[^>]+?src=|
                    embedSWF\(?:\s*|
                    <object[^>]+data=|
                    new\s+SWFObject\(
                )
                (["\'])
                (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
                (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
                \1''', webpage)]

        # lazyYT YouTube embed
        entries.extend(list(map(
            unescapeHTML,
            re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage))))

        # Wordpress "YouTube Video Importer" plugin
        matches = re.findall(r'''(?x)<div[^>]+
            class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
            data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
        entries.extend(m[-1] for m in matches)

        return entries
1919
1920 @staticmethod
1921 def _extract_url(webpage):
1922 urls = YoutubeIE._extract_urls(webpage)
1923 return urls[0] if urls else None
1924
1925 @classmethod
1926 def extract_id(cls, url):
1927 mobj = re.match(cls._VALID_URL, url, re.VERBOSE)
1928 if mobj is None:
1929 raise ExtractorError('Invalid URL: %s' % url)
1930 video_id = mobj.group(2)
1931 return video_id
1932
1933 def _extract_chapters_from_json(self, data, video_id, duration):
1934 chapters_list = try_get(
1935 data,
1936 lambda x: x['playerOverlays']
1937 ['playerOverlayRenderer']
1938 ['decoratedPlayerBarRenderer']
1939 ['decoratedPlayerBarRenderer']
1940 ['playerBar']
1941 ['chapteredPlayerBarRenderer']
1942 ['chapters'],
1943 list)
1944 if not chapters_list:
1945 return
1946
1947 def chapter_time(chapter):
1948 return float_or_none(
1949 try_get(
1950 chapter,
1951 lambda x: x['chapterRenderer']['timeRangeStartMillis'],
1952 int),
1953 scale=1000)
1954 chapters = []
1955 for next_num, chapter in enumerate(chapters_list, start=1):
1956 start_time = chapter_time(chapter)
1957 if start_time is None:
1958 continue
1959 end_time = (chapter_time(chapters_list[next_num])
1960 if next_num < len(chapters_list) else duration)
1961 if end_time is None:
1962 continue
1963 title = try_get(
1964 chapter, lambda x: x['chapterRenderer']['title']['simpleText'],
1965 compat_str)
1966 chapters.append({
1967 'start_time': start_time,
1968 'end_time': end_time,
1969 'title': title,
1970 })
1971 return chapters
1972
1973 def _extract_yt_initial_variable(self, webpage, regex, video_id, name):
1974 return self._parse_json(self._search_regex(
1975 (r'%s\s*%s' % (regex, self._YT_INITIAL_BOUNDARY_RE),
1976 regex), webpage, name, default='{}'), video_id, fatal=False)
1977
1978 @staticmethod
1979 def parse_time_text(time_text):
1980 """
1981 Parse the comment time text
1982 time_text is in the format 'X units ago (edited)'
1983 """
1984 time_text_split = time_text.split(' ')
1985 if len(time_text_split) >= 3:
1986 return datetime_from_str('now-%s%s' % (time_text_split[0], time_text_split[1]), precision='auto')
1987
1988 def _extract_comment(self, comment_renderer, parent=None):
1989 comment_id = comment_renderer.get('commentId')
1990 if not comment_id:
1991 return
1992
1993 text = self._get_text(comment_renderer.get('contentText'))
1994
1995 # note: timestamp is an estimate calculated from the current time and time_text
1996 time_text = self._get_text(comment_renderer.get('publishedTimeText')) or ''
1997 time_text_dt = self.parse_time_text(time_text)
1998 if isinstance(time_text_dt, datetime.datetime):
1999 timestamp = calendar.timegm(time_text_dt.timetuple())
2000 author = self._get_text(comment_renderer.get('authorText'))
2001 author_id = try_get(comment_renderer,
2002 lambda x: x['authorEndpoint']['browseEndpoint']['browseId'], compat_str)
2003
2004 votes = parse_count(try_get(comment_renderer, (lambda x: x['voteCount']['simpleText'],
2005 lambda x: x['likeCount']), compat_str)) or 0
2006 author_thumbnail = try_get(comment_renderer,
2007 lambda x: x['authorThumbnail']['thumbnails'][-1]['url'], compat_str)
2008
2009 author_is_uploader = try_get(comment_renderer, lambda x: x['authorIsChannelOwner'], bool)
2010 is_favorited = 'creatorHeart' in (try_get(
2011 comment_renderer, lambda x: x['actionButtons']['commentActionButtonsRenderer'], dict) or {})
2012 return {
2013 'id': comment_id,
2014 'text': text,
2015 'timestamp': timestamp,
2016 'time_text': time_text,
2017 'like_count': votes,
2018 'is_favorited': is_favorited,
2019 'author': author,
2020 'author_id': author_id,
2021 'author_thumbnail': author_thumbnail,
2022 'author_is_uploader': author_is_uploader,
2023 'parent': parent or 'root'
2024 }
2025
    def _comment_entries(self, root_continuation_data, identity_token, account_syncid,
                         ytcfg, video_id, parent=None, comment_counts=None):
        """Generator yielding comment info dicts (plus int estimated-total markers).

        Recurses once into reply threads (YouTube comments have a max depth
        of 2). *comment_counts* is a shared mutable list:
        [comments so far, est. total comments, current reply thread #].
        """

        def extract_header(contents):
            # Pull the total comment count and the continuation for the
            # requested sort order out of the comments header renderer
            _total_comments = 0
            _continuation = None
            for content in contents:
                comments_header_renderer = try_get(content, lambda x: x['commentsHeaderRenderer'])
                expected_comment_count = parse_count(self._get_text(
                    comments_header_renderer, (lambda x: x['countText'], lambda x: x['commentsCount']), max_runs=1))

                if expected_comment_count:
                    comment_counts[1] = expected_comment_count
                    self.to_screen('Downloading ~%d comments' % expected_comment_count)
                    _total_comments = comment_counts[1]
                sort_mode_str = self._configuration_arg('comment_sort', [''])[0]
                comment_sort_index = int(sort_mode_str != 'top')  # 1 = new, 0 = top

                sort_menu_item = try_get(
                    comments_header_renderer,
                    lambda x: x['sortMenu']['sortFilterSubMenuRenderer']['subMenuItems'][comment_sort_index], dict) or {}
                sort_continuation_ep = sort_menu_item.get('serviceEndpoint') or {}

                _continuation = self._extract_continuation_ep_data(sort_continuation_ep) or self._extract_continuation(sort_menu_item)
                if not _continuation:
                    continue

                sort_text = sort_menu_item.get('title')
                if isinstance(sort_text, compat_str):
                    sort_text = sort_text.lower()
                else:
                    sort_text = 'top comments' if comment_sort_index == 0 else 'newest first'
                self.to_screen('Sorting comments by %s' % sort_text)
                break
            return _total_comments, _continuation

        def extract_thread(contents):
            # Yield each comment in the thread, then recurse into its replies
            if not parent:
                comment_counts[2] = 0
            for content in contents:
                comment_thread_renderer = try_get(content, lambda x: x['commentThreadRenderer'])
                comment_renderer = try_get(
                    comment_thread_renderer, (lambda x: x['comment']['commentRenderer'], dict)) or try_get(
                    content, (lambda x: x['commentRenderer'], dict))

                if not comment_renderer:
                    continue
                comment = self._extract_comment(comment_renderer, parent)
                if not comment:
                    continue
                comment_counts[0] += 1
                yield comment
                # Attempt to get the replies
                comment_replies_renderer = try_get(
                    comment_thread_renderer, lambda x: x['replies']['commentRepliesRenderer'], dict)

                if comment_replies_renderer:
                    comment_counts[2] += 1
                    comment_entries_iter = self._comment_entries(
                        comment_replies_renderer, identity_token, account_syncid, ytcfg,
                        video_id, parent=comment.get('id'), comment_counts=comment_counts)

                    for reply_comment in comment_entries_iter:
                        yield reply_comment

        # YouTube comments have a max depth of 2
        max_depth = int_or_none(self._configuration_arg('max_comment_depth', [''])[0]) or float('inf')
        if max_depth == 1 and parent:
            return
        if not comment_counts:
            # comment so far, est. total comments, current comment thread #
            comment_counts = [0, 0, 0]

        continuation = self._extract_continuation(root_continuation_data)
        if continuation and len(continuation['continuation']) < 27:
            self.write_debug('Detected old API continuation token. Generating new API compatible token.')
            continuation_token = self._generate_comment_continuation(video_id)
            continuation = self._build_api_continuation_query(continuation_token, None)

        visitor_data = None
        is_first_continuation = parent is None

        # Page through the comment API until no continuation remains
        for page_num in itertools.count(0):
            if not continuation:
                break
            headers = self._generate_api_headers(ytcfg, identity_token, account_syncid, visitor_data)
            comment_prog_str = '(%d/%d)' % (comment_counts[0], comment_counts[1])
            if page_num == 0:
                if is_first_continuation:
                    note_prefix = 'Downloading comment section API JSON'
                else:
                    note_prefix = '    Downloading comment API JSON reply thread %d %s' % (
                        comment_counts[2], comment_prog_str)
            else:
                note_prefix = '%sDownloading comment%s API JSON page %d %s' % (
                    '       ' if parent else '', ' replies' if parent else '',
                    page_num, comment_prog_str)

            response = self._extract_response(
                item_id=None, query=continuation,
                ep='next', ytcfg=ytcfg, headers=headers, note=note_prefix,
                check_get_keys=('onResponseReceivedEndpoints', 'continuationContents'))
            if not response:
                break
            visitor_data = try_get(
                response,
                lambda x: x['responseContext']['webResponseContextExtensionData']['ytConfigData']['visitorData'],
                compat_str) or visitor_data

            continuation_contents = dict_get(response, ('onResponseReceivedEndpoints', 'continuationContents'))

            continuation = None
            if isinstance(continuation_contents, list):
                for continuation_section in continuation_contents:
                    if not isinstance(continuation_section, dict):
                        continue
                    continuation_items = try_get(
                        continuation_section,
                        (lambda x: x['reloadContinuationItemsCommand']['continuationItems'],
                         lambda x: x['appendContinuationItemsAction']['continuationItems']),
                        list) or []
                    if is_first_continuation:
                        total_comments, continuation = extract_header(continuation_items)
                        if total_comments:
                            yield total_comments
                        is_first_continuation = False
                        if continuation:
                            break
                        continue
                    count = 0
                    for count, entry in enumerate(extract_thread(continuation_items)):
                        yield entry
                    continuation = self._extract_continuation({'contents': continuation_items})
                    if continuation:
                        # Sometimes YouTube provides a continuation without any comments
                        # In most cases we end up just downloading these with very little comments to come.
                        if count == 0:
                            if not parent:
                                self.report_warning('No comments received - assuming end of comments')
                            continuation = None
                    break

            # Deprecated response structure
            elif isinstance(continuation_contents, dict):
                known_continuation_renderers = ('itemSectionContinuation', 'commentRepliesContinuation')
                for key, continuation_renderer in continuation_contents.items():
                    if key not in known_continuation_renderers:
                        continue
                    if not isinstance(continuation_renderer, dict):
                        continue
                    if is_first_continuation:
                        header_continuation_items = [continuation_renderer.get('header') or {}]
                        total_comments, continuation = extract_header(header_continuation_items)
                        if total_comments:
                            yield total_comments
                        is_first_continuation = False
                        if continuation:
                            break

                    # Sometimes YouTube provides a continuation without any comments
                    # In most cases we end up just downloading these with very little comments to come.
                    count = 0
                    for count, entry in enumerate(extract_thread(continuation_renderer.get('contents') or {})):
                        yield entry
                    continuation = self._extract_continuation(continuation_renderer)
                    if count == 0:
                        if not parent:
                            self.report_warning('No comments received - assuming end of comments')
                        continuation = None
                    break
2196
2197 @staticmethod
2198 def _generate_comment_continuation(video_id):
2199 """
2200 Generates initial comment section continuation token from given video id
2201 """
2202 b64_vid_id = base64.b64encode(bytes(video_id.encode('utf-8')))
2203 parts = ('Eg0SCw==', b64_vid_id, 'GAYyJyIRIgs=', b64_vid_id, 'MAB4AjAAQhBjb21tZW50cy1zZWN0aW9u')
2204 new_continuation_intlist = list(itertools.chain.from_iterable(
2205 [bytes_to_intlist(base64.b64decode(part)) for part in parts]))
2206 return base64.b64encode(intlist_to_bytes(new_continuation_intlist)).decode('utf-8')
2207
2208 def _extract_comments(self, ytcfg, video_id, contents, webpage):
2209 """Entry for comment extraction"""
2210 def _real_comment_extract(contents):
2211 if isinstance(contents, list):
2212 for entry in contents:
2213 for key, renderer in entry.items():
2214 if key not in known_entry_comment_renderers:
2215 continue
2216 yield from self._comment_entries(
2217 renderer, video_id=video_id, ytcfg=ytcfg,
2218 identity_token=self._extract_identity_token(webpage, item_id=video_id),
2219 account_syncid=self._extract_account_syncid(ytcfg))
2220 break
2221 comments = []
2222 known_entry_comment_renderers = ('itemSectionRenderer',)
2223 estimated_total = 0
2224 max_comments = int_or_none(self._configuration_arg('max_comments', [''])[0]) or float('inf')
2225
2226 try:
2227 for comment in _real_comment_extract(contents):
2228 if len(comments) >= max_comments:
2229 break
2230 if isinstance(comment, int):
2231 estimated_total = comment
2232 continue
2233 comments.append(comment)
2234 except KeyboardInterrupt:
2235 self.to_screen('Interrupted by user')
2236 self.to_screen('Downloaded %d/%d comments' % (len(comments), estimated_total))
2237 return {
2238 'comments': comments,
2239 'comment_count': len(comments),
2240 }
2241
2242 @staticmethod
2243 def _generate_player_context(sts=None):
2244 context = {
2245 'html5Preference': 'HTML5_PREF_WANTS',
2246 }
2247 if sts is not None:
2248 context['signatureTimestamp'] = sts
2249 return {
2250 'playbackContext': {
2251 'contentPlaybackContext': context
2252 }
2253 }
2254
2255 @staticmethod
2256 def _get_video_info_params(video_id, client='TVHTML5'):
2257 GVI_CLIENTS = {
2258 'ANDROID': {
2259 'c': 'ANDROID',
2260 'cver': '16.20',
2261 },
2262 'TVHTML5': {
2263 'c': 'TVHTML5',
2264 'cver': '6.20180913',
2265 }
2266 }
2267 query = {
2268 'video_id': video_id,
2269 'eurl': 'https://youtube.googleapis.com/v/' + video_id,
2270 'html5': '1'
2271 }
2272 query.update(GVI_CLIENTS.get(client))
2273 return query
2274
2275 def _real_extract(self, url):
2276 url, smuggled_data = unsmuggle_url(url, {})
2277 video_id = self._match_id(url)
2278
2279 is_music_url = smuggled_data.get('is_music_url') or self.is_music_url(url)
2280
2281 base_url = self.http_scheme() + '//www.youtube.com/'
2282 webpage_url = base_url + 'watch?v=' + video_id
2283 webpage = self._download_webpage(
2284 webpage_url + '&bpctr=9999999999&has_verified=1', video_id, fatal=False)
2285
2286 ytcfg = self._extract_ytcfg(video_id, webpage) or self._get_default_ytcfg()
2287 identity_token = self._extract_identity_token(webpage, video_id)
2288 session_index = self._extract_session_index(ytcfg)
2289 player_url = self._extract_player_url(ytcfg, webpage)
2290
2291 player_client = self._configuration_arg('player_client', [''])[0]
2292 if player_client not in ('web', 'android', ''):
2293 self.report_warning(f'Invalid player_client {player_client} given. Falling back to android client.')
2294 force_mobile_client = player_client != 'web'
2295 player_skip = self._configuration_arg('player_skip')
2296 player_response = None
2297 if webpage:
2298 player_response = self._extract_yt_initial_variable(
2299 webpage, self._YT_INITIAL_PLAYER_RESPONSE_RE,
2300 video_id, 'initial player response')
2301
2302 syncid = self._extract_account_syncid(ytcfg, player_response)
2303 headers = self._generate_api_headers(ytcfg, identity_token, syncid, session_index=session_index)
2304
2305 ytm_streaming_data = {}
2306 if is_music_url:
2307 ytm_webpage = None
2308 sts = self._extract_signature_timestamp(video_id, player_url, ytcfg, fatal=False)
2309 if sts and not force_mobile_client and 'configs' not in player_skip:
2310 ytm_webpage = self._download_webpage(
2311 'https://music.youtube.com',
2312 video_id, fatal=False, note='Downloading remix client config')
2313
2314 ytm_cfg = self._extract_ytcfg(video_id, ytm_webpage) or {}
2315 ytm_client = 'WEB_REMIX'
2316 if not sts or force_mobile_client:
2317 # Android client already has signature descrambled
2318 # See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/562
2319 if not sts:
2320 self.report_warning('Falling back to android remix client for player API.')
2321 ytm_client = 'ANDROID_MUSIC'
2322 ytm_cfg = {}
2323
2324 ytm_headers = self._generate_api_headers(
2325 ytm_cfg, identity_token, syncid,
2326 client=ytm_client, session_index=session_index)
2327 ytm_query = {'videoId': video_id}
2328 ytm_query.update(self._generate_player_context(sts))
2329
2330 ytm_player_response = self._extract_response(
2331 item_id=video_id, ep='player', query=ytm_query,
2332 ytcfg=ytm_cfg, headers=ytm_headers, fatal=False,
2333 default_client=ytm_client,
2334 note='Downloading %sremix player API JSON' % ('android ' if force_mobile_client else ''))
2335 ytm_streaming_data = try_get(ytm_player_response, lambda x: x['streamingData'], dict) or {}
2336
2337 if not player_response or force_mobile_client:
2338 sts = self._extract_signature_timestamp(video_id, player_url, ytcfg, fatal=False)
2339 yt_client = 'WEB'
2340 ytpcfg = ytcfg
2341 ytp_headers = headers
2342 if not sts or force_mobile_client:
2343 # Android client already has signature descrambled
2344 # See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/562
2345 if not sts:
2346 self.report_warning('Falling back to android client for player API.')
2347 yt_client = 'ANDROID'
2348 ytpcfg = {}
2349 ytp_headers = self._generate_api_headers(ytpcfg, identity_token, syncid,
2350 client=yt_client, session_index=session_index)
2351
2352 yt_query = {'videoId': video_id}
2353 yt_query.update(self._generate_player_context(sts))
2354 player_response = self._extract_response(
2355 item_id=video_id, ep='player', query=yt_query,
2356 ytcfg=ytpcfg, headers=ytp_headers, fatal=False,
2357 default_client=yt_client,
2358 note='Downloading %splayer API JSON' % ('android ' if force_mobile_client else '')
2359 ) or player_response
2360
2361 # Age-gate workarounds
2362 playability_status = player_response.get('playabilityStatus') or {}
2363 if playability_status.get('reason') in self._AGE_GATE_REASONS:
2364 gvi_clients = ('ANDROID', 'TVHTML5') if force_mobile_client else ('TVHTML5', 'ANDROID')
2365 for gvi_client in gvi_clients:
2366 pr = self._parse_json(try_get(compat_parse_qs(
2367 self._download_webpage(
2368 base_url + 'get_video_info', video_id,
2369 'Refetching age-gated %s info webpage' % gvi_client.lower(),
2370 'unable to download video info webpage', fatal=False,
2371 query=self._get_video_info_params(video_id, client=gvi_client))),
2372 lambda x: x['player_response'][0],
2373 compat_str) or '{}', video_id)
2374 if pr:
2375 break
2376 if not pr:
2377 self.report_warning('Falling back to embedded-only age-gate workaround.')
2378 embed_webpage = None
2379 sts = self._extract_signature_timestamp(video_id, player_url, ytcfg, fatal=False)
2380 if sts and not force_mobile_client and 'configs' not in player_skip:
2381 embed_webpage = self._download_webpage(
2382 'https://www.youtube.com/embed/%s?html5=1' % video_id,
2383 video_id=video_id, note='Downloading age-gated embed config')
2384
2385 ytcfg_age = self._extract_ytcfg(video_id, embed_webpage) or {}
2386 # If we extracted the embed webpage, it'll tell us if we can view the video
2387 embedded_pr = self._parse_json(
2388 try_get(ytcfg_age, lambda x: x['PLAYER_VARS']['embedded_player_response'], str) or '{}',
2389 video_id=video_id)
2390 embedded_ps_reason = try_get(embedded_pr, lambda x: x['playabilityStatus']['reason'], str) or ''
2391 if embedded_ps_reason not in self._AGE_GATE_REASONS:
2392 yt_client = 'WEB_EMBEDDED_PLAYER'
2393 if not sts or force_mobile_client:
2394 # Android client already has signature descrambled
2395 # See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/562
2396 if not sts:
2397 self.report_warning(
2398 'Falling back to android embedded client for player API (note: some formats may be missing).')
2399 yt_client = 'ANDROID_EMBEDDED_PLAYER'
2400 ytcfg_age = {}
2401
2402 ytage_headers = self._generate_api_headers(
2403 ytcfg_age, identity_token, syncid,
2404 client=yt_client, session_index=session_index)
2405 yt_age_query = {'videoId': video_id}
2406 yt_age_query.update(self._generate_player_context(sts))
2407 pr = self._extract_response(
2408 item_id=video_id, ep='player', query=yt_age_query,
2409 ytcfg=ytcfg_age, headers=ytage_headers, fatal=False,
2410 default_client=yt_client,
2411 note='Downloading %sage-gated player API JSON' % ('android ' if force_mobile_client else '')
2412 ) or {}
2413
2414 if pr:
2415 player_response = pr
2416
2417 trailer_video_id = try_get(
2418 playability_status,
2419 lambda x: x['errorScreen']['playerLegacyDesktopYpcTrailerRenderer']['trailerVideoId'],
2420 compat_str)
2421 if trailer_video_id:
2422 return self.url_result(
2423 trailer_video_id, self.ie_key(), trailer_video_id)
2424
2425 search_meta = (
2426 lambda x: self._html_search_meta(x, webpage, default=None)) \
2427 if webpage else lambda x: None
2428
2429 video_details = player_response.get('videoDetails') or {}
2430 microformat = try_get(
2431 player_response,
2432 lambda x: x['microformat']['playerMicroformatRenderer'],
2433 dict) or {}
2434 video_title = video_details.get('title') \
2435 or self._get_text(microformat.get('title')) \
2436 or search_meta(['og:title', 'twitter:title', 'title'])
2437 video_description = video_details.get('shortDescription')
2438
2439 if not smuggled_data.get('force_singlefeed', False):
2440 if not self.get_param('noplaylist'):
2441 multifeed_metadata_list = try_get(
2442 player_response,
2443 lambda x: x['multicamera']['playerLegacyMulticameraRenderer']['metadataList'],
2444 compat_str)
2445 if multifeed_metadata_list:
2446 entries = []
2447 feed_ids = []
2448 for feed in multifeed_metadata_list.split(','):
2449 # Unquote should take place before split on comma (,) since textual
2450 # fields may contain comma as well (see
2451 # https://github.com/ytdl-org/youtube-dl/issues/8536)
2452 feed_data = compat_parse_qs(
2453 compat_urllib_parse_unquote_plus(feed))
2454
2455 def feed_entry(name):
2456 return try_get(
2457 feed_data, lambda x: x[name][0], compat_str)
2458
2459 feed_id = feed_entry('id')
2460 if not feed_id:
2461 continue
2462 feed_title = feed_entry('title')
2463 title = video_title
2464 if feed_title:
2465 title += ' (%s)' % feed_title
2466 entries.append({
2467 '_type': 'url_transparent',
2468 'ie_key': 'Youtube',
2469 'url': smuggle_url(
2470 base_url + 'watch?v=' + feed_data['id'][0],
2471 {'force_singlefeed': True}),
2472 'title': title,
2473 })
2474 feed_ids.append(feed_id)
2475 self.to_screen(
2476 'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
2477 % (', '.join(feed_ids), video_id))
2478 return self.playlist_result(
2479 entries, video_id, video_title, video_description)
2480 else:
2481 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
2482
2483 formats, itags, stream_ids = [], [], []
2484 itag_qualities = {}
2485 q = qualities([
2486 # "tiny" is the smallest video-only format. But some audio-only formats
2487 # was also labeled "tiny". It is not clear if such formats still exist
2488 'tiny', 'audio_quality_low', 'audio_quality_medium', 'audio_quality_high', # Audio only formats
2489 'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres'
2490 ])
2491
2492 streaming_data = player_response.get('streamingData') or {}
2493 streaming_formats = streaming_data.get('formats') or []
2494 streaming_formats.extend(streaming_data.get('adaptiveFormats') or [])
2495 streaming_formats.extend(ytm_streaming_data.get('formats') or [])
2496 streaming_formats.extend(ytm_streaming_data.get('adaptiveFormats') or [])
2497
2498 for fmt in streaming_formats:
2499 if fmt.get('targetDurationSec') or fmt.get('drmFamilies'):
2500 continue
2501
2502 itag = str_or_none(fmt.get('itag'))
2503 audio_track = fmt.get('audioTrack') or {}
2504 stream_id = '%s.%s' % (itag or '', audio_track.get('id', ''))
2505 if stream_id in stream_ids:
2506 continue
2507
2508 quality = fmt.get('quality')
2509 if quality == 'tiny' or not quality:
2510 quality = fmt.get('audioQuality', '').lower() or quality
2511 if itag and quality:
2512 itag_qualities[itag] = quality
2513 # FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment
2514 # (adding `&sq=0` to the URL) and parsing emsg box to determine the
2515 # number of fragment that would subsequently requested with (`&sq=N`)
2516 if fmt.get('type') == 'FORMAT_STREAM_TYPE_OTF':
2517 continue
2518
2519 fmt_url = fmt.get('url')
2520 if not fmt_url:
2521 sc = compat_parse_qs(fmt.get('signatureCipher'))
2522 fmt_url = url_or_none(try_get(sc, lambda x: x['url'][0]))
2523 encrypted_sig = try_get(sc, lambda x: x['s'][0])
2524 if not (sc and fmt_url and encrypted_sig):
2525 continue
2526 if not player_url:
2527 continue
2528 signature = self._decrypt_signature(sc['s'][0], video_id, player_url)
2529 sp = try_get(sc, lambda x: x['sp'][0]) or 'signature'
2530 fmt_url += '&' + sp + '=' + signature
2531
2532 if itag:
2533 itags.append(itag)
2534 stream_ids.append(stream_id)
2535
2536 tbr = float_or_none(
2537 fmt.get('averageBitrate') or fmt.get('bitrate'), 1000)
2538 dct = {
2539 'asr': int_or_none(fmt.get('audioSampleRate')),
2540 'filesize': int_or_none(fmt.get('contentLength')),
2541 'format_id': itag,
2542 'format_note': audio_track.get('displayName') or fmt.get('qualityLabel') or quality,
2543 'fps': int_or_none(fmt.get('fps')),
2544 'height': int_or_none(fmt.get('height')),
2545 'quality': q(quality),
2546 'tbr': tbr,
2547 'url': fmt_url,
2548 'width': fmt.get('width'),
2549 'language': audio_track.get('id', '').split('.')[0],
2550 }
2551 mime_mobj = re.match(
2552 r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', fmt.get('mimeType') or '')
2553 if mime_mobj:
2554 dct['ext'] = mimetype2ext(mime_mobj.group(1))
2555 dct.update(parse_codecs(mime_mobj.group(2)))
2556 # The 3gp format in android client has a quality of "small",
2557 # but is actually worse than all other formats
2558 if dct['ext'] == '3gp':
2559 dct['quality'] = q('tiny')
2560 no_audio = dct.get('acodec') == 'none'
2561 no_video = dct.get('vcodec') == 'none'
2562 if no_audio:
2563 dct['vbr'] = tbr
2564 if no_video:
2565 dct['abr'] = tbr
2566 if no_audio or no_video:
2567 dct['downloader_options'] = {
2568 # Youtube throttles chunks >~10M
2569 'http_chunk_size': 10485760,
2570 }
2571 if dct.get('ext'):
2572 dct['container'] = dct['ext'] + '_dash'
2573 formats.append(dct)
2574
2575 skip_manifests = self._configuration_arg('skip')
2576 get_dash = 'dash' not in skip_manifests and self.get_param('youtube_include_dash_manifest', True)
2577 get_hls = 'hls' not in skip_manifests and self.get_param('youtube_include_hls_manifest', True)
2578
2579 for sd in (streaming_data, ytm_streaming_data):
2580 hls_manifest_url = get_hls and sd.get('hlsManifestUrl')
2581 if hls_manifest_url:
2582 for f in self._extract_m3u8_formats(
2583 hls_manifest_url, video_id, 'mp4', fatal=False):
2584 itag = self._search_regex(
2585 r'/itag/(\d+)', f['url'], 'itag', default=None)
2586 if itag:
2587 f['format_id'] = itag
2588 formats.append(f)
2589
2590 dash_manifest_url = get_dash and sd.get('dashManifestUrl')
2591 if dash_manifest_url:
2592 for f in self._extract_mpd_formats(
2593 dash_manifest_url, video_id, fatal=False):
2594 itag = f['format_id']
2595 if itag in itags:
2596 continue
2597 if itag in itag_qualities:
2598 f['quality'] = q(itag_qualities[itag])
2599 filesize = int_or_none(self._search_regex(
2600 r'/clen/(\d+)', f.get('fragment_base_url')
2601 or f['url'], 'file size', default=None))
2602 if filesize:
2603 f['filesize'] = filesize
2604 formats.append(f)
2605
2606 if not formats:
2607 if not self.get_param('allow_unplayable_formats') and streaming_data.get('licenseInfos'):
2608 self.raise_no_formats(
2609 'This video is DRM protected.', expected=True)
2610 pemr = try_get(
2611 playability_status,
2612 lambda x: x['errorScreen']['playerErrorMessageRenderer'],
2613 dict) or {}
2614 reason = self._get_text(pemr.get('reason')) or playability_status.get('reason')
2615 subreason = pemr.get('subreason')
2616 if subreason:
2617 subreason = clean_html(self._get_text(subreason))
2618 if subreason == 'The uploader has not made this video available in your country.':
2619 countries = microformat.get('availableCountries')
2620 if not countries:
2621 regions_allowed = search_meta('regionsAllowed')
2622 countries = regions_allowed.split(',') if regions_allowed else None
2623 self.raise_geo_restricted(subreason, countries, metadata_available=True)
2624 reason += '\n' + subreason
2625 if reason:
2626 self.raise_no_formats(reason, expected=True)
2627
2628 self._sort_formats(formats)
2629
2630 keywords = video_details.get('keywords') or []
2631 if not keywords and webpage:
2632 keywords = [
2633 unescapeHTML(m.group('content'))
2634 for m in re.finditer(self._meta_regex('og:video:tag'), webpage)]
2635 for keyword in keywords:
2636 if keyword.startswith('yt:stretch='):
2637 mobj = re.search(r'(\d+)\s*:\s*(\d+)', keyword)
2638 if mobj:
2639 # NB: float is intentional for forcing float division
2640 w, h = (float(v) for v in mobj.groups())
2641 if w > 0 and h > 0:
2642 ratio = w / h
2643 for f in formats:
2644 if f.get('vcodec') != 'none':
2645 f['stretched_ratio'] = ratio
2646 break
2647
2648 thumbnails = []
2649 for container in (video_details, microformat):
2650 for thumbnail in (try_get(
2651 container,
2652 lambda x: x['thumbnail']['thumbnails'], list) or []):
2653 thumbnail_url = thumbnail.get('url')
2654 if not thumbnail_url:
2655 continue
2656 # Sometimes youtube gives a wrong thumbnail URL. See:
2657 # https://github.com/yt-dlp/yt-dlp/issues/233
2658 # https://github.com/ytdl-org/youtube-dl/issues/28023
2659 if 'maxresdefault' in thumbnail_url:
2660 thumbnail_url = thumbnail_url.split('?')[0]
2661 thumbnails.append({
2662 'url': thumbnail_url,
2663 'height': int_or_none(thumbnail.get('height')),
2664 'width': int_or_none(thumbnail.get('width')),
2665 'preference': 1 if 'maxresdefault' in thumbnail_url else -1
2666 })
2667 thumbnail_url = search_meta(['og:image', 'twitter:image'])
2668 if thumbnail_url:
2669 thumbnails.append({
2670 'url': thumbnail_url,
2671 'preference': 1 if 'maxresdefault' in thumbnail_url else -1
2672 })
2673 # All videos have a maxresdefault thumbnail, but sometimes it does not appear in the webpage
2674 # See: https://github.com/ytdl-org/youtube-dl/issues/29049
2675 thumbnails.append({
2676 'url': 'https://i.ytimg.com/vi/%s/maxresdefault.jpg' % video_id,
2677 'preference': 1,
2678 })
2679 self._remove_duplicate_formats(thumbnails)
2680
2681 category = microformat.get('category') or search_meta('genre')
2682 channel_id = video_details.get('channelId') \
2683 or microformat.get('externalChannelId') \
2684 or search_meta('channelId')
2685 duration = int_or_none(
2686 video_details.get('lengthSeconds')
2687 or microformat.get('lengthSeconds')) \
2688 or parse_duration(search_meta('duration'))
2689 is_live = video_details.get('isLive')
2690 is_upcoming = video_details.get('isUpcoming')
2691 owner_profile_url = microformat.get('ownerProfileUrl')
2692
2693 info = {
2694 'id': video_id,
2695 'title': self._live_title(video_title) if is_live else video_title,
2696 'formats': formats,
2697 'thumbnails': thumbnails,
2698 'description': video_description,
2699 'upload_date': unified_strdate(
2700 microformat.get('uploadDate')
2701 or search_meta('uploadDate')),
2702 'uploader': video_details['author'],
2703 'uploader_id': self._search_regex(r'/(?:channel|user)/([^/?&#]+)', owner_profile_url, 'uploader id') if owner_profile_url else None,
2704 'uploader_url': owner_profile_url,
2705 'channel_id': channel_id,
2706 'channel_url': 'https://www.youtube.com/channel/' + channel_id if channel_id else None,
2707 'duration': duration,
2708 'view_count': int_or_none(
2709 video_details.get('viewCount')
2710 or microformat.get('viewCount')
2711 or search_meta('interactionCount')),
2712 'average_rating': float_or_none(video_details.get('averageRating')),
2713 'age_limit': 18 if (
2714 microformat.get('isFamilySafe') is False
2715 or search_meta('isFamilyFriendly') == 'false'
2716 or search_meta('og:restrictions:age') == '18+') else 0,
2717 'webpage_url': webpage_url,
2718 'categories': [category] if category else None,
2719 'tags': keywords,
2720 'is_live': is_live,
2721 'playable_in_embed': playability_status.get('playableInEmbed'),
2722 'was_live': video_details.get('isLiveContent'),
2723 }
2724
2725 pctr = try_get(
2726 player_response,
2727 lambda x: x['captions']['playerCaptionsTracklistRenderer'], dict)
2728 subtitles = {}
2729 if pctr:
2730 def process_language(container, base_url, lang_code, sub_name, query):
2731 lang_subs = container.setdefault(lang_code, [])
2732 for fmt in self._SUBTITLE_FORMATS:
2733 query.update({
2734 'fmt': fmt,
2735 })
2736 lang_subs.append({
2737 'ext': fmt,
2738 'url': update_url_query(base_url, query),
2739 'name': sub_name,
2740 })
2741
2742 for caption_track in (pctr.get('captionTracks') or []):
2743 base_url = caption_track.get('baseUrl')
2744 if not base_url:
2745 continue
2746 if caption_track.get('kind') != 'asr':
2747 lang_code = (
2748 remove_start(caption_track.get('vssId') or '', '.').replace('.', '-')
2749 or caption_track.get('languageCode'))
2750 if not lang_code:
2751 continue
2752 process_language(
2753 subtitles, base_url, lang_code,
2754 try_get(caption_track, lambda x: x['name']['simpleText']),
2755 {})
2756 continue
2757 automatic_captions = {}
2758 for translation_language in (pctr.get('translationLanguages') or []):
2759 translation_language_code = translation_language.get('languageCode')
2760 if not translation_language_code:
2761 continue
2762 process_language(
2763 automatic_captions, base_url, translation_language_code,
2764 self._get_text(translation_language.get('languageName'), max_runs=1),
2765 {'tlang': translation_language_code})
2766 info['automatic_captions'] = automatic_captions
2767 info['subtitles'] = subtitles
2768
2769 parsed_url = compat_urllib_parse_urlparse(url)
2770 for component in [parsed_url.fragment, parsed_url.query]:
2771 query = compat_parse_qs(component)
2772 for k, v in query.items():
2773 for d_k, s_ks in [('start', ('start', 't')), ('end', ('end',))]:
2774 d_k += '_time'
2775 if d_k not in info and k in s_ks:
2776 info[d_k] = parse_duration(query[k][0])
2777
2778 # Youtube Music Auto-generated description
2779 if video_description:
2780 mobj = re.search(r'(?s)(?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?.+\nAuto-generated by YouTube\.\s*$', video_description)
2781 if mobj:
2782 release_year = mobj.group('release_year')
2783 release_date = mobj.group('release_date')
2784 if release_date:
2785 release_date = release_date.replace('-', '')
2786 if not release_year:
2787 release_year = release_date[:4]
2788 info.update({
2789 'album': mobj.group('album'.strip()),
2790 'artist': mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·')),
2791 'track': mobj.group('track').strip(),
2792 'release_date': release_date,
2793 'release_year': int_or_none(release_year),
2794 })
2795
2796 initial_data = None
2797 if webpage:
2798 initial_data = self._extract_yt_initial_variable(
2799 webpage, self._YT_INITIAL_DATA_RE, video_id,
2800 'yt initial data')
2801 if not initial_data:
2802 initial_data = self._extract_response(
2803 item_id=video_id, ep='next', fatal=False,
2804 ytcfg=ytcfg, headers=headers, query={'videoId': video_id},
2805 note='Downloading initial data API JSON')
2806
2807 try:
2808 # This will error if there is no livechat
2809 initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
2810 info['subtitles']['live_chat'] = [{
2811 'url': 'https://www.youtube.com/watch?v=%s' % video_id, # url is needed to set cookies
2812 'video_id': video_id,
2813 'ext': 'json',
2814 'protocol': 'youtube_live_chat' if is_live or is_upcoming else 'youtube_live_chat_replay',
2815 }]
2816 except (KeyError, IndexError, TypeError):
2817 pass
2818
2819 if initial_data:
2820 chapters = self._extract_chapters_from_json(
2821 initial_data, video_id, duration)
2822 if not chapters:
2823 for engagment_pannel in (initial_data.get('engagementPanels') or []):
2824 contents = try_get(
2825 engagment_pannel, lambda x: x['engagementPanelSectionListRenderer']['content']['macroMarkersListRenderer']['contents'],
2826 list)
2827 if not contents:
2828 continue
2829
2830 def chapter_time(mmlir):
2831 return parse_duration(
2832 self._get_text(mmlir.get('timeDescription')))
2833
2834 chapters = []
2835 for next_num, content in enumerate(contents, start=1):
2836 mmlir = content.get('macroMarkersListItemRenderer') or {}
2837 start_time = chapter_time(mmlir)
2838 end_time = chapter_time(try_get(
2839 contents, lambda x: x[next_num]['macroMarkersListItemRenderer'])) \
2840 if next_num < len(contents) else duration
2841 if start_time is None or end_time is None:
2842 continue
2843 chapters.append({
2844 'start_time': start_time,
2845 'end_time': end_time,
2846 'title': self._get_text(mmlir.get('title')),
2847 })
2848 if chapters:
2849 break
2850 if chapters:
2851 info['chapters'] = chapters
2852
2853 contents = try_get(
2854 initial_data,
2855 lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'],
2856 list) or []
2857 for content in contents:
2858 vpir = content.get('videoPrimaryInfoRenderer')
2859 if vpir:
2860 stl = vpir.get('superTitleLink')
2861 if stl:
2862 stl = self._get_text(stl)
2863 if try_get(
2864 vpir,
2865 lambda x: x['superTitleIcon']['iconType']) == 'LOCATION_PIN':
2866 info['location'] = stl
2867 else:
2868 mobj = re.search(r'(.+?)\s*S(\d+)\s*•\s*E(\d+)', stl)
2869 if mobj:
2870 info.update({
2871 'series': mobj.group(1),
2872 'season_number': int(mobj.group(2)),
2873 'episode_number': int(mobj.group(3)),
2874 })
2875 for tlb in (try_get(
2876 vpir,
2877 lambda x: x['videoActions']['menuRenderer']['topLevelButtons'],
2878 list) or []):
2879 tbr = tlb.get('toggleButtonRenderer') or {}
2880 for getter, regex in [(
2881 lambda x: x['defaultText']['accessibility']['accessibilityData'],
2882 r'(?P<count>[\d,]+)\s*(?P<type>(?:dis)?like)'), ([
2883 lambda x: x['accessibility'],
2884 lambda x: x['accessibilityData']['accessibilityData'],
2885 ], r'(?P<type>(?:dis)?like) this video along with (?P<count>[\d,]+) other people')]:
2886 label = (try_get(tbr, getter, dict) or {}).get('label')
2887 if label:
2888 mobj = re.match(regex, label)
2889 if mobj:
2890 info[mobj.group('type') + '_count'] = str_to_int(mobj.group('count'))
2891 break
2892 sbr_tooltip = try_get(
2893 vpir, lambda x: x['sentimentBar']['sentimentBarRenderer']['tooltip'])
2894 if sbr_tooltip:
2895 like_count, dislike_count = sbr_tooltip.split(' / ')
2896 info.update({
2897 'like_count': str_to_int(like_count),
2898 'dislike_count': str_to_int(dislike_count),
2899 })
2900 vsir = content.get('videoSecondaryInfoRenderer')
2901 if vsir:
2902 info['channel'] = self._get_text(try_get(
2903 vsir,
2904 lambda x: x['owner']['videoOwnerRenderer']['title'],
2905 dict))
2906 rows = try_get(
2907 vsir,
2908 lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows'],
2909 list) or []
2910 multiple_songs = False
2911 for row in rows:
2912 if try_get(row, lambda x: x['metadataRowRenderer']['hasDividerLine']) is True:
2913 multiple_songs = True
2914 break
2915 for row in rows:
2916 mrr = row.get('metadataRowRenderer') or {}
2917 mrr_title = mrr.get('title')
2918 if not mrr_title:
2919 continue
2920 mrr_title = self._get_text(mrr['title'])
2921 mrr_contents_text = self._get_text(mrr['contents'][0])
2922 if mrr_title == 'License':
2923 info['license'] = mrr_contents_text
2924 elif not multiple_songs:
2925 if mrr_title == 'Album':
2926 info['album'] = mrr_contents_text
2927 elif mrr_title == 'Artist':
2928 info['artist'] = mrr_contents_text
2929 elif mrr_title == 'Song':
2930 info['track'] = mrr_contents_text
2931
2932 fallbacks = {
2933 'channel': 'uploader',
2934 'channel_id': 'uploader_id',
2935 'channel_url': 'uploader_url',
2936 }
2937 for to, frm in fallbacks.items():
2938 if not info.get(to):
2939 info[to] = info.get(frm)
2940
2941 for s_k, d_k in [('artist', 'creator'), ('track', 'alt_title')]:
2942 v = info.get(s_k)
2943 if v:
2944 info[d_k] = v
2945
2946 is_private = bool_or_none(video_details.get('isPrivate'))
2947 is_unlisted = bool_or_none(microformat.get('isUnlisted'))
2948 is_membersonly = None
2949 is_premium = None
2950 if initial_data and is_private is not None:
2951 is_membersonly = False
2952 is_premium = False
2953 contents = try_get(initial_data, lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'], list) or []
2954 badge_labels = set()
2955 for content in contents:
2956 if not isinstance(content, dict):
2957 continue
2958 badge_labels.update(self._extract_badges(content.get('videoPrimaryInfoRenderer')))
2959 for badge_label in badge_labels:
2960 if badge_label.lower() == 'members only':
2961 is_membersonly = True
2962 elif badge_label.lower() == 'premium':
2963 is_premium = True
2964 elif badge_label.lower() == 'unlisted':
2965 is_unlisted = True
2966
2967 info['availability'] = self._availability(
2968 is_private=is_private,
2969 needs_premium=is_premium,
2970 needs_subscription=is_membersonly,
2971 needs_auth=info['age_limit'] >= 18,
2972 is_unlisted=None if is_private is None else is_unlisted)
2973
2974 # get xsrf for annotations or comments
2975 get_annotations = self.get_param('writeannotations', False)
2976 get_comments = self.get_param('getcomments', False)
2977 if get_annotations or get_comments:
2978 xsrf_token = None
2979 ytcfg = self._extract_ytcfg(video_id, webpage)
2980 if ytcfg:
2981 xsrf_token = try_get(ytcfg, lambda x: x['XSRF_TOKEN'], compat_str)
2982 if not xsrf_token:
2983 xsrf_token = self._search_regex(
2984 r'([\'"])XSRF_TOKEN\1\s*:\s*([\'"])(?P<xsrf_token>(?:(?!\2).)+)\2',
2985 webpage, 'xsrf token', group='xsrf_token', fatal=False)
2986
2987 # annotations
2988 if get_annotations:
2989 invideo_url = try_get(
2990 player_response, lambda x: x['annotations'][0]['playerAnnotationsUrlsRenderer']['invideoUrl'], compat_str)
2991 if xsrf_token and invideo_url:
2992 xsrf_field_name = None
2993 if ytcfg:
2994 xsrf_field_name = try_get(ytcfg, lambda x: x['XSRF_FIELD_NAME'], compat_str)
2995 if not xsrf_field_name:
2996 xsrf_field_name = self._search_regex(
2997 r'([\'"])XSRF_FIELD_NAME\1\s*:\s*([\'"])(?P<xsrf_field_name>\w+)\2',
2998 webpage, 'xsrf field name',
2999 group='xsrf_field_name', default='session_token')
3000 info['annotations'] = self._download_webpage(
3001 self._proto_relative_url(invideo_url),
3002 video_id, note='Downloading annotations',
3003 errnote='Unable to download video annotations', fatal=False,
3004 data=urlencode_postdata({xsrf_field_name: xsrf_token}))
3005
3006 if get_comments:
3007 info['__post_extractor'] = lambda: self._extract_comments(ytcfg, video_id, contents, webpage)
3008
3009 self.mark_watched(video_id, player_response)
3010
3011 return info
3012
3013
3014 class YoutubeTabIE(YoutubeBaseInfoExtractor):
3015 IE_DESC = 'YouTube.com tab'
3016 _VALID_URL = r'''(?x)
3017 https?://
3018 (?:\w+\.)?
3019 (?:
3020 youtube(?:kids)?\.com|
3021 invidio\.us
3022 )/
3023 (?:
3024 (?P<channel_type>channel|c|user|browse)/|
3025 (?P<not_channel>
3026 feed/|hashtag/|
3027 (?:playlist|watch)\?.*?\blist=
3028 )|
3029 (?!(?:%s)\b) # Direct URLs
3030 )
3031 (?P<id>[^/?\#&]+)
3032 ''' % YoutubeBaseInfoExtractor._RESERVED_NAMES
3033 IE_NAME = 'youtube:tab'
3034
3035 _TESTS = [{
3036 'note': 'playlists, multipage',
3037 'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid',
3038 'playlist_mincount': 94,
3039 'info_dict': {
3040 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
3041 'title': 'Игорь Клейнер - Playlists',
3042 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
3043 'uploader': 'Игорь Клейнер',
3044 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
3045 },
3046 }, {
3047 'note': 'playlists, multipage, different order',
3048 'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
3049 'playlist_mincount': 94,
3050 'info_dict': {
3051 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
3052 'title': 'Игорь Клейнер - Playlists',
3053 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
3054 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
3055 'uploader': 'Игорь Клейнер',
3056 },
3057 }, {
3058 'note': 'playlists, series',
3059 'url': 'https://www.youtube.com/c/3blue1brown/playlists?view=50&sort=dd&shelf_id=3',
3060 'playlist_mincount': 5,
3061 'info_dict': {
3062 'id': 'UCYO_jab_esuFRV4b17AJtAw',
3063 'title': '3Blue1Brown - Playlists',
3064 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
3065 'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
3066 'uploader': '3Blue1Brown',
3067 },
3068 }, {
3069 'note': 'playlists, singlepage',
3070 'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
3071 'playlist_mincount': 4,
3072 'info_dict': {
3073 'id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
3074 'title': 'ThirstForScience - Playlists',
3075 'description': 'md5:609399d937ea957b0f53cbffb747a14c',
3076 'uploader': 'ThirstForScience',
3077 'uploader_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
3078 }
3079 }, {
3080 'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',
3081 'only_matching': True,
3082 }, {
3083 'note': 'basic, single video playlist',
3084 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
3085 'info_dict': {
3086 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
3087 'uploader': 'Sergey M.',
3088 'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
3089 'title': 'youtube-dl public playlist',
3090 },
3091 'playlist_count': 1,
3092 }, {
3093 'note': 'empty playlist',
3094 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
3095 'info_dict': {
3096 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
3097 'uploader': 'Sergey M.',
3098 'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
3099 'title': 'youtube-dl empty playlist',
3100 },
3101 'playlist_count': 0,
3102 }, {
3103 'note': 'Home tab',
3104 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/featured',
3105 'info_dict': {
3106 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3107 'title': 'lex will - Home',
3108 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3109 'uploader': 'lex will',
3110 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3111 },
3112 'playlist_mincount': 2,
3113 }, {
3114 'note': 'Videos tab',
3115 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos',
3116 'info_dict': {
3117 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3118 'title': 'lex will - Videos',
3119 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3120 'uploader': 'lex will',
3121 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3122 },
3123 'playlist_mincount': 975,
3124 }, {
3125 'note': 'Videos tab, sorted by popular',
3126 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos?view=0&sort=p&flow=grid',
3127 'info_dict': {
3128 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3129 'title': 'lex will - Videos',
3130 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3131 'uploader': 'lex will',
3132 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3133 },
3134 'playlist_mincount': 199,
3135 }, {
3136 'note': 'Playlists tab',
3137 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/playlists',
3138 'info_dict': {
3139 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3140 'title': 'lex will - Playlists',
3141 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3142 'uploader': 'lex will',
3143 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3144 },
3145 'playlist_mincount': 17,
3146 }, {
3147 'note': 'Community tab',
3148 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community',
3149 'info_dict': {
3150 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3151 'title': 'lex will - Community',
3152 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3153 'uploader': 'lex will',
3154 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3155 },
3156 'playlist_mincount': 18,
3157 }, {
3158 'note': 'Channels tab',
3159 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels',
3160 'info_dict': {
3161 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3162 'title': 'lex will - Channels',
3163 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3164 'uploader': 'lex will',
3165 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3166 },
3167 'playlist_mincount': 12,
3168 }, {
3169 'note': 'Search tab',
3170 'url': 'https://www.youtube.com/c/3blue1brown/search?query=linear%20algebra',
3171 'playlist_mincount': 40,
3172 'info_dict': {
3173 'id': 'UCYO_jab_esuFRV4b17AJtAw',
3174 'title': '3Blue1Brown - Search - linear algebra',
3175 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
3176 'uploader': '3Blue1Brown',
3177 'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
3178 },
3179 }, {
3180 'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',
3181 'only_matching': True,
3182 }, {
3183 'url': 'https://www.youtubekids.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
3184 'only_matching': True,
3185 }, {
3186 'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
3187 'only_matching': True,
3188 }, {
3189 'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
3190 'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
3191 'info_dict': {
3192 'title': '29C3: Not my department',
3193 'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
3194 'uploader': 'Christiaan008',
3195 'uploader_id': 'UCEPzS1rYsrkqzSLNp76nrcg',
3196 'description': 'md5:a14dc1a8ef8307a9807fe136a0660268',
3197 },
3198 'playlist_count': 96,
3199 }, {
3200 'note': 'Large playlist',
3201 'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
3202 'info_dict': {
3203 'title': 'Uploads from Cauchemar',
3204 'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
3205 'uploader': 'Cauchemar',
3206 'uploader_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',
3207 },
3208 'playlist_mincount': 1123,
3209 }, {
3210 'note': 'even larger playlist, 8832 videos',
3211 'url': 'http://www.youtube.com/user/NASAgovVideo/videos',
3212 'only_matching': True,
3213 }, {
3214 'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
3215 'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
3216 'info_dict': {
3217 'title': 'Uploads from Interstellar Movie',
3218 'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
3219 'uploader': 'Interstellar Movie',
3220 'uploader_id': 'UCXw-G3eDE9trcvY2sBMM_aA',
3221 },
3222 'playlist_mincount': 21,
3223 }, {
3224 'note': 'Playlist with "show unavailable videos" button',
3225 'url': 'https://www.youtube.com/playlist?list=UUTYLiWFZy8xtPwxFwX9rV7Q',
3226 'info_dict': {
3227 'title': 'Uploads from Phim Siêu Nhân Nhật Bản',
3228 'id': 'UUTYLiWFZy8xtPwxFwX9rV7Q',
3229 'uploader': 'Phim Siêu Nhân Nhật Bản',
3230 'uploader_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',
3231 },
3232 'playlist_mincount': 200,
3233 }, {
3234 'note': 'Playlist with unavailable videos in page 7',
3235 'url': 'https://www.youtube.com/playlist?list=UU8l9frL61Yl5KFOl87nIm2w',
3236 'info_dict': {
3237 'title': 'Uploads from BlankTV',
3238 'id': 'UU8l9frL61Yl5KFOl87nIm2w',
3239 'uploader': 'BlankTV',
3240 'uploader_id': 'UC8l9frL61Yl5KFOl87nIm2w',
3241 },
3242 'playlist_mincount': 1000,
3243 }, {
3244 'note': 'https://github.com/ytdl-org/youtube-dl/issues/21844',
3245 'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
3246 'info_dict': {
3247 'title': 'Data Analysis with Dr Mike Pound',
3248 'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
3249 'uploader_id': 'UC9-y-6csu5WGm29I7JiwpnA',
3250 'uploader': 'Computerphile',
3251 'description': 'md5:7f567c574d13d3f8c0954d9ffee4e487',
3252 },
3253 'playlist_mincount': 11,
3254 }, {
3255 'url': 'https://invidio.us/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
3256 'only_matching': True,
3257 }, {
3258 'note': 'Playlist URL that does not actually serve a playlist',
3259 'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
3260 'info_dict': {
3261 'id': 'FqZTN594JQw',
3262 'ext': 'webm',
3263 'title': "Smiley's People 01 detective, Adventure Series, Action",
3264 'uploader': 'STREEM',
3265 'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
3266 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
3267 'upload_date': '20150526',
3268 'license': 'Standard YouTube License',
3269 'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
3270 'categories': ['People & Blogs'],
3271 'tags': list,
3272 'view_count': int,
3273 'like_count': int,
3274 'dislike_count': int,
3275 },
3276 'params': {
3277 'skip_download': True,
3278 },
3279 'skip': 'This video is not available.',
3280 'add_ie': [YoutubeIE.ie_key()],
3281 }, {
3282 'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',
3283 'only_matching': True,
3284 }, {
3285 'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
3286 'only_matching': True,
3287 }, {
3288 'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',
3289 'info_dict': {
3290 'id': 'X1whbWASnNQ', # This will keep changing
3291 'ext': 'mp4',
3292 'title': compat_str,
3293 'uploader': 'Sky News',
3294 'uploader_id': 'skynews',
3295 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/skynews',
3296 'upload_date': r're:\d{8}',
3297 'description': compat_str,
3298 'categories': ['News & Politics'],
3299 'tags': list,
3300 'like_count': int,
3301 'dislike_count': int,
3302 },
3303 'params': {
3304 'skip_download': True,
3305 },
3306 'expected_warnings': ['Downloading just video ', 'Ignoring subtitle tracks found in '],
3307 }, {
3308 'url': 'https://www.youtube.com/user/TheYoungTurks/live',
3309 'info_dict': {
3310 'id': 'a48o2S1cPoo',
3311 'ext': 'mp4',
3312 'title': 'The Young Turks - Live Main Show',
3313 'uploader': 'The Young Turks',
3314 'uploader_id': 'TheYoungTurks',
3315 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
3316 'upload_date': '20150715',
3317 'license': 'Standard YouTube License',
3318 'description': 'md5:438179573adcdff3c97ebb1ee632b891',
3319 'categories': ['News & Politics'],
3320 'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
3321 'like_count': int,
3322 'dislike_count': int,
3323 },
3324 'params': {
3325 'skip_download': True,
3326 },
3327 'only_matching': True,
3328 }, {
3329 'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
3330 'only_matching': True,
3331 }, {
3332 'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
3333 'only_matching': True,
3334 }, {
3335 'note': 'A channel that is not live. Should raise error',
3336 'url': 'https://www.youtube.com/user/numberphile/live',
3337 'only_matching': True,
3338 }, {
3339 'url': 'https://www.youtube.com/feed/trending',
3340 'only_matching': True,
3341 }, {
3342 'url': 'https://www.youtube.com/feed/library',
3343 'only_matching': True,
3344 }, {
3345 'url': 'https://www.youtube.com/feed/history',
3346 'only_matching': True,
3347 }, {
3348 'url': 'https://www.youtube.com/feed/subscriptions',
3349 'only_matching': True,
3350 }, {
3351 'url': 'https://www.youtube.com/feed/watch_later',
3352 'only_matching': True,
3353 }, {
3354 'note': 'Recommended - redirects to home page',
3355 'url': 'https://www.youtube.com/feed/recommended',
3356 'only_matching': True,
3357 }, {
3358 'note': 'inline playlist with not always working continuations',
3359 'url': 'https://www.youtube.com/watch?v=UC6u0Tct-Fo&list=PL36D642111D65BE7C',
3360 'only_matching': True,
3361 }, {
3362 'url': 'https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8',
3363 'only_matching': True,
3364 }, {
3365 'url': 'https://www.youtube.com/course',
3366 'only_matching': True,
3367 }, {
3368 'url': 'https://www.youtube.com/zsecurity',
3369 'only_matching': True,
3370 }, {
3371 'url': 'http://www.youtube.com/NASAgovVideo/videos',
3372 'only_matching': True,
3373 }, {
3374 'url': 'https://www.youtube.com/TheYoungTurks/live',
3375 'only_matching': True,
3376 }, {
3377 'url': 'https://www.youtube.com/hashtag/cctv9',
3378 'info_dict': {
3379 'id': 'cctv9',
3380 'title': '#cctv9',
3381 },
3382 'playlist_mincount': 350,
3383 }, {
3384 'url': 'https://www.youtube.com/watch?list=PLW4dVinRY435CBE_JD3t-0SRXKfnZHS1P&feature=youtu.be&v=M9cJMXmQ_ZU',
3385 'only_matching': True,
3386 }, {
3387 'note': 'Requires Premium: should request additional YTM-info webpage (and have format 141) for videos in playlist',
3388 'url': 'https://music.youtube.com/playlist?list=PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
3389 'only_matching': True
3390 }, {
3391 'note': '/browse/ should redirect to /channel/',
3392 'url': 'https://music.youtube.com/browse/UC1a8OFewdjuLq6KlF8M_8Ng',
3393 'only_matching': True
3394 }, {
3395 'note': 'VLPL, should redirect to playlist?list=PL...',
3396 'url': 'https://music.youtube.com/browse/VLPLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
3397 'info_dict': {
3398 'id': 'PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
3399 'uploader': 'NoCopyrightSounds',
3400 'description': 'Providing you with copyright free / safe music for gaming, live streaming, studying and more!',
3401 'uploader_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',
3402 'title': 'NCS Releases',
3403 },
3404 'playlist_mincount': 166,
3405 }, {
3406 'note': 'Topic, should redirect to playlist?list=UU...',
3407 'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',
3408 'info_dict': {
3409 'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',
3410 'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
3411 'title': 'Uploads from Royalty Free Music - Topic',
3412 'uploader': 'Royalty Free Music - Topic',
3413 },
3414 'expected_warnings': [
3415 'A channel/user page was given',
3416 'The URL does not have a videos tab',
3417 ],
3418 'playlist_mincount': 101,
3419 }, {
3420 'note': 'Topic without a UU playlist',
3421 'url': 'https://www.youtube.com/channel/UCtFRv9O2AHqOZjjynzrv-xg',
3422 'info_dict': {
3423 'id': 'UCtFRv9O2AHqOZjjynzrv-xg',
3424 'title': 'UCtFRv9O2AHqOZjjynzrv-xg',
3425 },
3426 'expected_warnings': [
3427 'A channel/user page was given',
3428 'The URL does not have a videos tab',
3429 'Falling back to channel URL',
3430 ],
3431 'playlist_mincount': 9,
3432 }, {
3433 'note': 'Youtube music Album',
3434 'url': 'https://music.youtube.com/browse/MPREb_gTAcphH99wE',
3435 'info_dict': {
3436 'id': 'OLAK5uy_l1m0thk3g31NmIIz_vMIbWtyv7eZixlH0',
3437 'title': 'Album - Royalty Free Music Library V2 (50 Songs)',
3438 },
3439 'playlist_count': 50,
3440 }, {
3441 'note': 'unlisted single video playlist',
3442 'url': 'https://www.youtube.com/playlist?list=PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
3443 'info_dict': {
3444 'uploader_id': 'UC9zHu_mHU96r19o-wV5Qs1Q',
3445 'uploader': 'colethedj',
3446 'id': 'PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
3447 'title': 'yt-dlp unlisted playlist test',
3448 'availability': 'unlisted'
3449 },
3450 'playlist_count': 1,
3451 }]
3452
3453 @classmethod
3454 def suitable(cls, url):
3455 return False if YoutubeIE.suitable(url) else super(
3456 YoutubeTabIE, cls).suitable(url)
3457
3458 def _extract_channel_id(self, webpage):
3459 channel_id = self._html_search_meta(
3460 'channelId', webpage, 'channel id', default=None)
3461 if channel_id:
3462 return channel_id
3463 channel_url = self._html_search_meta(
3464 ('og:url', 'al:ios:url', 'al:android:url', 'al:web:url',
3465 'twitter:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad',
3466 'twitter:app:url:googleplay'), webpage, 'channel url')
3467 return self._search_regex(
3468 r'https?://(?:www\.)?youtube\.com/channel/([^/?#&])+',
3469 channel_url, 'channel id')
3470
3471 @staticmethod
3472 def _extract_basic_item_renderer(item):
3473 # Modified from _extract_grid_item_renderer
3474 known_basic_renderers = (
3475 'playlistRenderer', 'videoRenderer', 'channelRenderer', 'showRenderer'
3476 )
3477 for key, renderer in item.items():
3478 if not isinstance(renderer, dict):
3479 continue
3480 elif key in known_basic_renderers:
3481 return renderer
3482 elif key.startswith('grid') and key.endswith('Renderer'):
3483 return renderer
3484
3485 def _grid_entries(self, grid_renderer):
3486 for item in grid_renderer['items']:
3487 if not isinstance(item, dict):
3488 continue
3489 renderer = self._extract_basic_item_renderer(item)
3490 if not isinstance(renderer, dict):
3491 continue
3492 title = self._get_text(renderer.get('title'))
3493
3494 # playlist
3495 playlist_id = renderer.get('playlistId')
3496 if playlist_id:
3497 yield self.url_result(
3498 'https://www.youtube.com/playlist?list=%s' % playlist_id,
3499 ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
3500 video_title=title)
3501 continue
3502 # video
3503 video_id = renderer.get('videoId')
3504 if video_id:
3505 yield self._extract_video(renderer)
3506 continue
3507 # channel
3508 channel_id = renderer.get('channelId')
3509 if channel_id:
3510 yield self.url_result(
3511 'https://www.youtube.com/channel/%s' % channel_id,
3512 ie=YoutubeTabIE.ie_key(), video_title=title)
3513 continue
3514 # generic endpoint URL support
3515 ep_url = urljoin('https://www.youtube.com/', try_get(
3516 renderer, lambda x: x['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'],
3517 compat_str))
3518 if ep_url:
3519 for ie in (YoutubeTabIE, YoutubePlaylistIE, YoutubeIE):
3520 if ie.suitable(ep_url):
3521 yield self.url_result(
3522 ep_url, ie=ie.ie_key(), video_id=ie._match_id(ep_url), video_title=title)
3523 break
3524
3525 def _shelf_entries_from_content(self, shelf_renderer):
3526 content = shelf_renderer.get('content')
3527 if not isinstance(content, dict):
3528 return
3529 renderer = content.get('gridRenderer') or content.get('expandedShelfContentsRenderer')
3530 if renderer:
3531 # TODO: add support for nested playlists so each shelf is processed
3532 # as separate playlist
3533 # TODO: this includes only first N items
3534 for entry in self._grid_entries(renderer):
3535 yield entry
3536 renderer = content.get('horizontalListRenderer')
3537 if renderer:
3538 # TODO
3539 pass
3540
3541 def _shelf_entries(self, shelf_renderer, skip_channels=False):
3542 ep = try_get(
3543 shelf_renderer, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
3544 compat_str)
3545 shelf_url = urljoin('https://www.youtube.com', ep)
3546 if shelf_url:
3547 # Skipping links to another channels, note that checking for
3548 # endpoint.commandMetadata.webCommandMetadata.webPageTypwebPageType == WEB_PAGE_TYPE_CHANNEL
3549 # will not work
3550 if skip_channels and '/channels?' in shelf_url:
3551 return
3552 title = self._get_text(shelf_renderer, lambda x: x['title'])
3553 yield self.url_result(shelf_url, video_title=title)
3554 # Shelf may not contain shelf URL, fallback to extraction from content
3555 for entry in self._shelf_entries_from_content(shelf_renderer):
3556 yield entry
3557
3558 def _playlist_entries(self, video_list_renderer):
3559 for content in video_list_renderer['contents']:
3560 if not isinstance(content, dict):
3561 continue
3562 renderer = content.get('playlistVideoRenderer') or content.get('playlistPanelVideoRenderer')
3563 if not isinstance(renderer, dict):
3564 continue
3565 video_id = renderer.get('videoId')
3566 if not video_id:
3567 continue
3568 yield self._extract_video(renderer)
3569
3570 def _rich_entries(self, rich_grid_renderer):
3571 renderer = try_get(
3572 rich_grid_renderer, lambda x: x['content']['videoRenderer'], dict) or {}
3573 video_id = renderer.get('videoId')
3574 if not video_id:
3575 return
3576 yield self._extract_video(renderer)
3577
3578 def _video_entry(self, video_renderer):
3579 video_id = video_renderer.get('videoId')
3580 if video_id:
3581 return self._extract_video(video_renderer)
3582
3583 def _post_thread_entries(self, post_thread_renderer):
3584 post_renderer = try_get(
3585 post_thread_renderer, lambda x: x['post']['backstagePostRenderer'], dict)
3586 if not post_renderer:
3587 return
3588 # video attachment
3589 video_renderer = try_get(
3590 post_renderer, lambda x: x['backstageAttachment']['videoRenderer'], dict) or {}
3591 video_id = video_renderer.get('videoId')
3592 if video_id:
3593 entry = self._extract_video(video_renderer)
3594 if entry:
3595 yield entry
3596 # playlist attachment
3597 playlist_id = try_get(
3598 post_renderer, lambda x: x['backstageAttachment']['playlistRenderer']['playlistId'], compat_str)
3599 if playlist_id:
3600 yield self.url_result(
3601 'https://www.youtube.com/playlist?list=%s' % playlist_id,
3602 ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
3603 # inline video links
3604 runs = try_get(post_renderer, lambda x: x['contentText']['runs'], list) or []
3605 for run in runs:
3606 if not isinstance(run, dict):
3607 continue
3608 ep_url = try_get(
3609 run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'], compat_str)
3610 if not ep_url:
3611 continue
3612 if not YoutubeIE.suitable(ep_url):
3613 continue
3614 ep_video_id = YoutubeIE._match_id(ep_url)
3615 if video_id == ep_video_id:
3616 continue
3617 yield self.url_result(ep_url, ie=YoutubeIE.ie_key(), video_id=ep_video_id)
3618
3619 def _post_thread_continuation_entries(self, post_thread_continuation):
3620 contents = post_thread_continuation.get('contents')
3621 if not isinstance(contents, list):
3622 return
3623 for content in contents:
3624 renderer = content.get('backstagePostThreadRenderer')
3625 if not isinstance(renderer, dict):
3626 continue
3627 for entry in self._post_thread_entries(renderer):
3628 yield entry
3629
3630 r''' # unused
3631 def _rich_grid_entries(self, contents):
3632 for content in contents:
3633 video_renderer = try_get(content, lambda x: x['richItemRenderer']['content']['videoRenderer'], dict)
3634 if video_renderer:
3635 entry = self._video_entry(video_renderer)
3636 if entry:
3637 yield entry
3638 '''
    def _entries(self, tab, item_id, identity_token, account_syncid, ytcfg):
        """Yield all entries of the selected tab, following API continuations.

        Continuation tokens are passed out of the nested generator through the
        single-element list ``continuation_list`` (a closure cell substitute).
        """

        def extract_entries(parent_renderer):  # this needs to be called again for continuation to work with feeds
            contents = try_get(parent_renderer, lambda x: x['contents'], list) or []
            for content in contents:
                if not isinstance(content, dict):
                    continue
                is_renderer = try_get(content, lambda x: x['itemSectionRenderer'], dict)
                if not is_renderer:
                    # Not an item section; may still be a rich item (home feed/hashtag)
                    renderer = content.get('richItemRenderer')
                    if renderer:
                        for entry in self._rich_entries(renderer):
                            yield entry
                        continuation_list[0] = self._extract_continuation(parent_renderer)
                    continue
                isr_contents = try_get(is_renderer, lambda x: x['contents'], list) or []
                for isr_content in isr_contents:
                    if not isinstance(isr_content, dict):
                        continue

                    # Maps renderer key -> generator of entries for that renderer
                    known_renderers = {
                        'playlistVideoListRenderer': self._playlist_entries,
                        'gridRenderer': self._grid_entries,
                        'shelfRenderer': lambda x: self._shelf_entries(x, tab.get('title') != 'Channels'),
                        'backstagePostThreadRenderer': self._post_thread_entries,
                        'videoRenderer': lambda x: [self._video_entry(x)],
                    }
                    for key, renderer in isr_content.items():
                        if key not in known_renderers:
                            continue
                        for entry in known_renderers[key](renderer):
                            if entry:
                                yield entry
                        continuation_list[0] = self._extract_continuation(renderer)
                        break

                    # Fall back to section-level, then top-level continuation
                    if not continuation_list[0]:
                        continuation_list[0] = self._extract_continuation(is_renderer)

                if not continuation_list[0]:
                    continuation_list[0] = self._extract_continuation(parent_renderer)

        continuation_list = [None]  # Python 2 does not support nonlocal
        tab_content = try_get(tab, lambda x: x['content'], dict)
        if not tab_content:
            return
        parent_renderer = (
            try_get(tab_content, lambda x: x['sectionListRenderer'], dict)
            or try_get(tab_content, lambda x: x['richGridRenderer'], dict) or {})
        for entry in extract_entries(parent_renderer):
            yield entry
        continuation = continuation_list[0]
        visitor_data = None

        # Keep requesting continuation pages until no further token is found
        for page_num in itertools.count(1):
            if not continuation:
                break
            headers = self._generate_api_headers(ytcfg, identity_token, account_syncid, visitor_data)
            response = self._extract_response(
                item_id='%s page %s' % (item_id, page_num),
                query=continuation, headers=headers, ytcfg=ytcfg,
                check_get_keys=('continuationContents', 'onResponseReceivedActions', 'onResponseReceivedEndpoints'))

            if not response:
                break
            # Carry visitorData across pages so YouTube serves consistent results
            visitor_data = try_get(
                response, lambda x: x['responseContext']['visitorData'], compat_str) or visitor_data

            # Older continuation style: continuationContents
            known_continuation_renderers = {
                'playlistVideoListContinuation': self._playlist_entries,
                'gridContinuation': self._grid_entries,
                'itemSectionContinuation': self._post_thread_continuation_entries,
                'sectionListContinuation': extract_entries,  # for feeds
            }
            continuation_contents = try_get(
                response, lambda x: x['continuationContents'], dict) or {}
            continuation_renderer = None
            for key, value in continuation_contents.items():
                if key not in known_continuation_renderers:
                    continue
                continuation_renderer = value
                continuation_list = [None]
                for entry in known_continuation_renderers[key](continuation_renderer):
                    yield entry
                continuation = continuation_list[0] or self._extract_continuation(continuation_renderer)
                break
            if continuation_renderer:
                continue

            # Newer continuation style: onResponseReceivedActions/Endpoints;
            # the tuple values are (handler, key the handler expects the items under)
            known_renderers = {
                'gridPlaylistRenderer': (self._grid_entries, 'items'),
                'gridVideoRenderer': (self._grid_entries, 'items'),
                'playlistVideoRenderer': (self._playlist_entries, 'contents'),
                'itemSectionRenderer': (extract_entries, 'contents'),  # for feeds
                'richItemRenderer': (extract_entries, 'contents'),  # for hashtag
                'backstagePostThreadRenderer': (self._post_thread_continuation_entries, 'contents')
            }
            on_response_received = dict_get(response, ('onResponseReceivedActions', 'onResponseReceivedEndpoints'))
            continuation_items = try_get(
                on_response_received, lambda x: x[0]['appendContinuationItemsAction']['continuationItems'], list)
            continuation_item = try_get(continuation_items, lambda x: x[0], dict) or {}
            video_items_renderer = None
            for key, value in continuation_item.items():
                if key not in known_renderers:
                    continue
                # Wrap the item list so the existing handlers can consume it
                video_items_renderer = {known_renderers[key][1]: continuation_items}
                continuation_list = [None]
                for entry in known_renderers[key][0](video_items_renderer):
                    yield entry
                continuation = continuation_list[0] or self._extract_continuation(video_items_renderer)
                break
            if video_items_renderer:
                continue
            break
3753
3754 @staticmethod
3755 def _extract_selected_tab(tabs):
3756 for tab in tabs:
3757 renderer = dict_get(tab, ('tabRenderer', 'expandableTabRenderer')) or {}
3758 if renderer.get('selected') is True:
3759 return renderer
3760 else:
3761 raise ExtractorError('Unable to find selected tab')
3762
3763 @classmethod
3764 def _extract_uploader(cls, data):
3765 uploader = {}
3766 renderer = cls._extract_sidebar_info_renderer(data, 'playlistSidebarSecondaryInfoRenderer') or {}
3767 owner = try_get(
3768 renderer, lambda x: x['videoOwner']['videoOwnerRenderer']['title']['runs'][0], dict)
3769 if owner:
3770 uploader['uploader'] = owner.get('text')
3771 uploader['uploader_id'] = try_get(
3772 owner, lambda x: x['navigationEndpoint']['browseEndpoint']['browseId'], compat_str)
3773 uploader['uploader_url'] = urljoin(
3774 'https://www.youtube.com/',
3775 try_get(owner, lambda x: x['navigationEndpoint']['browseEndpoint']['canonicalBaseUrl'], compat_str))
3776 return {k: v for k, v in uploader.items() if v is not None}
3777
    def _extract_from_tabs(self, item_id, webpage, data, tabs):
        """Build a playlist result for a tab page (channel/playlist/hashtag)."""
        playlist_id = title = description = channel_url = channel_name = channel_id = None
        thumbnails_list = tags = []

        selected_tab = self._extract_selected_tab(tabs)
        # Channel pages expose channelMetadataRenderer; playlist pages expose
        # playlistMetadataRenderer instead
        renderer = try_get(
            data, lambda x: x['metadata']['channelMetadataRenderer'], dict)
        if renderer:
            channel_name = renderer.get('title')
            channel_url = renderer.get('channelUrl')
            channel_id = renderer.get('externalId')
        else:
            renderer = try_get(
                data, lambda x: x['metadata']['playlistMetadataRenderer'], dict)

        if renderer:
            title = renderer.get('title')
            description = renderer.get('description', '')
            playlist_id = channel_id
            tags = renderer.get('keywords', '').split()
            # Prefer the avatar; fall back to the sidebar playlist thumbnail
            thumbnails_list = (
                try_get(renderer, lambda x: x['avatar']['thumbnails'], list)
                or try_get(
                    self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer'),
                    lambda x: x['thumbnailRenderer']['playlistVideoThumbnailRenderer']['thumbnail']['thumbnails'],
                    list)
                or [])

        thumbnails = []
        for t in thumbnails_list:
            if not isinstance(t, dict):
                continue
            thumbnail_url = url_or_none(t.get('url'))
            if not thumbnail_url:
                continue
            thumbnails.append({
                'url': thumbnail_url,
                'width': int_or_none(t.get('width')),
                'height': int_or_none(t.get('height')),
            })
        if playlist_id is None:
            playlist_id = item_id
        if title is None:
            # Hashtag pages carry their title in hashtagHeaderRenderer
            title = (
                try_get(data, lambda x: x['header']['hashtagHeaderRenderer']['hashtag']['simpleText'])
                or playlist_id)
        # Append tab name/expanded text (e.g. " - Videos") when present
        title += format_field(selected_tab, 'title', ' - %s')
        title += format_field(selected_tab, 'expandedText', ' - %s')
        metadata = {
            'playlist_id': playlist_id,
            'playlist_title': title,
            'playlist_description': description,
            'uploader': channel_name,
            'uploader_id': channel_id,
            'uploader_url': channel_url,
            'thumbnails': thumbnails,
            'tags': tags,
        }
        availability = self._extract_availability(data)
        if availability:
            metadata['availability'] = availability
        if not channel_id:
            # No channel metadata: fall back to sidebar uploader info
            metadata.update(self._extract_uploader(data))
        metadata.update({
            'channel': metadata['uploader'],
            'channel_id': metadata['uploader_id'],
            'channel_url': metadata['uploader_url']})
        ytcfg = self._extract_ytcfg(item_id, webpage)
        return self.playlist_result(
            self._entries(
                selected_tab, playlist_id,
                self._extract_identity_token(webpage, item_id),
                self._extract_account_syncid(ytcfg, data), ytcfg),
            **metadata)
3852
    def _extract_mix_playlist(self, playlist, playlist_id, data, webpage):
        """Page through a Mix playlist, stopping once it wraps back to the first video.

        Mix playlists are endless; each 'next' API call returns a window that
        overlaps the previous one, so we skip already-yielded ids.
        """
        first_id = last_id = None
        ytcfg = self._extract_ytcfg(playlist_id, webpage)
        headers = self._generate_api_headers(
            ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
            identity_token=self._extract_identity_token(webpage, item_id=playlist_id))
        for page_num in itertools.count(1):
            videos = list(self._playlist_entries(playlist))
            if not videos:
                return
            # Resume right after the last video yielded from the previous page
            start = next((i for i, v in enumerate(videos) if v['id'] == last_id), -1) + 1
            if start >= len(videos):
                return
            for video in videos[start:]:
                if video['id'] == first_id:
                    self.to_screen('First video %s found again; Assuming end of Mix' % first_id)
                    return
                yield video
            first_id = first_id or videos[0]['id']
            last_id = videos[-1]['id']
            # NOTE(review): assumes the last panel video has a watchEndpoint;
            # .get on a None watch_endpoint would raise — confirm upstream data
            watch_endpoint = try_get(
                playlist, lambda x: x['contents'][-1]['playlistPanelVideoRenderer']['navigationEndpoint']['watchEndpoint'])
            query = {
                'playlistId': playlist_id,
                'videoId': watch_endpoint.get('videoId') or last_id,
                'index': watch_endpoint.get('index') or len(videos),
                'params': watch_endpoint.get('params') or 'OAE%3D'
            }
            response = self._extract_response(
                item_id='%s page %d' % (playlist_id, page_num),
                query=query, ep='next', headers=headers, ytcfg=ytcfg,
                check_get_keys='contents'
            )
            playlist = try_get(
                response, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
3888
3889 def _extract_from_playlist(self, item_id, url, data, playlist, webpage):
3890 title = playlist.get('title') or try_get(
3891 data, lambda x: x['titleText']['simpleText'], compat_str)
3892 playlist_id = playlist.get('playlistId') or item_id
3893
3894 # Delegating everything except mix playlists to regular tab-based playlist URL
3895 playlist_url = urljoin(url, try_get(
3896 playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
3897 compat_str))
3898 if playlist_url and playlist_url != url:
3899 return self.url_result(
3900 playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
3901 video_title=title)
3902
3903 return self.playlist_result(
3904 self._extract_mix_playlist(playlist, playlist_id, data, webpage),
3905 playlist_id=playlist_id, playlist_title=title)
3906
3907 def _extract_availability(self, data):
3908 """
3909 Gets the availability of a given playlist/tab.
3910 Note: Unless YouTube tells us explicitly, we do not assume it is public
3911 @param data: response
3912 """
3913 is_private = is_unlisted = None
3914 renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer') or {}
3915 badge_labels = self._extract_badges(renderer)
3916
3917 # Personal playlists, when authenticated, have a dropdown visibility selector instead of a badge
3918 privacy_dropdown_entries = try_get(
3919 renderer, lambda x: x['privacyForm']['dropdownFormFieldRenderer']['dropdown']['dropdownRenderer']['entries'], list) or []
3920 for renderer_dict in privacy_dropdown_entries:
3921 is_selected = try_get(
3922 renderer_dict, lambda x: x['privacyDropdownItemRenderer']['isSelected'], bool) or False
3923 if not is_selected:
3924 continue
3925 label = self._get_text(
3926 try_get(renderer_dict, lambda x: x['privacyDropdownItemRenderer']['label'], dict) or [])
3927 if label:
3928 badge_labels.add(label.lower())
3929 break
3930
3931 for badge_label in badge_labels:
3932 if badge_label == 'unlisted':
3933 is_unlisted = True
3934 elif badge_label == 'private':
3935 is_private = True
3936 elif badge_label == 'public':
3937 is_unlisted = is_private = False
3938 return self._availability(is_private, False, False, False, is_unlisted)
3939
3940 @staticmethod
3941 def _extract_sidebar_info_renderer(data, info_renderer, expected_type=dict):
3942 sidebar_renderer = try_get(
3943 data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list) or []
3944 for item in sidebar_renderer:
3945 renderer = try_get(item, lambda x: x[info_renderer], expected_type)
3946 if renderer:
3947 return renderer
3948
3949 def _reload_with_unavailable_videos(self, item_id, data, webpage):
3950 """
3951 Get playlist with unavailable videos if the 'show unavailable videos' button exists.
3952 """
3953 browse_id = params = None
3954 renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer')
3955 if not renderer:
3956 return
3957 menu_renderer = try_get(
3958 renderer, lambda x: x['menu']['menuRenderer']['items'], list) or []
3959 for menu_item in menu_renderer:
3960 if not isinstance(menu_item, dict):
3961 continue
3962 nav_item_renderer = menu_item.get('menuNavigationItemRenderer')
3963 text = try_get(
3964 nav_item_renderer, lambda x: x['text']['simpleText'], compat_str)
3965 if not text or text.lower() != 'show unavailable videos':
3966 continue
3967 browse_endpoint = try_get(
3968 nav_item_renderer, lambda x: x['navigationEndpoint']['browseEndpoint'], dict) or {}
3969 browse_id = browse_endpoint.get('browseId')
3970 params = browse_endpoint.get('params')
3971 break
3972
3973 ytcfg = self._extract_ytcfg(item_id, webpage)
3974 headers = self._generate_api_headers(
3975 ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
3976 identity_token=self._extract_identity_token(webpage, item_id=item_id),
3977 visitor_data=try_get(
3978 self._extract_context(ytcfg), lambda x: x['client']['visitorData'], compat_str))
3979 query = {
3980 'params': params or 'wgYCCAA=',
3981 'browseId': browse_id or 'VL%s' % item_id
3982 }
3983 return self._extract_response(
3984 item_id=item_id, headers=headers, query=query,
3985 check_get_keys='contents', fatal=False, ytcfg=ytcfg,
3986 note='Downloading API JSON with unavailable videos')
3987
3988 def _extract_webpage(self, url, item_id):
3989 retries = self.get_param('extractor_retries', 3)
3990 count = -1
3991 last_error = 'Incomplete yt initial data recieved'
3992 while count < retries:
3993 count += 1
3994 # Sometimes youtube returns a webpage with incomplete ytInitialData
3995 # See: https://github.com/yt-dlp/yt-dlp/issues/116
3996 if count:
3997 self.report_warning('%s. Retrying ...' % last_error)
3998 webpage = self._download_webpage(
3999 url, item_id,
4000 'Downloading webpage%s' % (' (retry #%d)' % count if count else ''))
4001 data = self._extract_yt_initial_data(item_id, webpage)
4002 if data.get('contents') or data.get('currentVideoEndpoint'):
4003 break
4004 # Extract alerts here only when there is error
4005 self._extract_and_report_alerts(data)
4006 if count >= retries:
4007 raise ExtractorError(last_error)
4008 return webpage, data
4009
4010 @staticmethod
4011 def _smuggle_data(entries, data):
4012 for entry in entries:
4013 if data:
4014 entry['url'] = smuggle_url(entry['url'], data)
4015 yield entry
4016
4017 def _real_extract(self, url):
4018 url, smuggled_data = unsmuggle_url(url, {})
4019 if self.is_music_url(url):
4020 smuggled_data['is_music_url'] = True
4021 info_dict = self.__real_extract(url, smuggled_data)
4022 if info_dict.get('entries'):
4023 info_dict['entries'] = self._smuggle_data(info_dict['entries'], smuggled_data)
4024 return info_dict
4025
4026 _url_re = re.compile(r'(?P<pre>%s)(?(channel_type)(?P<tab>/\w+))?(?P<post>.*)$' % _VALID_URL)
4027
    def __real_extract(self, url, smuggled_data):
        """Dispatch a tab/playlist/watch URL to the right extraction strategy.

        Normalizes the URL (www host, lower-case tab, music redirects), then
        tries tabs, watch-page playlists and finally a bare video in turn.
        """
        item_id = self._match_id(url)
        # Force the www host: other hosts (music., m., ...) can return
        # different/incomplete data
        url = compat_urlparse.urlunparse(
            compat_urlparse.urlparse(url)._replace(netloc='www.youtube.com'))
        compat_opts = self.get_param('compat_opts', [])

        def get_mobj(url):
            # Split URL into pre/tab/post parts; missing groups become ''
            mobj = self._url_re.match(url).groupdict()
            mobj.update((k, '') for k, v in mobj.items() if v is None)
            return mobj

        mobj = get_mobj(url)
        # Youtube returns incomplete data if tabname is not lower case
        pre, tab, post, is_channel = mobj['pre'], mobj['tab'].lower(), mobj['post'], not mobj['not_channel']

        if is_channel:
            if smuggled_data.get('is_music_url'):
                if item_id[:2] == 'VL':
                    # Youtube music VL channels have an equivalent playlist
                    item_id = item_id[2:]
                    pre, tab, post, is_channel = 'https://www.youtube.com/playlist?list=%s' % item_id, '', '', False
                elif item_id[:2] == 'MP':
                    # Youtube music albums (/channel/MP...) have a OLAK playlist that can be extracted from the webpage
                    item_id = self._search_regex(
                        r'\\x22audioPlaylistId\\x22:\\x22([0-9A-Za-z_-]+)\\x22',
                        self._download_webpage('https://music.youtube.com/channel/%s' % item_id, item_id),
                        'playlist id')
                    pre, tab, post, is_channel = 'https://www.youtube.com/playlist?list=%s' % item_id, '', '', False
                elif mobj['channel_type'] == 'browse':
                    # Youtube music /browse/ should be changed to /channel/
                    pre = 'https://www.youtube.com/channel/%s' % item_id
        if is_channel and not tab and 'no-youtube-channel-redirect' not in compat_opts:
            # Home URLs should redirect to /videos/
            self.report_warning(
                'A channel/user page was given. All the channel\'s videos will be downloaded. '
                'To download only the videos in the home page, add a "/featured" to the URL')
            tab = '/videos'

        url = ''.join((pre, tab, post))
        mobj = get_mobj(url)

        # Handle both video/playlist URLs
        qs = parse_qs(url)
        video_id = qs.get('v', [None])[0]
        playlist_id = qs.get('list', [None])[0]

        if not video_id and mobj['not_channel'].startswith('watch'):
            if not playlist_id:
                # If there is neither video or playlist ids, youtube redirects to home page, which is undesirable
                raise ExtractorError('Unable to recognize tab page')
            # Common mistake: https://www.youtube.com/watch?list=playlist_id
            self.report_warning('A video URL was given without video ID. Trying to download playlist %s' % playlist_id)
            url = 'https://www.youtube.com/playlist?list=%s' % playlist_id
            mobj = get_mobj(url)

        if video_id and playlist_id:
            if self.get_param('noplaylist'):
                self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
                return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)
            self.to_screen('Downloading playlist %s; add --no-playlist to just download video %s' % (playlist_id, video_id))

        webpage, data = self._extract_webpage(url, item_id)

        tabs = try_get(
            data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
        if tabs:
            selected_tab = self._extract_selected_tab(tabs)
            tab_name = selected_tab.get('title', '')
            if 'no-youtube-channel-redirect' not in compat_opts:
                if mobj['tab'] == '/live':
                    # Live tab should have redirected to the video
                    raise ExtractorError('The channel is not currently live', expected=True)
                if mobj['tab'] == '/videos' and tab_name.lower() != mobj['tab'][1:]:
                    if not mobj['not_channel'] and item_id[:2] == 'UC':
                        # Topic channels don't have /videos. Use the equivalent playlist instead
                        self.report_warning('The URL does not have a %s tab. Trying to redirect to playlist UU%s instead' % (mobj['tab'][1:], item_id[2:]))
                        pl_id = 'UU%s' % item_id[2:]
                        pl_url = 'https://www.youtube.com/playlist?list=%s%s' % (pl_id, mobj['post'])
                        try:
                            pl_webpage, pl_data = self._extract_webpage(pl_url, pl_id)
                            for alert_type, alert_message in self._extract_alerts(pl_data):
                                if alert_type == 'error':
                                    raise ExtractorError('Youtube said: %s' % alert_message)
                            item_id, url, webpage, data = pl_id, pl_url, pl_webpage, pl_data
                        except ExtractorError:
                            self.report_warning('The playlist gave error. Falling back to channel URL')
                    else:
                        self.report_warning('The URL does not have a %s tab. %s is being downloaded instead' % (mobj['tab'][1:], tab_name))

        self.write_debug('Final URL: %s' % url)

        # YouTube sometimes provides a button to reload playlist with unavailable videos.
        if 'no-youtube-unavailable-videos' not in compat_opts:
            data = self._reload_with_unavailable_videos(item_id, data, webpage) or data
        self._extract_and_report_alerts(data)
        tabs = try_get(
            data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
        if tabs:
            return self._extract_from_tabs(item_id, webpage, data, tabs)

        playlist = try_get(
            data, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
        if playlist:
            return self._extract_from_playlist(item_id, url, data, playlist, webpage)

        video_id = try_get(
            data, lambda x: x['currentVideoEndpoint']['watchEndpoint']['videoId'],
            compat_str) or video_id
        if video_id:
            if mobj['tab'] != '/live':  # live tab is expected to redirect to video
                self.report_warning('Unable to recognize playlist. Downloading just video %s' % video_id)
            return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)

        raise ExtractorError('Unable to recognize tab page')
4142
4143
class YoutubePlaylistIE(InfoExtractor):
    """Thin wrapper that normalizes bare playlist IDs and playlist URLs
    into a canonical https://www.youtube.com/playlist URL, then delegates
    the actual extraction to YoutubeTabIE."""
    IE_DESC = 'YouTube.com playlists'
    _VALID_URL = r'''(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        (?:
                            (?:
                                youtube(?:kids)?\.com|
                                invidio\.us
                            )
                            /.*?\?.*?\blist=
                        )?
                        (?P<id>%(playlist_id)s)
                     )''' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    IE_NAME = 'youtube:playlist'
    _TESTS = [{
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
            'uploader': 'Wickydoo',
            'uploader_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
        },
        'playlist_mincount': 29,
    }, {
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        },
        'playlist_count': 2,
        'skip': 'This playlist is private',
    }, {
        'note': 'embedded',
        'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA15',
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
            'uploader': 'milan',
            'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
        }
    }, {
        'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
        'playlist_mincount': 982,
        'info_dict': {
            'title': '2018 Chinese New Singles (11/6 updated)',
            'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
            'uploader': 'LBK',
            'uploader_id': 'UC21nz3_MesPLqtDqwdvnoxA',
        }
    }, {
        'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
        'only_matching': True,
    }, {
        # music album playlist
        'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        # Let YoutubeTabIE claim anything it matches first.
        if YoutubeTabIE.suitable(url):
            return False
        # Hack for lazy extractors until more generic solution is implemented
        # (see #28780)
        from .youtube import parse_qs
        # A 'v' parameter means this is really a watch URL, not a playlist.
        if parse_qs(url).get('v', [None])[0]:
            return False
        return super(YoutubePlaylistIE, cls).suitable(url)

    def _real_extract(self, url):
        playlist_id = self._match_id(url)
        # Must be checked against the *original* URL, before rewriting it.
        is_music_url = YoutubeBaseInfoExtractor.is_music_url(url)
        query = parse_qs(url) or {'list': playlist_id}
        canonical_url = update_url_query('https://www.youtube.com/playlist', query)
        if is_music_url:
            # Preserve the music.youtube.com origin for the tab extractor.
            canonical_url = smuggle_url(canonical_url, {'is_music_url': True})
        return self.url_result(canonical_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
4226
4227
class YoutubeYtBeIE(InfoExtractor):
    """Handle youtu.be short links that also carry a playlist ID by
    rewriting them to a full youtube.com watch URL for YoutubeTabIE."""
    IE_DESC = 'youtu.be'
    _VALID_URL = r'https?://youtu\.be/(?P<id>[0-9A-Za-z_-]{11})/*?.*?\blist=(?P<playlist_id>%(playlist_id)s)' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TESTS = [{
        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
        'info_dict': {
            'id': 'yeWKywCrFtk',
            'ext': 'mp4',
            'title': 'Small Scale Baler and Braiding Rugs',
            'uploader': 'Backus-Page House Museum',
            'uploader_id': 'backuspagemuseum',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
            'upload_date': '20161008',
            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
            'categories': ['Nonprofits & Activism'],
            'tags': list,
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
    }, {
        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        match = re.match(self._VALID_URL, url)
        video_id = match.group('id')
        playlist_id = match.group('playlist_id')
        # Rebuild the equivalent long-form watch URL and hand it off.
        watch_url = update_url_query('https://www.youtube.com/watch', {
            'v': video_id,
            'list': playlist_id,
            'feature': 'youtu.be',
        })
        return self.url_result(watch_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
4266
4267
class YoutubeYtUserIE(InfoExtractor):
    """Resolve the "ytuser:NAME" shorthand to the channel's /user/ page."""
    IE_DESC = 'YouTube.com user videos, URL or "ytuser" keyword'
    _VALID_URL = r'ytuser:(?P<id>.+)'
    _TESTS = [{
        'url': 'ytuser:phihag',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        user_id = self._match_id(url)
        # Delegate to the generic tab extractor via the canonical user URL.
        user_url = 'https://www.youtube.com/user/%s' % user_id
        return self.url_result(user_url, ie=YoutubeTabIE.ie_key(), video_id=user_id)
4281
4282
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    """Map the ":ytfav" pseudo-URL to the user's liked-videos playlist."""
    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube.com liked videos, ":ytfav" for short (requires authentication)'
    _VALID_URL = r':ytfav(?:ou?rite)?s?'
    _LOGIN_REQUIRED = True
    _TESTS = [{
        'url': ':ytfav',
        'only_matching': True,
    }, {
        'url': ':ytfavorites',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # 'LL' is the auto-generated liked-videos playlist ID.
        liked_url = 'https://www.youtube.com/playlist?list=LL'
        return self.url_result(liked_url, ie=YoutubeTabIE.ie_key())
4300
4301
class YoutubeSearchIE(SearchInfoExtractor, YoutubeTabIE):
    IE_DESC = 'YouTube.com searches, "ytsearch" keyword'
    # there doesn't appear to be a real limit, for example if you search for
    # 'python' you get more than 8.000.000 results
    _MAX_RESULTS = float('inf')
    IE_NAME = 'youtube:search'
    _SEARCH_KEY = 'ytsearch'
    # Optional opaque `params` value sent with the search request; subclasses
    # override it to pre-apply filters (e.g. sort order) — see YoutubeSearchDateIE.
    _SEARCH_PARAMS = None
    _TESTS = []

    def _entries(self, query, n):
        """Yield up to ``n`` video results for ``query``.

        Pages through the innertube ``search`` endpoint, following
        continuation tokens until either ``n`` results have been yielded
        or no further continuation is found.
        """
        data = {'query': query}
        if self._SEARCH_PARAMS:
            data['params'] = self._SEARCH_PARAMS
        total = 0
        continuation = {}
        for page_num in itertools.count(1):
            # Merge the previous page's continuation token (if any) into the
            # request payload; the first iteration sends the bare query.
            data.update(continuation)
            search = self._extract_response(
                item_id='query "%s" page %s' % (query, page_num), ep='search', query=data,
                check_get_keys=('contents', 'onResponseReceivedCommands')
            )
            if not search:
                break
            # The first page and continuation pages nest the result list at
            # different paths, hence the two alternative getters.
            slr_contents = try_get(
                search,
                (lambda x: x['contents']['twoColumnSearchResultsRenderer']['primaryContents']['sectionListRenderer']['contents'],
                 lambda x: x['onResponseReceivedCommands'][0]['appendContinuationItemsAction']['continuationItems']),
                list)
            if not slr_contents:
                break

            # Youtube sometimes adds promoted content to searches,
            # changing the index location of videos and token.
            # So we search through all entries till we find them.
            continuation = None
            for slr_content in slr_contents:
                if not continuation:
                    # Keep the first continuation token encountered for the
                    # next request.
                    continuation = self._extract_continuation({'contents': [slr_content]})

                isr_contents = try_get(
                    slr_content,
                    lambda x: x['itemSectionRenderer']['contents'],
                    list)
                if not isr_contents:
                    continue
                for content in isr_contents:
                    if not isinstance(content, dict):
                        continue
                    # Skip entries that are not plain video results
                    # (channels, playlists, ads, shelves, ...).
                    video = content.get('videoRenderer')
                    if not isinstance(video, dict):
                        continue
                    video_id = video.get('videoId')
                    if not video_id:
                        continue

                    yield self._extract_video(video)
                    total += 1
                    if total == n:
                        return

            # No continuation token on this page means we reached the end.
            if not continuation:
                break

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query"""
        return self.playlist_result(self._entries(query, n), query)
4369
4370
class YoutubeSearchDateIE(YoutubeSearchIE):
    # Same as YoutubeSearchIE, but with a fixed search `params` filter so
    # results come back newest-first.
    IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
    _SEARCH_KEY = 'ytsearchdate'
    IE_DESC = 'YouTube.com searches, newest videos first, "ytsearchdate" keyword'
    # URL-encoded innertube filter blob; presumably selects sort-by-upload-date
    # ('CAI%3D' decodes to 'CAI=') — TODO confirm against the search endpoint.
    _SEARCH_PARAMS = 'CAI%3D'
4376
4377
class YoutubeSearchURLIE(YoutubeSearchIE):
    """Handle youtube.com/results search URLs by reusing the ytsearch logic."""
    IE_DESC = 'YouTube.com search URLs'
    IE_NAME = YoutubeSearchIE.IE_NAME + '_url'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?:[^&]+)(?:[&]|$)'
    # _MAX_RESULTS = 100
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'title': 'youtube-dl test video',
        }
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    @classmethod
    def _make_valid_url(cls):
        # Searches are driven by the URL itself, not the _SEARCH_KEY prefix.
        return cls._VALID_URL

    def _real_extract(self, url):
        params = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
        # Either parameter name may carry the query ('search_query' or 'q').
        query = (params.get('search_query') or params.get('q'))[0]
        # Pass any 'sp' filter blob from the URL through to the search request.
        sp_values = params.get('sp', ('',))
        self._SEARCH_PARAMS = sp_values[0]
        return self._get_n_results(query, self._MAX_RESULTS)
4403
4404
class YoutubeFeedsInfoExtractor(YoutubeTabIE):
    """
    Base class for feed extractors
    Subclasses must define the _FEED_NAME property.
    """
    _LOGIN_REQUIRED = True
    _TESTS = []

    @property
    def IE_NAME(self):
        # Derive the extractor name from the subclass's feed name.
        return 'youtube:%s' % self._FEED_NAME

    def _real_extract(self, url):
        feed_url = 'https://www.youtube.com/feed/%s' % self._FEED_NAME
        # Hand the concrete feed URL to the generic tab extractor.
        return self.url_result(feed_url, ie=YoutubeTabIE.ie_key())
4421
4422
class YoutubeWatchLaterIE(InfoExtractor):
    """Map the ":ytwatchlater" pseudo-URL to the Watch Later playlist."""
    IE_NAME = 'youtube:watchlater'
    IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
    _VALID_URL = r':ytwatchlater'
    _TESTS = [{
        'url': ':ytwatchlater',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # 'WL' is the auto-generated Watch Later playlist ID.
        watch_later_url = 'https://www.youtube.com/playlist?list=WL'
        return self.url_result(watch_later_url, ie=YoutubeTabIE.ie_key())
4435
4436
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    # Feed extractor for the YouTube home page / recommendations; login is
    # optional (anonymous recommendations exist), unlike the other feeds.
    IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/?(?:[?#]|$)|:ytrec(?:ommended)?'
    _FEED_NAME = 'recommended'
    _LOGIN_REQUIRED = False
    _TESTS = [{
        'url': ':ytrec',
        'only_matching': True,
    }, {
        'url': ':ytrecommended',
        'only_matching': True,
    }, {
        'url': 'https://youtube.com',
        'only_matching': True,
    }]
4452
4453
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    # Feed extractor for the authenticated user's subscriptions feed.
    IE_DESC = 'YouTube.com subscriptions feed, ":ytsubs" for short (requires authentication)'
    _VALID_URL = r':ytsub(?:scription)?s?'
    _FEED_NAME = 'subscriptions'
    _TESTS = [{
        'url': ':ytsubs',
        'only_matching': True,
    }, {
        'url': ':ytsubscriptions',
        'only_matching': True,
    }]
4465
4466
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    # Feed extractor for the authenticated user's watch history.
    IE_DESC = 'Youtube watch history, ":ythis" for short (requires authentication)'
    _VALID_URL = r':ythis(?:tory)?'
    _FEED_NAME = 'history'
    _TESTS = [{
        'url': ':ythistory',
        'only_matching': True,
    }]
4475
4476
class YoutubeTruncatedURLIE(InfoExtractor):
    """Catch watch/attribution URLs whose 'v' parameter was lost — usually
    because an unquoted '&' let the shell truncate the command line — and
    fail with an actionable message instead of a confusing generic error."""
    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # Fix: the hint previously told users to run 'youtube-dl'; this
        # project's executable is 'yt-dlp', so the suggested commands now
        # name the correct program.
        raise ExtractorError(
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like yt-dlp '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply yt-dlp BaW_jenozKc .',
            expected=True)
4524
4525
class YoutubeTruncatedIDIE(InfoExtractor):
    """Catch watch URLs whose video ID is shorter than the required 11
    characters and raise a descriptive error instead of attempting extraction."""
    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        truncated_id = self._match_id(url)
        message = 'Incomplete YouTube ID %s. URL %s looks truncated.' % (truncated_id, url)
        raise ExtractorError(message, expected=True)