]> jfr.im git - yt-dlp.git/blame_incremental - yt_dlp/extractor/youtube.py
[youtube:comments] Improve comment vote count parsing (fixes #506) (#508)
[yt-dlp.git] / yt_dlp / extractor / youtube.py
... / ...
CommitLineData
1# coding: utf-8
2
3from __future__ import unicode_literals
4
5import base64
6import calendar
7import copy
8import hashlib
9import itertools
10import json
11import os.path
12import random
13import re
14import time
15import traceback
16
17from .common import InfoExtractor, SearchInfoExtractor
18from ..compat import (
19 compat_chr,
20 compat_HTTPError,
21 compat_parse_qs,
22 compat_str,
23 compat_urllib_parse_unquote_plus,
24 compat_urllib_parse_urlencode,
25 compat_urllib_parse_urlparse,
26 compat_urlparse,
27)
28from ..jsinterp import JSInterpreter
29from ..utils import (
30 bool_or_none,
31 bytes_to_intlist,
32 clean_html,
33 dict_get,
34 datetime_from_str,
35 error_to_compat_str,
36 ExtractorError,
37 format_field,
38 float_or_none,
39 int_or_none,
40 intlist_to_bytes,
41 mimetype2ext,
42 parse_codecs,
43 parse_count,
44 parse_duration,
45 qualities,
46 remove_start,
47 smuggle_url,
48 str_or_none,
49 str_to_int,
50 try_get,
51 unescapeHTML,
52 unified_strdate,
53 unsmuggle_url,
54 update_url_query,
55 url_or_none,
56 urlencode_postdata,
57 urljoin
58)
59
60
def parse_qs(url):
    """Return the query string of *url* parsed into a dict of value lists."""
    parsed = compat_urlparse.urlparse(url)
    return compat_urlparse.parse_qs(parsed.query)
63
64
65class YoutubeBaseInfoExtractor(InfoExtractor):
66 """Provide base functions for Youtube extractors"""
    # Google account endpoints used by the disabled username/password login flow
    _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
    _TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge'

    _LOOKUP_URL = 'https://accounts.google.com/_/signin/sl/lookup'
    _CHALLENGE_URL = 'https://accounts.google.com/_/signin/sl/challenge'
    _TFA_URL = 'https://accounts.google.com/_/signin/challenge?hl=en&TL={0}'

    # presumably URL path segments reserved by YouTube (never channel/user
    # names) — TODO confirm against the _VALID_URLs that reference this
    _RESERVED_NAMES = (
        r'channel|c|user|browse|playlist|watch|w|v|embed|e|watch_popup|shorts|'
        r'movies|results|shared|hashtag|trending|feed|feeds|oembed|get_video_info|'
        r'storefront|oops|index|account|reporthistory|t/terms|about|upload|signin|logout')

    _NETRC_MACHINE = 'youtube'
    # If True it will raise an error if no login info is provided
    _LOGIN_REQUIRED = False

    # Playlist ID formats: the usual prefixed IDs plus the special
    # RDMM/WL/LL/LM lists (presumably mix/watch-later/liked lists)
    _PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)'
84
85 def _login(self):
86 """
87 Attempt to log in to YouTube.
88 True is returned if successful or skipped.
89 False is returned if login failed.
90
91 If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.
92 """
93
94 def warn(message):
95 self.report_warning(message)
96
97 # username+password login is broken
98 if self._LOGIN_REQUIRED and self.get_param('cookiefile') is None:
99 self.raise_login_required(
100 'Login details are needed to download this content', method='cookies')
101 username, password = self._get_login_info()
102 if username:
103 warn('Logging in using username and password is broken. %s' % self._LOGIN_HINTS['cookies'])
104 return
105
106 # Everything below this is broken!
107 r'''
108 # No authentication to be performed
109 if username is None:
110 if self._LOGIN_REQUIRED and self.get_param('cookiefile') is None:
111 raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
112 # if self.get_param('cookiefile'): # TODO remove 'and False' later - too many people using outdated cookies and open issues, remind them.
113 # self.to_screen('[Cookies] Reminder - Make sure to always use up to date cookies!')
114 return True
115
116 login_page = self._download_webpage(
117 self._LOGIN_URL, None,
118 note='Downloading login page',
119 errnote='unable to fetch login page', fatal=False)
120 if login_page is False:
121 return
122
123 login_form = self._hidden_inputs(login_page)
124
125 def req(url, f_req, note, errnote):
126 data = login_form.copy()
127 data.update({
128 'pstMsg': 1,
129 'checkConnection': 'youtube',
130 'checkedDomains': 'youtube',
131 'hl': 'en',
132 'deviceinfo': '[null,null,null,[],null,"US",null,null,[],"GlifWebSignIn",null,[null,null,[]]]',
133 'f.req': json.dumps(f_req),
134 'flowName': 'GlifWebSignIn',
135 'flowEntry': 'ServiceLogin',
136 # TODO: reverse actual botguard identifier generation algo
137 'bgRequest': '["identifier",""]',
138 })
139 return self._download_json(
140 url, None, note=note, errnote=errnote,
141 transform_source=lambda s: re.sub(r'^[^[]*', '', s),
142 fatal=False,
143 data=urlencode_postdata(data), headers={
144 'Content-Type': 'application/x-www-form-urlencoded;charset=utf-8',
145 'Google-Accounts-XSRF': 1,
146 })
147
148 lookup_req = [
149 username,
150 None, [], None, 'US', None, None, 2, False, True,
151 [
152 None, None,
153 [2, 1, None, 1,
154 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn',
155 None, [], 4],
156 1, [None, None, []], None, None, None, True
157 ],
158 username,
159 ]
160
161 lookup_results = req(
162 self._LOOKUP_URL, lookup_req,
163 'Looking up account info', 'Unable to look up account info')
164
165 if lookup_results is False:
166 return False
167
168 user_hash = try_get(lookup_results, lambda x: x[0][2], compat_str)
169 if not user_hash:
170 warn('Unable to extract user hash')
171 return False
172
173 challenge_req = [
174 user_hash,
175 None, 1, None, [1, None, None, None, [password, None, True]],
176 [
177 None, None, [2, 1, None, 1, 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn', None, [], 4],
178 1, [None, None, []], None, None, None, True
179 ]]
180
181 challenge_results = req(
182 self._CHALLENGE_URL, challenge_req,
183 'Logging in', 'Unable to log in')
184
185 if challenge_results is False:
186 return
187
188 login_res = try_get(challenge_results, lambda x: x[0][5], list)
189 if login_res:
190 login_msg = try_get(login_res, lambda x: x[5], compat_str)
191 warn(
192 'Unable to login: %s' % 'Invalid password'
193 if login_msg == 'INCORRECT_ANSWER_ENTERED' else login_msg)
194 return False
195
196 res = try_get(challenge_results, lambda x: x[0][-1], list)
197 if not res:
198 warn('Unable to extract result entry')
199 return False
200
201 login_challenge = try_get(res, lambda x: x[0][0], list)
202 if login_challenge:
203 challenge_str = try_get(login_challenge, lambda x: x[2], compat_str)
204 if challenge_str == 'TWO_STEP_VERIFICATION':
205 # SEND_SUCCESS - TFA code has been successfully sent to phone
206 # QUOTA_EXCEEDED - reached the limit of TFA codes
207 status = try_get(login_challenge, lambda x: x[5], compat_str)
208 if status == 'QUOTA_EXCEEDED':
209 warn('Exceeded the limit of TFA codes, try later')
210 return False
211
212 tl = try_get(challenge_results, lambda x: x[1][2], compat_str)
213 if not tl:
214 warn('Unable to extract TL')
215 return False
216
217 tfa_code = self._get_tfa_info('2-step verification code')
218
219 if not tfa_code:
220 warn(
221 'Two-factor authentication required. Provide it either interactively or with --twofactor <code>'
222 '(Note that only TOTP (Google Authenticator App) codes work at this time.)')
223 return False
224
225 tfa_code = remove_start(tfa_code, 'G-')
226
227 tfa_req = [
228 user_hash, None, 2, None,
229 [
230 9, None, None, None, None, None, None, None,
231 [None, tfa_code, True, 2]
232 ]]
233
234 tfa_results = req(
235 self._TFA_URL.format(tl), tfa_req,
236 'Submitting TFA code', 'Unable to submit TFA code')
237
238 if tfa_results is False:
239 return False
240
241 tfa_res = try_get(tfa_results, lambda x: x[0][5], list)
242 if tfa_res:
243 tfa_msg = try_get(tfa_res, lambda x: x[5], compat_str)
244 warn(
245 'Unable to finish TFA: %s' % 'Invalid TFA code'
246 if tfa_msg == 'INCORRECT_ANSWER_ENTERED' else tfa_msg)
247 return False
248
249 check_cookie_url = try_get(
250 tfa_results, lambda x: x[0][-1][2], compat_str)
251 else:
252 CHALLENGES = {
253 'LOGIN_CHALLENGE': "This device isn't recognized. For your security, Google wants to make sure it's really you.",
254 'USERNAME_RECOVERY': 'Please provide additional information to aid in the recovery process.',
255 'REAUTH': "There is something unusual about your activity. For your security, Google wants to make sure it's really you.",
256 }
257 challenge = CHALLENGES.get(
258 challenge_str,
259 '%s returned error %s.' % (self.IE_NAME, challenge_str))
260 warn('%s\nGo to https://accounts.google.com/, login and solve a challenge.' % challenge)
261 return False
262 else:
263 check_cookie_url = try_get(res, lambda x: x[2], compat_str)
264
265 if not check_cookie_url:
266 warn('Unable to extract CheckCookie URL')
267 return False
268
269 check_cookie_results = self._download_webpage(
270 check_cookie_url, None, 'Checking cookie', fatal=False)
271
272 if check_cookie_results is False:
273 return False
274
275 if 'https://myaccount.google.com/' not in check_cookie_results:
276 warn('Unable to log in')
277 return False
278
279 return True
280 '''
281
282 def _initialize_consent(self):
283 cookies = self._get_cookies('https://www.youtube.com/')
284 if cookies.get('__Secure-3PSID'):
285 return
286 consent_id = None
287 consent = cookies.get('CONSENT')
288 if consent:
289 if 'YES' in consent.value:
290 return
291 consent_id = self._search_regex(
292 r'PENDING\+(\d+)', consent.value, 'consent', default=None)
293 if not consent_id:
294 consent_id = random.randint(100, 999)
295 self._set_cookie('.youtube.com', 'CONSENT', 'YES+cb.20210328-17-p0.en+FX+%s' % consent_id)
296
297 def _real_initialize(self):
298 self._initialize_consent()
299 if self._downloader is None:
300 return
301 if not self._login():
302 return
303
    # Regexes for the JSON blobs YouTube embeds directly in its HTML pages
    _YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+?})\s*;'
    _YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*=\s*({.+?})\s*;'
    # Tokens that may follow such an embedded JSON object (used to anchor the
    # non-greedy JSON match above)
    _YT_INITIAL_BOUNDARY_RE = r'(?:var\s+meta|</script|\n)'

    # Built-in fallback ytcfg values keyed by Innertube client name; consulted
    # by _get_default_ytcfg/_ytcfg_get_safe when a page's own ytcfg is missing
    # or incomplete
    _YT_DEFAULT_YTCFGS = {
        'WEB': {
            'INNERTUBE_API_VERSION': 'v1',
            'INNERTUBE_CLIENT_NAME': 'WEB',
            'INNERTUBE_CLIENT_VERSION': '2.20210622.10.00',
            'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
            'INNERTUBE_CONTEXT': {
                'client': {
                    'clientName': 'WEB',
                    'clientVersion': '2.20210622.10.00',
                    'hl': 'en',
                }
            },
            'INNERTUBE_CONTEXT_CLIENT_NAME': 1
        },
        'WEB_REMIX': {
            'INNERTUBE_API_VERSION': 'v1',
            'INNERTUBE_CLIENT_NAME': 'WEB_REMIX',
            'INNERTUBE_CLIENT_VERSION': '1.20210621.00.00',
            'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
            'INNERTUBE_CONTEXT': {
                'client': {
                    'clientName': 'WEB_REMIX',
                    'clientVersion': '1.20210621.00.00',
                    'hl': 'en',
                }
            },
            'INNERTUBE_CONTEXT_CLIENT_NAME': 67
        },
        'WEB_EMBEDDED_PLAYER': {
            'INNERTUBE_API_VERSION': 'v1',
            'INNERTUBE_CLIENT_NAME': 'WEB_EMBEDDED_PLAYER',
            'INNERTUBE_CLIENT_VERSION': '1.20210620.0.1',
            'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
            'INNERTUBE_CONTEXT': {
                'client': {
                    'clientName': 'WEB_EMBEDDED_PLAYER',
                    'clientVersion': '1.20210620.0.1',
                    'hl': 'en',
                }
            },
            'INNERTUBE_CONTEXT_CLIENT_NAME': 56
        },
        'ANDROID': {
            'INNERTUBE_API_VERSION': 'v1',
            'INNERTUBE_CLIENT_NAME': 'ANDROID',
            'INNERTUBE_CLIENT_VERSION': '16.20',
            'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
            'INNERTUBE_CONTEXT': {
                'client': {
                    'clientName': 'ANDROID',
                    'clientVersion': '16.20',
                    'hl': 'en',
                }
            },
            # NOTE: unlike the WEB clients this is a string, not an int
            'INNERTUBE_CONTEXT_CLIENT_NAME': 'ANDROID'
        },
        'ANDROID_EMBEDDED_PLAYER': {
            'INNERTUBE_API_VERSION': 'v1',
            'INNERTUBE_CLIENT_NAME': 'ANDROID_EMBEDDED_PLAYER',
            'INNERTUBE_CLIENT_VERSION': '16.20',
            'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
            'INNERTUBE_CONTEXT': {
                'client': {
                    'clientName': 'ANDROID_EMBEDDED_PLAYER',
                    'clientVersion': '16.20',
                    'hl': 'en',
                }
            },
            'INNERTUBE_CONTEXT_CLIENT_NAME': 'ANDROID_EMBEDDED_PLAYER'
        },
        'ANDROID_MUSIC': {
            'INNERTUBE_API_VERSION': 'v1',
            'INNERTUBE_CLIENT_NAME': 'ANDROID_MUSIC',
            'INNERTUBE_CLIENT_VERSION': '4.32',
            'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
            'INNERTUBE_CONTEXT': {
                'client': {
                    'clientName': 'ANDROID_MUSIC',
                    'clientVersion': '4.32',
                    'hl': 'en',
                }
            },
            'INNERTUBE_CONTEXT_CLIENT_NAME': 'ANDROID_MUSIC'
        }
    }

    # API hostname per Innertube client (_get_innertube_host falls back to
    # the WEB entry for clients not listed here)
    _YT_DEFAULT_INNERTUBE_HOSTS = {
        'DIRECT': 'youtubei.googleapis.com',
        'WEB': 'www.youtube.com',
        'WEB_REMIX': 'music.youtube.com',
        'ANDROID_MUSIC': 'music.youtube.com'
    }
401
402 def _get_default_ytcfg(self, client='WEB'):
403 if client in self._YT_DEFAULT_YTCFGS:
404 return copy.deepcopy(self._YT_DEFAULT_YTCFGS[client])
405 self.write_debug(f'INNERTUBE default client {client} does not exist - falling back to WEB client.')
406 return copy.deepcopy(self._YT_DEFAULT_YTCFGS['WEB'])
407
408 def _get_innertube_host(self, client='WEB'):
409 return dict_get(self._YT_DEFAULT_INNERTUBE_HOSTS, (client, 'WEB'))
410
411 def _ytcfg_get_safe(self, ytcfg, getter, expected_type=None, default_client='WEB'):
412 # try_get but with fallback to default ytcfg client values when present
413 _func = lambda y: try_get(y, getter, expected_type)
414 return _func(ytcfg) or _func(self._get_default_ytcfg(default_client))
415
416 def _extract_client_name(self, ytcfg, default_client='WEB'):
417 return self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_CLIENT_NAME'], compat_str, default_client)
418
419 def _extract_client_version(self, ytcfg, default_client='WEB'):
420 return self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_CLIENT_VERSION'], compat_str, default_client)
421
422 def _extract_api_key(self, ytcfg=None, default_client='WEB'):
423 return self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_API_KEY'], compat_str, default_client)
424
425 def _extract_context(self, ytcfg=None, default_client='WEB'):
426 _get_context = lambda y: try_get(y, lambda x: x['INNERTUBE_CONTEXT'], dict)
427 context = _get_context(ytcfg)
428 if context:
429 return context
430
431 context = _get_context(self._get_default_ytcfg(default_client))
432 if not ytcfg:
433 return context
434
435 # Recreate the client context (required)
436 context['client'].update({
437 'clientVersion': self._extract_client_version(ytcfg, default_client),
438 'clientName': self._extract_client_name(ytcfg, default_client),
439 })
440 visitor_data = try_get(ytcfg, lambda x: x['VISITOR_DATA'], compat_str)
441 if visitor_data:
442 context['client']['visitorData'] = visitor_data
443 return context
444
445 def _generate_sapisidhash_header(self, origin='https://www.youtube.com'):
446 # Sometimes SAPISID cookie isn't present but __Secure-3PAPISID is.
447 # See: https://github.com/yt-dlp/yt-dlp/issues/393
448 yt_cookies = self._get_cookies('https://www.youtube.com')
449 sapisid_cookie = dict_get(
450 yt_cookies, ('__Secure-3PAPISID', 'SAPISID'))
451 if sapisid_cookie is None:
452 return
453 time_now = round(time.time())
454 # SAPISID cookie is required if not already present
455 if not yt_cookies.get('SAPISID'):
456 self._set_cookie(
457 '.youtube.com', 'SAPISID', sapisid_cookie.value, secure=True, expire_time=time_now + 3600)
458 # SAPISIDHASH algorithm from https://stackoverflow.com/a/32065323
459 sapisidhash = hashlib.sha1(
460 f'{time_now} {sapisid_cookie.value} {origin}'.encode('utf-8')).hexdigest()
461 return f'SAPISIDHASH {time_now}_{sapisidhash}'
462
463 def _call_api(self, ep, query, video_id, fatal=True, headers=None,
464 note='Downloading API JSON', errnote='Unable to download API page',
465 context=None, api_key=None, api_hostname=None, default_client='WEB'):
466
467 data = {'context': context} if context else {'context': self._extract_context(default_client=default_client)}
468 data.update(query)
469 real_headers = self._generate_api_headers(client=default_client)
470 real_headers.update({'content-type': 'application/json'})
471 if headers:
472 real_headers.update(headers)
473 return self._download_json(
474 'https://%s/youtubei/v1/%s' % (api_hostname or self._get_innertube_host(default_client), ep),
475 video_id=video_id, fatal=fatal, note=note, errnote=errnote,
476 data=json.dumps(data).encode('utf8'), headers=real_headers,
477 query={'key': api_key or self._extract_api_key()})
478
479 def _extract_yt_initial_data(self, video_id, webpage):
480 return self._parse_json(
481 self._search_regex(
482 (r'%s\s*%s' % (self._YT_INITIAL_DATA_RE, self._YT_INITIAL_BOUNDARY_RE),
483 self._YT_INITIAL_DATA_RE), webpage, 'yt initial data'),
484 video_id)
485
486 def _extract_identity_token(self, webpage, item_id):
487 ytcfg = self._extract_ytcfg(item_id, webpage)
488 if ytcfg:
489 token = try_get(ytcfg, lambda x: x['ID_TOKEN'], compat_str)
490 if token:
491 return token
492 return self._search_regex(
493 r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
494 'identity token', default=None)
495
496 @staticmethod
497 def _extract_account_syncid(data):
498 """
499 Extract syncId required to download private playlists of secondary channels
500 @param data Either response or ytcfg
501 """
502 sync_ids = (try_get(
503 data, (lambda x: x['responseContext']['mainAppWebResponseContext']['datasyncId'],
504 lambda x: x['DATASYNC_ID']), compat_str) or '').split("||")
505 if len(sync_ids) >= 2 and sync_ids[1]:
506 # datasyncid is of the form "channel_syncid||user_syncid" for secondary channel
507 # and just "user_syncid||" for primary channel. We only want the channel_syncid
508 return sync_ids[0]
509 # ytcfg includes channel_syncid if on secondary channel
510 return data.get('DELEGATED_SESSION_ID')
511
512 def _extract_ytcfg(self, video_id, webpage):
513 if not webpage:
514 return {}
515 return self._parse_json(
516 self._search_regex(
517 r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg',
518 default='{}'), video_id, fatal=False) or {}
519
520 def _generate_api_headers(self, ytcfg=None, identity_token=None, account_syncid=None,
521 visitor_data=None, api_hostname=None, client='WEB'):
522 origin = 'https://' + (api_hostname if api_hostname else self._get_innertube_host(client))
523 headers = {
524 'X-YouTube-Client-Name': compat_str(
525 self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_CONTEXT_CLIENT_NAME'], default_client=client)),
526 'X-YouTube-Client-Version': self._extract_client_version(ytcfg, client),
527 'Origin': origin
528 }
529 if not visitor_data and ytcfg:
530 visitor_data = try_get(
531 self._extract_context(ytcfg, client), lambda x: x['client']['visitorData'], compat_str)
532 if identity_token:
533 headers['X-Youtube-Identity-Token'] = identity_token
534 if account_syncid:
535 headers['X-Goog-PageId'] = account_syncid
536 headers['X-Goog-AuthUser'] = 0
537 if visitor_data:
538 headers['X-Goog-Visitor-Id'] = visitor_data
539 auth = self._generate_sapisidhash_header(origin)
540 if auth is not None:
541 headers['Authorization'] = auth
542 headers['X-Origin'] = origin
543 return headers
544
545 @staticmethod
546 def _build_api_continuation_query(continuation, ctp=None):
547 query = {
548 'continuation': continuation
549 }
550 # TODO: Inconsistency with clickTrackingParams.
551 # Currently we have a fixed ctp contained within context (from ytcfg)
552 # and a ctp in root query for continuation.
553 if ctp:
554 query['clickTracking'] = {'clickTrackingParams': ctp}
555 return query
556
557 @classmethod
558 def _continuation_query_ajax_to_api(cls, continuation_query):
559 continuation = dict_get(continuation_query, ('continuation', 'ctoken'))
560 return cls._build_api_continuation_query(continuation, continuation_query.get('itct'))
561
562 @staticmethod
563 def _build_continuation_query(continuation, ctp=None):
564 query = {
565 'ctoken': continuation,
566 'continuation': continuation,
567 }
568 if ctp:
569 query['itct'] = ctp
570 return query
571
572 @classmethod
573 def _extract_next_continuation_data(cls, renderer):
574 next_continuation = try_get(
575 renderer, (lambda x: x['continuations'][0]['nextContinuationData'],
576 lambda x: x['continuation']['reloadContinuationData']), dict)
577 if not next_continuation:
578 return
579 continuation = next_continuation.get('continuation')
580 if not continuation:
581 return
582 ctp = next_continuation.get('clickTrackingParams')
583 return cls._build_continuation_query(continuation, ctp)
584
585 @classmethod
586 def _extract_continuation_ep_data(cls, continuation_ep: dict):
587 if isinstance(continuation_ep, dict):
588 continuation = try_get(
589 continuation_ep, lambda x: x['continuationCommand']['token'], compat_str)
590 if not continuation:
591 return
592 ctp = continuation_ep.get('clickTrackingParams')
593 return cls._build_continuation_query(continuation, ctp)
594
595 @classmethod
596 def _extract_continuation(cls, renderer):
597 next_continuation = cls._extract_next_continuation_data(renderer)
598 if next_continuation:
599 return next_continuation
600 contents = []
601 for key in ('contents', 'items'):
602 contents.extend(try_get(renderer, lambda x: x[key], list) or [])
603 for content in contents:
604 if not isinstance(content, dict):
605 continue
606 continuation_ep = try_get(
607 content, (lambda x: x['continuationItemRenderer']['continuationEndpoint'],
608 lambda x: x['continuationItemRenderer']['button']['buttonRenderer']['command']),
609 dict)
610 continuation = cls._extract_continuation_ep_data(continuation_ep)
611 if continuation:
612 return continuation
613
614 @staticmethod
615 def _extract_alerts(data):
616 for alert_dict in try_get(data, lambda x: x['alerts'], list) or []:
617 if not isinstance(alert_dict, dict):
618 continue
619 for alert in alert_dict.values():
620 alert_type = alert.get('type')
621 if not alert_type:
622 continue
623 message = try_get(alert, lambda x: x['text']['simpleText'], compat_str) or ''
624 if message:
625 yield alert_type, message
626 for run in try_get(alert, lambda x: x['text']['runs'], list) or []:
627 message += try_get(run, lambda x: x['text'], compat_str)
628 if message:
629 yield alert_type, message
630
631 def _report_alerts(self, alerts, expected=True):
632 errors = []
633 warnings = []
634 for alert_type, alert_message in alerts:
635 if alert_type.lower() == 'error':
636 errors.append([alert_type, alert_message])
637 else:
638 warnings.append([alert_type, alert_message])
639
640 for alert_type, alert_message in (warnings + errors[:-1]):
641 self.report_warning('YouTube said: %s - %s' % (alert_type, alert_message))
642 if errors:
643 raise ExtractorError('YouTube said: %s' % errors[-1][1], expected=expected)
644
645 def _extract_and_report_alerts(self, data, *args, **kwargs):
646 return self._report_alerts(self._extract_alerts(data), *args, **kwargs)
647
    def _extract_response(self, item_id, query, note='Downloading API JSON', headers=None,
                          ytcfg=None, check_get_keys=None, ep='browse', fatal=True, api_hostname=None,
                          default_client='WEB'):
        """
        Call the Innertube API via _call_api with retries.

        Retries (up to the 'extractor_retries' param, default 3) on HTTP
        500/503/404 and, when check_get_keys is given, on responses missing
        all of those keys ("incomplete data"). Alerts in the response are
        reported; an error alert aborts. On final failure: raises when
        *fatal*, otherwise warns and returns None.
        """
        response = None
        last_error = None
        count = -1  # first attempt is count 0 (not labelled as a retry)
        retries = self.get_param('extractor_retries', 3)
        if check_get_keys is None:
            check_get_keys = []
        while count < retries:
            count += 1
            if last_error:
                self.report_warning('%s. Retrying ...' % last_error)
            try:
                # fatal=True here so failures surface as ExtractorError and
                # are handled by the retry logic below
                response = self._call_api(
                    ep=ep, fatal=True, headers=headers,
                    video_id=item_id, query=query,
                    context=self._extract_context(ytcfg, default_client),
                    api_key=self._extract_api_key(ytcfg, default_client),
                    api_hostname=api_hostname, default_client=default_client,
                    note='%s%s' % (note, ' (retry #%d)' % count if count else ''))
            except ExtractorError as e:
                if isinstance(e.cause, compat_HTTPError) and e.cause.code in (500, 503, 404):
                    # Downloading page may result in intermittent 5xx HTTP error
                    # Sometimes a 404 is also received. See: https://github.com/ytdl-org/youtube-dl/issues/28289
                    last_error = 'HTTP Error %s' % e.cause.code
                    if count < retries:
                        continue
                if fatal:
                    raise
                else:
                    self.report_warning(error_to_compat_str(e))
                    return

            else:
                # Youtube may send alerts if there was an issue with the continuation page
                try:
                    self._extract_and_report_alerts(response, expected=False)
                except ExtractorError as e:
                    if fatal:
                        raise
                    self.report_warning(error_to_compat_str(e))
                    return
                # Accept the response when no sanity keys were requested or
                # at least one of them is present
                if not check_get_keys or dict_get(response, check_get_keys):
                    break
                # Youtube sometimes sends incomplete data
                # See: https://github.com/ytdl-org/youtube-dl/issues/28194
                last_error = 'Incomplete data received'
                if count >= retries:
                    if fatal:
                        raise ExtractorError(last_error)
                    else:
                        self.report_warning(last_error)
                        return
        return response
703
704 @staticmethod
705 def is_music_url(url):
706 return re.match(r'https?://music\.youtube\.com/', url) is not None
707
708 def _extract_video(self, renderer):
709 video_id = renderer.get('videoId')
710 title = try_get(
711 renderer,
712 (lambda x: x['title']['runs'][0]['text'],
713 lambda x: x['title']['simpleText']), compat_str)
714 description = try_get(
715 renderer, lambda x: x['descriptionSnippet']['runs'][0]['text'],
716 compat_str)
717 duration = parse_duration(try_get(
718 renderer, lambda x: x['lengthText']['simpleText'], compat_str))
719 view_count_text = try_get(
720 renderer, lambda x: x['viewCountText']['simpleText'], compat_str) or ''
721 view_count = str_to_int(self._search_regex(
722 r'^([\d,]+)', re.sub(r'\s', '', view_count_text),
723 'view count', default=None))
724 uploader = try_get(
725 renderer,
726 (lambda x: x['ownerText']['runs'][0]['text'],
727 lambda x: x['shortBylineText']['runs'][0]['text']), compat_str)
728 return {
729 '_type': 'url',
730 'ie_key': YoutubeIE.ie_key(),
731 'id': video_id,
732 'url': video_id,
733 'title': title,
734 'description': description,
735 'duration': duration,
736 'view_count': view_count,
737 'uploader': uploader,
738 }
739
740
741class YoutubeIE(YoutubeBaseInfoExtractor):
742 IE_DESC = 'YouTube.com'
743 _INVIDIOUS_SITES = (
744 # invidious-redirect websites
745 r'(?:www\.)?redirect\.invidious\.io',
746 r'(?:(?:www|dev)\.)?invidio\.us',
747 # Invidious instances taken from https://github.com/iv-org/documentation/blob/master/Invidious-Instances.md
748 r'(?:www\.)?invidious\.pussthecat\.org',
749 r'(?:www\.)?invidious\.zee\.li',
750 r'(?:www\.)?invidious\.ethibox\.fr',
751 r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',
752 # youtube-dl invidious instances list
753 r'(?:(?:www|no)\.)?invidiou\.sh',
754 r'(?:(?:www|fi)\.)?invidious\.snopyta\.org',
755 r'(?:www\.)?invidious\.kabi\.tk',
756 r'(?:www\.)?invidious\.mastodon\.host',
757 r'(?:www\.)?invidious\.zapashcanon\.fr',
758 r'(?:www\.)?(?:invidious(?:-us)?|piped)\.kavin\.rocks',
759 r'(?:www\.)?invidious\.tinfoil-hat\.net',
760 r'(?:www\.)?invidious\.himiko\.cloud',
761 r'(?:www\.)?invidious\.reallyancient\.tech',
762 r'(?:www\.)?invidious\.tube',
763 r'(?:www\.)?invidiou\.site',
764 r'(?:www\.)?invidious\.site',
765 r'(?:www\.)?invidious\.xyz',
766 r'(?:www\.)?invidious\.nixnet\.xyz',
767 r'(?:www\.)?invidious\.048596\.xyz',
768 r'(?:www\.)?invidious\.drycat\.fr',
769 r'(?:www\.)?inv\.skyn3t\.in',
770 r'(?:www\.)?tube\.poal\.co',
771 r'(?:www\.)?tube\.connect\.cafe',
772 r'(?:www\.)?vid\.wxzm\.sx',
773 r'(?:www\.)?vid\.mint\.lgbt',
774 r'(?:www\.)?vid\.puffyan\.us',
775 r'(?:www\.)?yewtu\.be',
776 r'(?:www\.)?yt\.elukerio\.org',
777 r'(?:www\.)?yt\.lelux\.fi',
778 r'(?:www\.)?invidious\.ggc-project\.de',
779 r'(?:www\.)?yt\.maisputain\.ovh',
780 r'(?:www\.)?ytprivate\.com',
781 r'(?:www\.)?invidious\.13ad\.de',
782 r'(?:www\.)?invidious\.toot\.koeln',
783 r'(?:www\.)?invidious\.fdn\.fr',
784 r'(?:www\.)?watch\.nettohikari\.com',
785 r'(?:www\.)?invidious\.namazso\.eu',
786 r'(?:www\.)?invidious\.silkky\.cloud',
787 r'(?:www\.)?invidious\.exonip\.de',
788 r'(?:www\.)?invidious\.riverside\.rocks',
789 r'(?:www\.)?invidious\.blamefran\.net',
790 r'(?:www\.)?invidious\.moomoo\.de',
791 r'(?:www\.)?ytb\.trom\.tf',
792 r'(?:www\.)?yt\.cyberhost\.uk',
793 r'(?:www\.)?kgg2m7yk5aybusll\.onion',
794 r'(?:www\.)?qklhadlycap4cnod\.onion',
795 r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',
796 r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',
797 r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',
798 r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',
799 r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',
800 r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',
801 r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',
802 r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',
803 r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',
804 r'(?:www\.)?hpniueoejy4opn7bc4ftgazyqjoeqwlvh2uiku2xqku6zpoa4bf5ruid\.onion',
805 )
806 _VALID_URL = r"""(?x)^
807 (
808 (?:https?://|//) # http(s):// or protocol-independent URL
809 (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com|
810 (?:www\.)?deturl\.com/www\.youtube\.com|
811 (?:www\.)?pwnyoutube\.com|
812 (?:www\.)?hooktube\.com|
813 (?:www\.)?yourepeat\.com|
814 tube\.majestyc\.net|
815 %(invidious)s|
816 youtube\.googleapis\.com)/ # the various hostnames, with wildcard subdomains
817 (?:.*?\#/)? # handle anchor (#/) redirect urls
818 (?: # the various things that can precede the ID:
819 (?:(?:v|embed|e)/(?!videoseries)) # v/ or embed/ or e/
820 |(?: # or the v= param in all its forms
821 (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
822 (?:\?|\#!?) # the params delimiter ? or # or #!
823 (?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
824 v=
825 )
826 ))
827 |(?:
828 youtu\.be| # just youtu.be/xxxx
829 vid\.plus| # or vid.plus/xxxx
830 zwearz\.com/watch| # or zwearz.com/watch/xxxx
831 %(invidious)s
832 )/
833 |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
834 )
835 )? # all until now is optional -> you can pass the naked ID
836 (?P<id>[0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
837 (?(1).+)? # if we found the ID, everything can follow
838 (?:\#|$)""" % {
839 'invidious': '|'.join(_INVIDIOUS_SITES),
840 }
841 _PLAYER_INFO_RE = (
842 r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player',
843 r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',
844 r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$',
845 )
    # Static itag -> format metadata table, used to supplement/override what
    # the player response reports. Keys are itag strings; values are merged
    # into the format dicts (ext, width/height, codecs, abr, fps, ...).
    _formats = {
        '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
        '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
        '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
        '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
        '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
        '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
        '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
        '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
        # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
        '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
        '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
        '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
        '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
        '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
        '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
        '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
        '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
        '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},


        # 3D videos
        '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
        '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
        '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
        '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
        '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
        '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
        '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},

        # Apple HTTP Live Streaming
        '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
        '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
        '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
        '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
        '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
        '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
        '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
        '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},

        # DASH mp4 video
        '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
        '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
        '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
        '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
        '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
        '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'},  # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
        '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
        '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
        '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
        '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
        '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
        '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},

        # Dash mp4 audio
        '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
        '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
        '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
        '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
        '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
        '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
        '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},

        # Dash webm
        '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
        '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
        '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
        '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
        '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
        '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
        '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
        '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
        '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
        '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
        '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
        '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},

        # Dash webm audio
        '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
        '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},

        # Dash webm audio with opus inside
        '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
        '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
        '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},

        # RTMP (unnamed)
        '_rtmp': {'protocol': 'rtmp'},

        # av01 video only formats sometimes served with "unknown" codecs
        '394': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
        '395': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
        '396': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
        '397': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
    }
    # Subtitle serialization formats requested from the timedtext endpoint.
    _SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')

    # Playability-status reason strings that indicate an age-gated video
    # (matched against the player response elsewhere in this extractor).
    _AGE_GATE_REASONS = (
        'Sign in to confirm your age',
        'This video may be inappropriate for some users.',
        'Sorry, this content is age-restricted.')

    # Geo restriction is handled by this extractor itself, so the generic
    # geo-bypass machinery of InfoExtractor is disabled.
    _GEO_BYPASS = False

    IE_NAME = 'youtube'
961 _TESTS = [
962 {
963 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
964 'info_dict': {
965 'id': 'BaW_jenozKc',
966 'ext': 'mp4',
967 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
968 'uploader': 'Philipp Hagemeister',
969 'uploader_id': 'phihag',
970 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
971 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
972 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
973 'upload_date': '20121002',
974 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
975 'categories': ['Science & Technology'],
976 'tags': ['youtube-dl'],
977 'duration': 10,
978 'view_count': int,
979 'like_count': int,
980 'dislike_count': int,
981 'start_time': 1,
982 'end_time': 9,
983 }
984 },
985 {
986 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
987 'note': 'Embed-only video (#1746)',
988 'info_dict': {
989 'id': 'yZIXLfi8CZQ',
990 'ext': 'mp4',
991 'upload_date': '20120608',
992 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
993 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
994 'uploader': 'SET India',
995 'uploader_id': 'setindia',
996 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
997 'age_limit': 18,
998 },
999 'skip': 'Private video',
1000 },
1001 {
1002 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=yZIXLfi8CZQ',
1003 'note': 'Use the first video ID in the URL',
1004 'info_dict': {
1005 'id': 'BaW_jenozKc',
1006 'ext': 'mp4',
1007 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
1008 'uploader': 'Philipp Hagemeister',
1009 'uploader_id': 'phihag',
1010 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
1011 'upload_date': '20121002',
1012 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
1013 'categories': ['Science & Technology'],
1014 'tags': ['youtube-dl'],
1015 'duration': 10,
1016 'view_count': int,
1017 'like_count': int,
1018 'dislike_count': int,
1019 },
1020 'params': {
1021 'skip_download': True,
1022 },
1023 },
1024 {
1025 'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
1026 'note': '256k DASH audio (format 141) via DASH manifest',
1027 'info_dict': {
1028 'id': 'a9LDPn-MO4I',
1029 'ext': 'm4a',
1030 'upload_date': '20121002',
1031 'uploader_id': '8KVIDEO',
1032 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
1033 'description': '',
1034 'uploader': '8KVIDEO',
1035 'title': 'UHDTV TEST 8K VIDEO.mp4'
1036 },
1037 'params': {
1038 'youtube_include_dash_manifest': True,
1039 'format': '141',
1040 },
1041 'skip': 'format 141 not served anymore',
1042 },
1043 # DASH manifest with encrypted signature
1044 {
1045 'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
1046 'info_dict': {
1047 'id': 'IB3lcPjvWLA',
1048 'ext': 'm4a',
1049 'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
1050 'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
1051 'duration': 244,
1052 'uploader': 'AfrojackVEVO',
1053 'uploader_id': 'AfrojackVEVO',
1054 'upload_date': '20131011',
1055 'abr': 129.495,
1056 },
1057 'params': {
1058 'youtube_include_dash_manifest': True,
1059 'format': '141/bestaudio[ext=m4a]',
1060 },
1061 },
1062 # Controversy video
1063 {
1064 'url': 'https://www.youtube.com/watch?v=T4XJQO3qol8',
1065 'info_dict': {
1066 'id': 'T4XJQO3qol8',
1067 'ext': 'mp4',
1068 'duration': 219,
1069 'upload_date': '20100909',
1070 'uploader': 'Amazing Atheist',
1071 'uploader_id': 'TheAmazingAtheist',
1072 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheAmazingAtheist',
1073 'title': 'Burning Everyone\'s Koran',
1074 'description': 'SUBSCRIBE: http://www.youtube.com/saturninefilms \r\n\r\nEven Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html',
1075 }
1076 },
1077 # Normal age-gate video (embed allowed)
1078 {
1079 'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
1080 'info_dict': {
1081 'id': 'HtVdAasjOgU',
1082 'ext': 'mp4',
1083 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
1084 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
1085 'duration': 142,
1086 'uploader': 'The Witcher',
1087 'uploader_id': 'WitcherGame',
1088 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
1089 'upload_date': '20140605',
1090 'age_limit': 18,
1091 },
1092 },
1093 # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
1094 # YouTube Red ad is not captured for creator
1095 {
1096 'url': '__2ABJjxzNo',
1097 'info_dict': {
1098 'id': '__2ABJjxzNo',
1099 'ext': 'mp4',
1100 'duration': 266,
1101 'upload_date': '20100430',
1102 'uploader_id': 'deadmau5',
1103 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
1104 'creator': 'deadmau5',
1105 'description': 'md5:6cbcd3a92ce1bc676fc4d6ab4ace2336',
1106 'uploader': 'deadmau5',
1107 'title': 'Deadmau5 - Some Chords (HD)',
1108 'alt_title': 'Some Chords',
1109 },
1110 'expected_warnings': [
1111 'DASH manifest missing',
1112 ]
1113 },
1114 # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
1115 {
1116 'url': 'lqQg6PlCWgI',
1117 'info_dict': {
1118 'id': 'lqQg6PlCWgI',
1119 'ext': 'mp4',
1120 'duration': 6085,
1121 'upload_date': '20150827',
1122 'uploader_id': 'olympic',
1123 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
1124 'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
1125 'uploader': 'Olympic',
1126 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
1127 },
1128 'params': {
1129 'skip_download': 'requires avconv',
1130 }
1131 },
1132 # Non-square pixels
1133 {
1134 'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
1135 'info_dict': {
1136 'id': '_b-2C3KPAM0',
1137 'ext': 'mp4',
1138 'stretched_ratio': 16 / 9.,
1139 'duration': 85,
1140 'upload_date': '20110310',
1141 'uploader_id': 'AllenMeow',
1142 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
1143 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
1144 'uploader': '孫ᄋᄅ',
1145 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
1146 },
1147 },
1148 # url_encoded_fmt_stream_map is empty string
1149 {
1150 'url': 'qEJwOuvDf7I',
1151 'info_dict': {
1152 'id': 'qEJwOuvDf7I',
1153 'ext': 'webm',
1154 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
1155 'description': '',
1156 'upload_date': '20150404',
1157 'uploader_id': 'spbelect',
1158 'uploader': 'Наблюдатели Петербурга',
1159 },
1160 'params': {
1161 'skip_download': 'requires avconv',
1162 },
1163 'skip': 'This live event has ended.',
1164 },
1165 # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
1166 {
1167 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
1168 'info_dict': {
1169 'id': 'FIl7x6_3R5Y',
1170 'ext': 'webm',
1171 'title': 'md5:7b81415841e02ecd4313668cde88737a',
1172 'description': 'md5:116377fd2963b81ec4ce64b542173306',
1173 'duration': 220,
1174 'upload_date': '20150625',
1175 'uploader_id': 'dorappi2000',
1176 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
1177 'uploader': 'dorappi2000',
1178 'formats': 'mincount:31',
1179 },
1180 'skip': 'not actual anymore',
1181 },
1182 # DASH manifest with segment_list
1183 {
1184 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
1185 'md5': '8ce563a1d667b599d21064e982ab9e31',
1186 'info_dict': {
1187 'id': 'CsmdDsKjzN8',
1188 'ext': 'mp4',
1189 'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510
1190 'uploader': 'Airtek',
1191 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
1192 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
1193 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
1194 },
1195 'params': {
1196 'youtube_include_dash_manifest': True,
1197 'format': '135', # bestvideo
1198 },
1199 'skip': 'This live event has ended.',
1200 },
1201 {
1202 # Multifeed videos (multiple cameras), URL is for Main Camera
1203 'url': 'https://www.youtube.com/watch?v=jvGDaLqkpTg',
1204 'info_dict': {
1205 'id': 'jvGDaLqkpTg',
1206 'title': 'Tom Clancy Free Weekend Rainbow Whatever',
1207 'description': 'md5:e03b909557865076822aa169218d6a5d',
1208 },
1209 'playlist': [{
1210 'info_dict': {
1211 'id': 'jvGDaLqkpTg',
1212 'ext': 'mp4',
1213 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Main Camera)',
1214 'description': 'md5:e03b909557865076822aa169218d6a5d',
1215 'duration': 10643,
1216 'upload_date': '20161111',
1217 'uploader': 'Team PGP',
1218 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1219 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
1220 },
1221 }, {
1222 'info_dict': {
1223 'id': '3AKt1R1aDnw',
1224 'ext': 'mp4',
1225 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 2)',
1226 'description': 'md5:e03b909557865076822aa169218d6a5d',
1227 'duration': 10991,
1228 'upload_date': '20161111',
1229 'uploader': 'Team PGP',
1230 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1231 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
1232 },
1233 }, {
1234 'info_dict': {
1235 'id': 'RtAMM00gpVc',
1236 'ext': 'mp4',
1237 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 3)',
1238 'description': 'md5:e03b909557865076822aa169218d6a5d',
1239 'duration': 10995,
1240 'upload_date': '20161111',
1241 'uploader': 'Team PGP',
1242 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1243 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
1244 },
1245 }, {
1246 'info_dict': {
1247 'id': '6N2fdlP3C5U',
1248 'ext': 'mp4',
1249 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 4)',
1250 'description': 'md5:e03b909557865076822aa169218d6a5d',
1251 'duration': 10990,
1252 'upload_date': '20161111',
1253 'uploader': 'Team PGP',
1254 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1255 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
1256 },
1257 }],
1258 'params': {
1259 'skip_download': True,
1260 },
1261 },
1262 {
1263 # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
1264 'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
1265 'info_dict': {
1266 'id': 'gVfLd0zydlo',
1267 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
1268 },
1269 'playlist_count': 2,
1270 'skip': 'Not multifeed anymore',
1271 },
1272 {
1273 'url': 'https://vid.plus/FlRa-iH7PGw',
1274 'only_matching': True,
1275 },
1276 {
1277 'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
1278 'only_matching': True,
1279 },
1280 {
1281 # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
1282 # Also tests cut-off URL expansion in video description (see
1283 # https://github.com/ytdl-org/youtube-dl/issues/1892,
1284 # https://github.com/ytdl-org/youtube-dl/issues/8164)
1285 'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
1286 'info_dict': {
1287 'id': 'lsguqyKfVQg',
1288 'ext': 'mp4',
1289 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
1290 'alt_title': 'Dark Walk - Position Music',
1291 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
1292 'duration': 133,
1293 'upload_date': '20151119',
1294 'uploader_id': 'IronSoulElf',
1295 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
1296 'uploader': 'IronSoulElf',
1297 'creator': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
1298 'track': 'Dark Walk - Position Music',
1299 'artist': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
1300 'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
1301 },
1302 'params': {
1303 'skip_download': True,
1304 },
1305 },
1306 {
1307 # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
1308 'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
1309 'only_matching': True,
1310 },
1311 {
1312 # Video with yt:stretch=17:0
1313 'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
1314 'info_dict': {
1315 'id': 'Q39EVAstoRM',
1316 'ext': 'mp4',
1317 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
1318 'description': 'md5:ee18a25c350637c8faff806845bddee9',
1319 'upload_date': '20151107',
1320 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
1321 'uploader': 'CH GAMER DROID',
1322 },
1323 'params': {
1324 'skip_download': True,
1325 },
1326 'skip': 'This video does not exist.',
1327 },
1328 {
1329 # Video with incomplete 'yt:stretch=16:'
1330 'url': 'https://www.youtube.com/watch?v=FRhJzUSJbGI',
1331 'only_matching': True,
1332 },
1333 {
1334 # Video licensed under Creative Commons
1335 'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
1336 'info_dict': {
1337 'id': 'M4gD1WSo5mA',
1338 'ext': 'mp4',
1339 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
1340 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
1341 'duration': 721,
1342 'upload_date': '20150127',
1343 'uploader_id': 'BerkmanCenter',
1344 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
1345 'uploader': 'The Berkman Klein Center for Internet & Society',
1346 'license': 'Creative Commons Attribution license (reuse allowed)',
1347 },
1348 'params': {
1349 'skip_download': True,
1350 },
1351 },
1352 {
1353 # Channel-like uploader_url
1354 'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
1355 'info_dict': {
1356 'id': 'eQcmzGIKrzg',
1357 'ext': 'mp4',
1358 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
1359 'description': 'md5:13a2503d7b5904ef4b223aa101628f39',
1360 'duration': 4060,
1361 'upload_date': '20151119',
1362 'uploader': 'Bernie Sanders',
1363 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
1364 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
1365 'license': 'Creative Commons Attribution license (reuse allowed)',
1366 },
1367 'params': {
1368 'skip_download': True,
1369 },
1370 },
1371 {
1372 'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
1373 'only_matching': True,
1374 },
1375 {
1376 # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
1377 'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
1378 'only_matching': True,
1379 },
1380 {
1381 # Rental video preview
1382 'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
1383 'info_dict': {
1384 'id': 'uGpuVWrhIzE',
1385 'ext': 'mp4',
1386 'title': 'Piku - Trailer',
1387 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
1388 'upload_date': '20150811',
1389 'uploader': 'FlixMatrix',
1390 'uploader_id': 'FlixMatrixKaravan',
1391 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
1392 'license': 'Standard YouTube License',
1393 },
1394 'params': {
1395 'skip_download': True,
1396 },
1397 'skip': 'This video is not available.',
1398 },
1399 {
1400 # YouTube Red video with episode data
1401 'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
1402 'info_dict': {
1403 'id': 'iqKdEhx-dD4',
1404 'ext': 'mp4',
1405 'title': 'Isolation - Mind Field (Ep 1)',
1406 'description': 'md5:f540112edec5d09fc8cc752d3d4ba3cd',
1407 'duration': 2085,
1408 'upload_date': '20170118',
1409 'uploader': 'Vsauce',
1410 'uploader_id': 'Vsauce',
1411 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
1412 'series': 'Mind Field',
1413 'season_number': 1,
1414 'episode_number': 1,
1415 },
1416 'params': {
1417 'skip_download': True,
1418 },
1419 'expected_warnings': [
1420 'Skipping DASH manifest',
1421 ],
1422 },
1423 {
1424 # The following content has been identified by the YouTube community
1425 # as inappropriate or offensive to some audiences.
1426 'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
1427 'info_dict': {
1428 'id': '6SJNVb0GnPI',
1429 'ext': 'mp4',
1430 'title': 'Race Differences in Intelligence',
1431 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
1432 'duration': 965,
1433 'upload_date': '20140124',
1434 'uploader': 'New Century Foundation',
1435 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
1436 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
1437 },
1438 'params': {
1439 'skip_download': True,
1440 },
1441 'skip': 'This video has been removed for violating YouTube\'s policy on hate speech.',
1442 },
1443 {
1444 # itag 212
1445 'url': '1t24XAntNCY',
1446 'only_matching': True,
1447 },
1448 {
1449 # geo restricted to JP
1450 'url': 'sJL6WA-aGkQ',
1451 'only_matching': True,
1452 },
1453 {
1454 'url': 'https://invidio.us/watch?v=BaW_jenozKc',
1455 'only_matching': True,
1456 },
1457 {
1458 'url': 'https://redirect.invidious.io/watch?v=BaW_jenozKc',
1459 'only_matching': True,
1460 },
1461 {
1462 # from https://nitter.pussthecat.org/YouTube/status/1360363141947944964#m
1463 'url': 'https://redirect.invidious.io/Yh0AhrY9GjA',
1464 'only_matching': True,
1465 },
1466 {
1467 # DRM protected
1468 'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
1469 'only_matching': True,
1470 },
1471 {
1472 # Video with unsupported adaptive stream type formats
1473 'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
1474 'info_dict': {
1475 'id': 'Z4Vy8R84T1U',
1476 'ext': 'mp4',
1477 'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
1478 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
1479 'duration': 433,
1480 'upload_date': '20130923',
1481 'uploader': 'Amelia Putri Harwita',
1482 'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',
1483 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',
1484 'formats': 'maxcount:10',
1485 },
1486 'params': {
1487 'skip_download': True,
1488 'youtube_include_dash_manifest': False,
1489 },
1490 'skip': 'not actual anymore',
1491 },
1492 {
1493 # Youtube Music Auto-generated description
1494 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1495 'info_dict': {
1496 'id': 'MgNrAu2pzNs',
1497 'ext': 'mp4',
1498 'title': 'Voyeur Girl',
1499 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
1500 'upload_date': '20190312',
1501 'uploader': 'Stephen - Topic',
1502 'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',
1503 'artist': 'Stephen',
1504 'track': 'Voyeur Girl',
1505 'album': 'it\'s too much love to know my dear',
1506 'release_date': '20190313',
1507 'release_year': 2019,
1508 },
1509 'params': {
1510 'skip_download': True,
1511 },
1512 },
1513 {
1514 'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',
1515 'only_matching': True,
1516 },
1517 {
1518 # invalid -> valid video id redirection
1519 'url': 'DJztXj2GPfl',
1520 'info_dict': {
1521 'id': 'DJztXj2GPfk',
1522 'ext': 'mp4',
1523 'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',
1524 'description': 'md5:bf577a41da97918e94fa9798d9228825',
1525 'upload_date': '20090125',
1526 'uploader': 'Prochorowka',
1527 'uploader_id': 'Prochorowka',
1528 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',
1529 'artist': 'Panjabi MC',
1530 'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',
1531 'album': 'Beware of the Boys (Mundian To Bach Ke)',
1532 },
1533 'params': {
1534 'skip_download': True,
1535 },
1536 'skip': 'Video unavailable',
1537 },
1538 {
1539 # empty description results in an empty string
1540 'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',
1541 'info_dict': {
1542 'id': 'x41yOUIvK2k',
1543 'ext': 'mp4',
1544 'title': 'IMG 3456',
1545 'description': '',
1546 'upload_date': '20170613',
1547 'uploader_id': 'ElevageOrVert',
1548 'uploader': 'ElevageOrVert',
1549 },
1550 'params': {
1551 'skip_download': True,
1552 },
1553 },
1554 {
1555 # with '};' inside yt initial data (see [1])
1556 # see [2] for an example with '};' inside ytInitialPlayerResponse
1557 # 1. https://github.com/ytdl-org/youtube-dl/issues/27093
1558 # 2. https://github.com/ytdl-org/youtube-dl/issues/27216
1559 'url': 'https://www.youtube.com/watch?v=CHqg6qOn4no',
1560 'info_dict': {
1561 'id': 'CHqg6qOn4no',
1562 'ext': 'mp4',
1563 'title': 'Part 77 Sort a list of simple types in c#',
1564 'description': 'md5:b8746fa52e10cdbf47997903f13b20dc',
1565 'upload_date': '20130831',
1566 'uploader_id': 'kudvenkat',
1567 'uploader': 'kudvenkat',
1568 },
1569 'params': {
1570 'skip_download': True,
1571 },
1572 },
1573 {
1574 # another example of '};' in ytInitialData
1575 'url': 'https://www.youtube.com/watch?v=gVfgbahppCY',
1576 'only_matching': True,
1577 },
1578 {
1579 'url': 'https://www.youtube.com/watch_popup?v=63RmMXCd_bQ',
1580 'only_matching': True,
1581 },
1582 {
1583 # https://github.com/ytdl-org/youtube-dl/pull/28094
1584 'url': 'OtqTfy26tG0',
1585 'info_dict': {
1586 'id': 'OtqTfy26tG0',
1587 'ext': 'mp4',
1588 'title': 'Burn Out',
1589 'description': 'md5:8d07b84dcbcbfb34bc12a56d968b6131',
1590 'upload_date': '20141120',
1591 'uploader': 'The Cinematic Orchestra - Topic',
1592 'uploader_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',
1593 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',
1594 'artist': 'The Cinematic Orchestra',
1595 'track': 'Burn Out',
1596 'album': 'Every Day',
1597 'release_data': None,
1598 'release_year': None,
1599 },
1600 'params': {
1601 'skip_download': True,
1602 },
1603 },
1604 {
1605 # controversial video, only works with bpctr when authenticated with cookies
1606 'url': 'https://www.youtube.com/watch?v=nGC3D_FkCmg',
1607 'only_matching': True,
1608 },
1609 {
1610 # restricted location, https://github.com/ytdl-org/youtube-dl/issues/28685
1611 'url': 'cBvYw8_A0vQ',
1612 'info_dict': {
1613 'id': 'cBvYw8_A0vQ',
1614 'ext': 'mp4',
1615 'title': '4K Ueno Okachimachi Street Scenes 上野御徒町歩き',
1616 'description': 'md5:ea770e474b7cd6722b4c95b833c03630',
1617 'upload_date': '20201120',
1618 'uploader': 'Walk around Japan',
1619 'uploader_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',
1620 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',
1621 },
1622 'params': {
1623 'skip_download': True,
1624 },
1625 }, {
1626 # Has multiple audio streams
1627 'url': 'WaOKSUlf4TM',
1628 'only_matching': True
1629 }, {
1630 # Requires Premium: has format 141 when requested using YTM url
1631 'url': 'https://music.youtube.com/watch?v=XclachpHxis',
1632 'only_matching': True
1633 }, {
1634 # multiple subtitles with same lang_code
1635 'url': 'https://www.youtube.com/watch?v=wsQiKKfKxug',
1636 'only_matching': True,
1637 }, {
1638 # Force use android client fallback
1639 'url': 'https://www.youtube.com/watch?v=YOelRv7fMxY',
1640 'info_dict': {
1641 'id': 'YOelRv7fMxY',
1642 'title': 'Digging a Secret Tunnel from my Workshop',
1643 'ext': '3gp',
1644 'upload_date': '20210624',
1645 'channel_id': 'UCp68_FLety0O-n9QU6phsgw',
1646 'uploader': 'colinfurze',
1647 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCp68_FLety0O-n9QU6phsgw',
1648 'description': 'md5:ecb672623246d98c6c562eed6ae798c3'
1649 },
1650 'params': {
1651 'format': '17', # 3gp format available on android
1652 'extractor_args': {'youtube': {'player_client': ['android']}},
1653 },
1654 },
1655 {
1656 # Skip download of additional client configs (remix client config in this case)
1657 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1658 'only_matching': True,
1659 'params': {
1660 'extractor_args': {'youtube': {'player_skip': ['configs']}},
1661 },
1662 }
1663 ]
1664
    @classmethod
    def suitable(cls, url):
        """Claim only bare video URLs; URLs carrying a 'list' query
        parameter are left for the playlist/tab extractors to handle.
        """
        # Hack for lazy extractors until more generic solution is implemented
        # (see #28780)
        # NOTE: the local import (rather than using the module-level parse_qs
        # directly) is deliberate so this also works when the class is loaded
        # as a lazy extractor stub.
        from .youtube import parse_qs
        qs = parse_qs(url)
        if qs.get('list', [None])[0]:
            return False
        return super(YoutubeIE, cls).suitable(url)
1674
1675 def __init__(self, *args, **kwargs):
1676 super(YoutubeIE, self).__init__(*args, **kwargs)
1677 self._code_cache = {}
1678 self._player_cache = {}
1679
1680 def _extract_player_url(self, ytcfg=None, webpage=None):
1681 player_url = try_get(ytcfg, (lambda x: x['PLAYER_JS_URL']), str)
1682 if not player_url:
1683 player_url = self._search_regex(
1684 r'"(?:PLAYER_JS_URL|jsUrl)"\s*:\s*"([^"]+)"',
1685 webpage, 'player URL', fatal=False)
1686 if player_url.startswith('//'):
1687 player_url = 'https:' + player_url
1688 elif not re.match(r'https?://', player_url):
1689 player_url = compat_urlparse.urljoin(
1690 'https://www.youtube.com', player_url)
1691 return player_url
1692
1693 def _signature_cache_id(self, example_sig):
1694 """ Return a string representation of a signature """
1695 return '.'.join(compat_str(len(part)) for part in example_sig.split('.'))
1696
1697 @classmethod
1698 def _extract_player_info(cls, player_url):
1699 for player_re in cls._PLAYER_INFO_RE:
1700 id_m = re.search(player_re, player_url)
1701 if id_m:
1702 break
1703 else:
1704 raise ExtractorError('Cannot identify player %r' % player_url)
1705 return id_m.group('id')
1706
1707 def _load_player(self, video_id, player_url, fatal=True) -> bool:
1708 player_id = self._extract_player_info(player_url)
1709 if player_id not in self._code_cache:
1710 self._code_cache[player_id] = self._download_webpage(
1711 player_url, video_id, fatal=fatal,
1712 note='Downloading player ' + player_id,
1713 errnote='Download of %s failed' % player_url)
1714 return player_id in self._code_cache
1715
    def _extract_signature_function(self, video_id, player_url, example_sig):
        # Returns a callable mapping an encrypted signature string to its
        # decrypted form. Cached on disk keyed by player id plus the
        # '.'-separated segment-length layout of example_sig.
        player_id = self._extract_player_info(player_url)

        # Read from filesystem cache
        func_id = 'js_%s_%s' % (
            player_id, self._signature_cache_id(example_sig))
        assert os.path.basename(func_id) == func_id

        cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
        if cache_spec is not None:
            # cache_spec is a permutation: output char i is input char cache_spec[i]
            return lambda s: ''.join(s[i] for i in cache_spec)

        if self._load_player(video_id, player_url):
            code = self._code_cache[player_id]
            res = self._parse_sig_js(code)

            # Run the JS function once over a probe string whose characters
            # are all distinct, recovering the permutation it applies so the
            # JS interpreter need not run again for this signature layout.
            test_string = ''.join(map(compat_chr, range(len(example_sig))))
            cache_res = res(test_string)
            cache_spec = [ord(c) for c in cache_res]

            self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec)
            return res
        # NOTE(review): falls through returning None if the player could not
        # be loaded - callers appear to rely on _load_player raising first.
1738
    def _print_sig_code(self, func, example_sig):
        # Debug helper: prints Python source equivalent to the extracted
        # signature permutation (used with --youtube-print-sig-code).
        def gen_sig_code(idxs):
            # Compress the permutation index list into slice expressions
            # wherever consecutive indices form an arithmetic run of step +-1.
            def _genslice(start, end, step):
                starts = '' if start == 0 else str(start)
                ends = (':%d' % (end + step)) if end + step >= 0 else ':'
                steps = '' if step == 1 else (':%d' % step)
                return 's[%s%s%s]' % (starts, ends, steps)

            step = None
            # Quelch pyflakes warnings - start will be set when step is set
            start = '(Never used)'
            for i, prev in zip(idxs[1:], idxs[:-1]):
                if step is not None:
                    # Inside a run: extend it or flush the slice and reset
                    if i - prev == step:
                        continue
                    yield _genslice(start, prev, step)
                    step = None
                    continue
                if i - prev in [-1, 1]:
                    # A new run begins at prev
                    step = i - prev
                    start = prev
                    continue
                else:
                    yield 's[%d]' % prev
            # Flush the final element or pending run
            if step is None:
                yield 's[%d]' % i
            else:
                yield _genslice(start, i, step)

        # Recover the permutation by running func over a probe string of
        # unique characters, then render it as Python indexing code.
        test_string = ''.join(map(compat_chr, range(len(example_sig))))
        cache_res = func(test_string)
        cache_spec = [ord(c) for c in cache_res]
        expr_code = ' + '.join(gen_sig_code(cache_spec))
        signature_id_tuple = '(%s)' % (
            ', '.join(compat_str(len(p)) for p in example_sig.split('.')))
        code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
                ' return %s\n') % (signature_id_tuple, expr_code)
        self.to_screen('Extracted signature function:\n' + code)
1777
    def _parse_sig_js(self, jscode):
        """Locate the signature-descrambling function inside the player JS
        and return a Python callable wrapping it via JSInterpreter."""
        # Patterns are tried in order; earlier entries match current player
        # builds, the tail keeps compatibility with historical ones.
        funcname = self._search_regex(
            (r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\bm=(?P<sig>[a-zA-Z0-9$]{2})\(decodeURIComponent\(h\.s\)\)',
             r'\bc&&\(c=(?P<sig>[a-zA-Z0-9$]{2})\(decodeURIComponent\(c\)\)',
             r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\);[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\)',
             r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
             r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
             # Obsolete patterns
             r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
             r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\('),
            jscode, 'Initial JS player signature function name', group='sig')

        jsi = JSInterpreter(jscode)
        initial_function = jsi.extract_function(funcname)
        # The JS function takes its argument list as a single array
        return lambda s: initial_function([s])
1801
1802 def _decrypt_signature(self, s, video_id, player_url):
1803 """Turn the encrypted s field into a working signature"""
1804
1805 if player_url is None:
1806 raise ExtractorError('Cannot decrypt signature without player_url')
1807
1808 try:
1809 player_id = (player_url, self._signature_cache_id(s))
1810 if player_id not in self._player_cache:
1811 func = self._extract_signature_function(
1812 video_id, player_url, s
1813 )
1814 self._player_cache[player_id] = func
1815 func = self._player_cache[player_id]
1816 if self.get_param('youtube_print_sig_code'):
1817 self._print_sig_code(func, s)
1818 return func(s)
1819 except Exception as e:
1820 tb = traceback.format_exc()
1821 raise ExtractorError(
1822 'Signature extraction failed: ' + tb, cause=e)
1823
1824 def _extract_signature_timestamp(self, video_id, player_url, ytcfg=None, fatal=False):
1825 """
1826 Extract signatureTimestamp (sts)
1827 Required to tell API what sig/player version is in use.
1828 """
1829 sts = None
1830 if isinstance(ytcfg, dict):
1831 sts = int_or_none(ytcfg.get('STS'))
1832
1833 if not sts:
1834 # Attempt to extract from player
1835 if player_url is None:
1836 error_msg = 'Cannot extract signature timestamp without player_url.'
1837 if fatal:
1838 raise ExtractorError(error_msg)
1839 self.report_warning(error_msg)
1840 return
1841 if self._load_player(video_id, player_url, fatal=fatal):
1842 player_id = self._extract_player_info(player_url)
1843 code = self._code_cache[player_id]
1844 sts = int_or_none(self._search_regex(
1845 r'(?:signatureTimestamp|sts)\s*:\s*(?P<sts>[0-9]{5})', code,
1846 'JS player signature timestamp', group='sts', fatal=fatal))
1847 return sts
1848
    def _mark_watched(self, video_id, player_response):
        """Fire the playback-tracking URL so the video is marked watched on the account."""
        playback_url = url_or_none(try_get(
            player_response,
            lambda x: x['playbackTracking']['videostatsPlaybackUrl']['baseUrl']))
        if not playback_url:
            return
        parsed_playback_url = compat_urlparse.urlparse(playback_url)
        qs = compat_urlparse.parse_qs(parsed_playback_url.query)

        # cpn generation algorithm is reverse engineered from base.js.
        # In fact it works even with dummy cpn.
        CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
        cpn = ''.join((CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(0, 16)))

        # Attach protocol version and client playback nonce to the query
        qs.update({
            'ver': ['2'],
            'cpn': [cpn],
        })
        playback_url = compat_urlparse.urlunparse(
            parsed_playback_url._replace(query=compat_urllib_parse_urlencode(qs, True)))

        # Best-effort: failures are non-fatal by design
        self._download_webpage(
            playback_url, video_id, 'Marking watched',
            'Unable to mark watched', fatal=False)
1873
    @staticmethod
    def _extract_urls(webpage):
        """Return all YouTube URLs/video ids embedded in *webpage* (iframe,
        object/embed, SWF, lazyYT and the Wordpress YVII plugin markups)."""
        # Embedded YouTube player
        entries = [
            unescapeHTML(mobj.group('url'))
            for mobj in re.finditer(r'''(?x)
            (?:
                <iframe[^>]+?src=|
                data-video-url=|
                <embed[^>]+?src=|
                embedSWF\(?:\s*|
                <object[^>]+data=|
                new\s+SWFObject\(
            )
            (["\'])
            (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
            (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
            \1''', webpage)]

        # lazyYT YouTube embed
        entries.extend(list(map(
            unescapeHTML,
            re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage))))

        # Wordpress "YouTube Video Importer" plugin
        matches = re.findall(r'''(?x)<div[^>]+
            class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
            data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
        entries.extend(m[-1] for m in matches)

        return entries
1905
1906 @staticmethod
1907 def _extract_url(webpage):
1908 urls = YoutubeIE._extract_urls(webpage)
1909 return urls[0] if urls else None
1910
1911 @classmethod
1912 def extract_id(cls, url):
1913 mobj = re.match(cls._VALID_URL, url, re.VERBOSE)
1914 if mobj is None:
1915 raise ExtractorError('Invalid URL: %s' % url)
1916 video_id = mobj.group(2)
1917 return video_id
1918
    def _extract_chapters_from_json(self, data, video_id, duration):
        """Build a chapters list (start/end/title dicts) from the player
        overlay data; returns None when no chapter bar is present."""
        chapters_list = try_get(
            data,
            lambda x: x['playerOverlays']
                       ['playerOverlayRenderer']
                       ['decoratedPlayerBarRenderer']
                       ['decoratedPlayerBarRenderer']
                       ['playerBar']
                       ['chapteredPlayerBarRenderer']
                       ['chapters'],
            list)
        if not chapters_list:
            return

        def chapter_time(chapter):
            # Start time is given in milliseconds; convert to seconds
            return float_or_none(
                try_get(
                    chapter,
                    lambda x: x['chapterRenderer']['timeRangeStartMillis'],
                    int),
                scale=1000)
        chapters = []
        for next_num, chapter in enumerate(chapters_list, start=1):
            start_time = chapter_time(chapter)
            if start_time is None:
                continue
            # Each chapter ends where the next begins; the last one ends at
            # the video duration
            end_time = (chapter_time(chapters_list[next_num])
                        if next_num < len(chapters_list) else duration)
            if end_time is None:
                continue
            title = try_get(
                chapter, lambda x: x['chapterRenderer']['title']['simpleText'],
                compat_str)
            chapters.append({
                'start_time': start_time,
                'end_time': end_time,
                'title': title,
            })
        return chapters
1958
1959 def _extract_yt_initial_variable(self, webpage, regex, video_id, name):
1960 return self._parse_json(self._search_regex(
1961 (r'%s\s*%s' % (regex, self._YT_INITIAL_BOUNDARY_RE),
1962 regex), webpage, name, default='{}'), video_id, fatal=False)
1963
1964 @staticmethod
1965 def parse_time_text(time_text):
1966 """
1967 Parse the comment time text
1968 time_text is in the format 'X units ago (edited)'
1969 """
1970 time_text_split = time_text.split(' ')
1971 if len(time_text_split) >= 3:
1972 return datetime_from_str('now-%s%s' % (time_text_split[0], time_text_split[1]), precision='auto')
1973
1974 @staticmethod
1975 def _join_text_entries(runs):
1976 text = None
1977 for run in runs:
1978 if not isinstance(run, dict):
1979 continue
1980 sub_text = try_get(run, lambda x: x['text'], compat_str)
1981 if sub_text:
1982 if not text:
1983 text = sub_text
1984 continue
1985 text += sub_text
1986 return text
1987
1988 def _extract_comment(self, comment_renderer, parent=None):
1989 comment_id = comment_renderer.get('commentId')
1990 if not comment_id:
1991 return
1992 comment_text_runs = try_get(comment_renderer, lambda x: x['contentText']['runs']) or []
1993 text = self._join_text_entries(comment_text_runs) or ''
1994 comment_time_text = try_get(comment_renderer, lambda x: x['publishedTimeText']['runs']) or []
1995 time_text = self._join_text_entries(comment_time_text)
1996 # note: timestamp is an estimate calculated from the current time and time_text
1997 timestamp = calendar.timegm(self.parse_time_text(time_text).timetuple())
1998 author = try_get(comment_renderer, lambda x: x['authorText']['simpleText'], compat_str)
1999 author_id = try_get(comment_renderer,
2000 lambda x: x['authorEndpoint']['browseEndpoint']['browseId'], compat_str)
2001 votes = parse_count(try_get(comment_renderer, (lambda x: x['voteCount']['simpleText'],
2002 lambda x: x['likeCount']), compat_str)) or 0
2003 author_thumbnail = try_get(comment_renderer,
2004 lambda x: x['authorThumbnail']['thumbnails'][-1]['url'], compat_str)
2005
2006 author_is_uploader = try_get(comment_renderer, lambda x: x['authorIsChannelOwner'], bool)
2007 is_favorited = 'creatorHeart' in (try_get(
2008 comment_renderer, lambda x: x['actionButtons']['commentActionButtonsRenderer'], dict) or {})
2009 return {
2010 'id': comment_id,
2011 'text': text,
2012 'timestamp': timestamp,
2013 'time_text': time_text,
2014 'like_count': votes,
2015 'is_favorited': is_favorited,
2016 'author': author,
2017 'author_id': author_id,
2018 'author_thumbnail': author_thumbnail,
2019 'author_is_uploader': author_is_uploader,
2020 'parent': parent or 'root'
2021 }
2022
    def _comment_entries(self, root_continuation_data, identity_token, account_syncid,
                         ytcfg, video_id, parent=None, comment_counts=None):
        # Generator over comments (and their replies, recursively). The first
        # item yielded for a top-level thread may be an int: the estimated
        # total comment count extracted from the section header.

        def extract_header(contents):
            # Find the comments header, report the expected total and pick the
            # continuation matching the configured sort order (top/new).
            _total_comments = 0
            _continuation = None
            for content in contents:
                comments_header_renderer = try_get(content, lambda x: x['commentsHeaderRenderer'])
                expected_comment_count = try_get(comments_header_renderer,
                                                 (lambda x: x['countText']['runs'][0]['text'],
                                                  lambda x: x['commentsCount']['runs'][0]['text']),
                                                 compat_str)
                if expected_comment_count:
                    comment_counts[1] = str_to_int(expected_comment_count)
                    self.to_screen('Downloading ~%d comments' % str_to_int(expected_comment_count))
                    _total_comments = comment_counts[1]
                sort_mode_str = self._configuration_arg('comment_sort', [''])[0]
                comment_sort_index = int(sort_mode_str != 'top')  # 1 = new, 0 = top

                sort_menu_item = try_get(
                    comments_header_renderer,
                    lambda x: x['sortMenu']['sortFilterSubMenuRenderer']['subMenuItems'][comment_sort_index], dict) or {}
                sort_continuation_ep = sort_menu_item.get('serviceEndpoint') or {}

                _continuation = self._extract_continuation_ep_data(sort_continuation_ep) or self._extract_continuation(sort_menu_item)
                if not _continuation:
                    continue

                sort_text = sort_menu_item.get('title')
                if isinstance(sort_text, compat_str):
                    sort_text = sort_text.lower()
                else:
                    sort_text = 'top comments' if comment_sort_index == 0 else 'newest first'
                self.to_screen('Sorting comments by %s' % sort_text)
                break
            return _total_comments, _continuation

        def extract_thread(contents):
            # Yield each comment in a page, recursing into reply threads.
            if not parent:
                comment_counts[2] = 0
            for content in contents:
                comment_thread_renderer = try_get(content, lambda x: x['commentThreadRenderer'])
                comment_renderer = try_get(
                    comment_thread_renderer, (lambda x: x['comment']['commentRenderer'], dict)) or try_get(
                    content, (lambda x: x['commentRenderer'], dict))

                if not comment_renderer:
                    continue
                comment = self._extract_comment(comment_renderer, parent)
                if not comment:
                    continue
                comment_counts[0] += 1
                yield comment
                # Attempt to get the replies
                comment_replies_renderer = try_get(
                    comment_thread_renderer, lambda x: x['replies']['commentRepliesRenderer'], dict)

                if comment_replies_renderer:
                    comment_counts[2] += 1
                    comment_entries_iter = self._comment_entries(
                        comment_replies_renderer, identity_token, account_syncid, ytcfg,
                        video_id, parent=comment.get('id'), comment_counts=comment_counts)

                    for reply_comment in comment_entries_iter:
                        yield reply_comment

        # YouTube comments have a max depth of 2
        max_depth = int_or_none(self._configuration_arg('max_comment_depth', [''])[0]) or float('inf')
        if max_depth == 1 and parent:
            return
        if not comment_counts:
            # comment so far, est. total comments, current comment thread #
            comment_counts = [0, 0, 0]

        continuation = self._extract_continuation(root_continuation_data)
        if continuation and len(continuation['ctoken']) < 27:
            self.write_debug('Detected old API continuation token. Generating new API compatible token.')
            continuation_token = self._generate_comment_continuation(video_id)
            continuation = self._build_continuation_query(continuation_token, None)

        visitor_data = None
        is_first_continuation = parent is None

        # Page through the comment continuations until exhausted
        for page_num in itertools.count(0):
            if not continuation:
                break
            headers = self._generate_api_headers(ytcfg, identity_token, account_syncid, visitor_data)
            comment_prog_str = '(%d/%d)' % (comment_counts[0], comment_counts[1])
            if page_num == 0:
                if is_first_continuation:
                    note_prefix = 'Downloading comment section API JSON'
                else:
                    note_prefix = ' Downloading comment API JSON reply thread %d %s' % (
                        comment_counts[2], comment_prog_str)
            else:
                note_prefix = '%sDownloading comment%s API JSON page %d %s' % (
                    ' ' if parent else '', ' replies' if parent else '',
                    page_num, comment_prog_str)

            response = self._extract_response(
                item_id=None, query=self._continuation_query_ajax_to_api(continuation),
                ep='next', ytcfg=ytcfg, headers=headers, note=note_prefix,
                check_get_keys=('onResponseReceivedEndpoints', 'continuationContents'))
            if not response:
                break
            # Carry visitorData forward so pagination stays consistent
            visitor_data = try_get(
                response,
                lambda x: x['responseContext']['webResponseContextExtensionData']['ytConfigData']['visitorData'],
                compat_str) or visitor_data

            continuation_contents = dict_get(response, ('onResponseReceivedEndpoints', 'continuationContents'))

            continuation = None
            if isinstance(continuation_contents, list):
                for continuation_section in continuation_contents:
                    if not isinstance(continuation_section, dict):
                        continue
                    continuation_items = try_get(
                        continuation_section,
                        (lambda x: x['reloadContinuationItemsCommand']['continuationItems'],
                         lambda x: x['appendContinuationItemsAction']['continuationItems']),
                        list) or []
                    if is_first_continuation:
                        total_comments, continuation = extract_header(continuation_items)
                        if total_comments:
                            yield total_comments
                        is_first_continuation = False
                        if continuation:
                            break
                        continue
                    count = 0
                    for count, entry in enumerate(extract_thread(continuation_items)):
                        yield entry
                    continuation = self._extract_continuation({'contents': continuation_items})
                    if continuation:
                        # Sometimes YouTube provides a continuation without any comments
                        # In most cases we end up just downloading these with very little comments to come.
                        if count == 0:
                            if not parent:
                                self.report_warning('No comments received - assuming end of comments')
                            continuation = None
                    break

            # Deprecated response structure
            elif isinstance(continuation_contents, dict):
                known_continuation_renderers = ('itemSectionContinuation', 'commentRepliesContinuation')
                for key, continuation_renderer in continuation_contents.items():
                    if key not in known_continuation_renderers:
                        continue
                    if not isinstance(continuation_renderer, dict):
                        continue
                    if is_first_continuation:
                        header_continuation_items = [continuation_renderer.get('header') or {}]
                        total_comments, continuation = extract_header(header_continuation_items)
                        if total_comments:
                            yield total_comments
                        is_first_continuation = False
                        if continuation:
                            break

                    # Sometimes YouTube provides a continuation without any comments
                    # In most cases we end up just downloading these with very little comments to come.
                    count = 0
                    for count, entry in enumerate(extract_thread(continuation_renderer.get('contents') or {})):
                        yield entry
                    continuation = self._extract_continuation(continuation_renderer)
                    if count == 0:
                        if not parent:
                            self.report_warning('No comments received - assuming end of comments')
                        continuation = None
                    break
2194
2195 @staticmethod
2196 def _generate_comment_continuation(video_id):
2197 """
2198 Generates initial comment section continuation token from given video id
2199 """
2200 b64_vid_id = base64.b64encode(bytes(video_id.encode('utf-8')))
2201 parts = ('Eg0SCw==', b64_vid_id, 'GAYyJyIRIgs=', b64_vid_id, 'MAB4AjAAQhBjb21tZW50cy1zZWN0aW9u')
2202 new_continuation_intlist = list(itertools.chain.from_iterable(
2203 [bytes_to_intlist(base64.b64decode(part)) for part in parts]))
2204 return base64.b64encode(intlist_to_bytes(new_continuation_intlist)).decode('utf-8')
2205
    def _extract_comments(self, ytcfg, video_id, contents, webpage):
        """Entry for comment extraction"""
        def _real_comment_extract(contents):
            # Delegate to _comment_entries for each recognised section; that
            # generator may yield an int (estimated total) before comments.
            if isinstance(contents, list):
                for entry in contents:
                    for key, renderer in entry.items():
                        if key not in known_entry_comment_renderers:
                            continue
                        yield from self._comment_entries(
                            renderer, video_id=video_id, ytcfg=ytcfg,
                            identity_token=self._extract_identity_token(webpage, item_id=video_id),
                            account_syncid=self._extract_account_syncid(ytcfg))
                        break
        comments = []
        known_entry_comment_renderers = ('itemSectionRenderer',)
        estimated_total = 0
        max_comments = int_or_none(self._configuration_arg('max_comments', [''])[0]) or float('inf')

        try:
            for comment in _real_comment_extract(contents):
                if len(comments) >= max_comments:
                    break
                # int sentinel = estimated total count, not an actual comment
                if isinstance(comment, int):
                    estimated_total = comment
                    continue
                comments.append(comment)
        except KeyboardInterrupt:
            # Allow the user to abort and keep whatever was downloaded so far
            self.to_screen('Interrupted by user')
        self.to_screen('Downloaded %d/%d comments' % (len(comments), estimated_total))
        return {
            'comments': comments,
            'comment_count': len(comments),
        }
2239
2240 @staticmethod
2241 def _generate_player_context(sts=None):
2242 context = {
2243 'html5Preference': 'HTML5_PREF_WANTS',
2244 }
2245 if sts is not None:
2246 context['signatureTimestamp'] = sts
2247 return {
2248 'playbackContext': {
2249 'contentPlaybackContext': context
2250 }
2251 }
2252
2253 @staticmethod
2254 def _get_video_info_params(video_id, client='TVHTML5'):
2255 GVI_CLIENTS = {
2256 'ANDROID': {
2257 'c': 'ANDROID',
2258 'cver': '16.20',
2259 },
2260 'TVHTML5': {
2261 'c': 'TVHTML5',
2262 'cver': '6.20180913',
2263 }
2264 }
2265 query = {
2266 'video_id': video_id,
2267 'eurl': 'https://youtube.googleapis.com/v/' + video_id,
2268 'html5': '1'
2269 }
2270 query.update(GVI_CLIENTS.get(client))
2271 return query
2272
2273 def _real_extract(self, url):
2274 url, smuggled_data = unsmuggle_url(url, {})
2275 video_id = self._match_id(url)
2276
2277 is_music_url = smuggled_data.get('is_music_url') or self.is_music_url(url)
2278
2279 base_url = self.http_scheme() + '//www.youtube.com/'
2280 webpage_url = base_url + 'watch?v=' + video_id
2281 webpage = self._download_webpage(
2282 webpage_url + '&bpctr=9999999999&has_verified=1', video_id, fatal=False)
2283
2284 ytcfg = self._extract_ytcfg(video_id, webpage) or self._get_default_ytcfg()
2285 identity_token = self._extract_identity_token(webpage, video_id)
2286 syncid = self._extract_account_syncid(ytcfg)
2287 headers = self._generate_api_headers(ytcfg, identity_token, syncid)
2288
2289 player_url = self._extract_player_url(ytcfg, webpage)
2290
2291 player_client = self._configuration_arg('player_client', [''])[0]
2292 if player_client not in ('web', 'android', ''):
2293 self.report_warning(f'Invalid player_client {player_client} given. Falling back to android client.')
2294 force_mobile_client = player_client != 'web'
2295 player_skip = self._configuration_arg('player_skip')
2296
2297 def get_text(x):
2298 if not x:
2299 return
2300 text = x.get('simpleText')
2301 if text and isinstance(text, compat_str):
2302 return text
2303 runs = x.get('runs')
2304 if not isinstance(runs, list):
2305 return
2306 return ''.join([r['text'] for r in runs if isinstance(r.get('text'), compat_str)])
2307
2308 ytm_streaming_data = {}
2309 if is_music_url:
2310 ytm_webpage = None
2311 sts = self._extract_signature_timestamp(video_id, player_url, ytcfg, fatal=False)
2312 if sts and not force_mobile_client and 'configs' not in player_skip:
2313 ytm_webpage = self._download_webpage(
2314 'https://music.youtube.com',
2315 video_id, fatal=False, note='Downloading remix client config')
2316
2317 ytm_cfg = self._extract_ytcfg(video_id, ytm_webpage) or {}
2318 ytm_client = 'WEB_REMIX'
2319 if not sts or force_mobile_client:
2320 # Android client already has signature descrambled
2321 # See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/562
2322 if not sts:
2323 self.report_warning('Falling back to android remix client for player API.')
2324 ytm_client = 'ANDROID_MUSIC'
2325 ytm_cfg = {}
2326
2327 ytm_headers = self._generate_api_headers(
2328 ytm_cfg, identity_token, syncid,
2329 client=ytm_client)
2330 ytm_query = {'videoId': video_id}
2331 ytm_query.update(self._generate_player_context(sts))
2332
2333 ytm_player_response = self._extract_response(
2334 item_id=video_id, ep='player', query=ytm_query,
2335 ytcfg=ytm_cfg, headers=ytm_headers, fatal=False,
2336 default_client=ytm_client,
2337 note='Downloading %sremix player API JSON' % ('android ' if force_mobile_client else ''))
2338 ytm_streaming_data = try_get(ytm_player_response, lambda x: x['streamingData'], dict) or {}
2339
2340 player_response = None
2341 if webpage:
2342 player_response = self._extract_yt_initial_variable(
2343 webpage, self._YT_INITIAL_PLAYER_RESPONSE_RE,
2344 video_id, 'initial player response')
2345
2346 if not player_response or force_mobile_client:
2347 sts = self._extract_signature_timestamp(video_id, player_url, ytcfg, fatal=False)
2348 yt_client = 'WEB'
2349 ytpcfg = ytcfg
2350 ytp_headers = headers
2351 if not sts or force_mobile_client:
2352 # Android client already has signature descrambled
2353 # See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/562
2354 if not sts:
2355 self.report_warning('Falling back to android client for player API.')
2356 yt_client = 'ANDROID'
2357 ytpcfg = {}
2358 ytp_headers = self._generate_api_headers(ytpcfg, identity_token, syncid, yt_client)
2359
2360 yt_query = {'videoId': video_id}
2361 yt_query.update(self._generate_player_context(sts))
2362 player_response = self._extract_response(
2363 item_id=video_id, ep='player', query=yt_query,
2364 ytcfg=ytpcfg, headers=ytp_headers, fatal=False,
2365 default_client=yt_client,
2366 note='Downloading %splayer API JSON' % ('android ' if force_mobile_client else '')
2367 ) or player_response
2368
2369 # Age-gate workarounds
2370 playability_status = player_response.get('playabilityStatus') or {}
2371 if playability_status.get('reason') in self._AGE_GATE_REASONS:
2372 gvi_clients = ('ANDROID', 'TVHTML5') if force_mobile_client else ('TVHTML5', 'ANDROID')
2373 for gvi_client in gvi_clients:
2374 pr = self._parse_json(try_get(compat_parse_qs(
2375 self._download_webpage(
2376 base_url + 'get_video_info', video_id,
2377 'Refetching age-gated %s info webpage' % gvi_client.lower(),
2378 'unable to download video info webpage', fatal=False,
2379 query=self._get_video_info_params(video_id, client=gvi_client))),
2380 lambda x: x['player_response'][0],
2381 compat_str) or '{}', video_id)
2382 if pr:
2383 break
2384 if not pr:
2385 self.report_warning('Falling back to embedded-only age-gate workaround.')
2386 embed_webpage = None
2387 sts = self._extract_signature_timestamp(video_id, player_url, ytcfg, fatal=False)
2388 if sts and not force_mobile_client and 'configs' not in player_skip:
2389 embed_webpage = self._download_webpage(
2390 'https://www.youtube.com/embed/%s?html5=1' % video_id,
2391 video_id=video_id, note='Downloading age-gated embed config')
2392
2393 ytcfg_age = self._extract_ytcfg(video_id, embed_webpage) or {}
2394 # If we extracted the embed webpage, it'll tell us if we can view the video
2395 embedded_pr = self._parse_json(
2396 try_get(ytcfg_age, lambda x: x['PLAYER_VARS']['embedded_player_response'], str) or '{}',
2397 video_id=video_id)
2398 embedded_ps_reason = try_get(embedded_pr, lambda x: x['playabilityStatus']['reason'], str) or ''
2399 if embedded_ps_reason not in self._AGE_GATE_REASONS:
2400 yt_client = 'WEB_EMBEDDED_PLAYER'
2401 if not sts or force_mobile_client:
2402 # Android client already has signature descrambled
2403 # See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/562
2404 if not sts:
2405 self.report_warning(
2406 'Falling back to android embedded client for player API (note: some formats may be missing).')
2407 yt_client = 'ANDROID_EMBEDDED_PLAYER'
2408 ytcfg_age = {}
2409
2410 ytage_headers = self._generate_api_headers(
2411 ytcfg_age, identity_token, syncid, client=yt_client)
2412 yt_age_query = {'videoId': video_id}
2413 yt_age_query.update(self._generate_player_context(sts))
2414 pr = self._extract_response(
2415 item_id=video_id, ep='player', query=yt_age_query,
2416 ytcfg=ytcfg_age, headers=ytage_headers, fatal=False,
2417 default_client=yt_client,
2418 note='Downloading %sage-gated player API JSON' % ('android ' if force_mobile_client else '')
2419 ) or {}
2420
2421 if pr:
2422 player_response = pr
2423
2424 trailer_video_id = try_get(
2425 playability_status,
2426 lambda x: x['errorScreen']['playerLegacyDesktopYpcTrailerRenderer']['trailerVideoId'],
2427 compat_str)
2428 if trailer_video_id:
2429 return self.url_result(
2430 trailer_video_id, self.ie_key(), trailer_video_id)
2431
2432 search_meta = (
2433 lambda x: self._html_search_meta(x, webpage, default=None)) \
2434 if webpage else lambda x: None
2435
2436 video_details = player_response.get('videoDetails') or {}
2437 microformat = try_get(
2438 player_response,
2439 lambda x: x['microformat']['playerMicroformatRenderer'],
2440 dict) or {}
2441 video_title = video_details.get('title') \
2442 or get_text(microformat.get('title')) \
2443 or search_meta(['og:title', 'twitter:title', 'title'])
2444 video_description = video_details.get('shortDescription')
2445
2446 if not smuggled_data.get('force_singlefeed', False):
2447 if not self.get_param('noplaylist'):
2448 multifeed_metadata_list = try_get(
2449 player_response,
2450 lambda x: x['multicamera']['playerLegacyMulticameraRenderer']['metadataList'],
2451 compat_str)
2452 if multifeed_metadata_list:
2453 entries = []
2454 feed_ids = []
2455 for feed in multifeed_metadata_list.split(','):
2456 # Unquote should take place before split on comma (,) since textual
2457 # fields may contain comma as well (see
2458 # https://github.com/ytdl-org/youtube-dl/issues/8536)
2459 feed_data = compat_parse_qs(
2460 compat_urllib_parse_unquote_plus(feed))
2461
2462 def feed_entry(name):
2463 return try_get(
2464 feed_data, lambda x: x[name][0], compat_str)
2465
2466 feed_id = feed_entry('id')
2467 if not feed_id:
2468 continue
2469 feed_title = feed_entry('title')
2470 title = video_title
2471 if feed_title:
2472 title += ' (%s)' % feed_title
2473 entries.append({
2474 '_type': 'url_transparent',
2475 'ie_key': 'Youtube',
2476 'url': smuggle_url(
2477 base_url + 'watch?v=' + feed_data['id'][0],
2478 {'force_singlefeed': True}),
2479 'title': title,
2480 })
2481 feed_ids.append(feed_id)
2482 self.to_screen(
2483 'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
2484 % (', '.join(feed_ids), video_id))
2485 return self.playlist_result(
2486 entries, video_id, video_title, video_description)
2487 else:
2488 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
2489
2490 formats, itags, stream_ids = [], [], []
2491 itag_qualities = {}
2492 q = qualities([
2493 # "tiny" is the smallest video-only format. But some audio-only formats
2494 # was also labeled "tiny". It is not clear if such formats still exist
2495 'tiny', 'audio_quality_low', 'audio_quality_medium', 'audio_quality_high', # Audio only formats
2496 'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres'
2497 ])
2498
2499 streaming_data = player_response.get('streamingData') or {}
2500 streaming_formats = streaming_data.get('formats') or []
2501 streaming_formats.extend(streaming_data.get('adaptiveFormats') or [])
2502 streaming_formats.extend(ytm_streaming_data.get('formats') or [])
2503 streaming_formats.extend(ytm_streaming_data.get('adaptiveFormats') or [])
2504
2505 for fmt in streaming_formats:
2506 if fmt.get('targetDurationSec') or fmt.get('drmFamilies'):
2507 continue
2508
2509 itag = str_or_none(fmt.get('itag'))
2510 audio_track = fmt.get('audioTrack') or {}
2511 stream_id = '%s.%s' % (itag or '', audio_track.get('id', ''))
2512 if stream_id in stream_ids:
2513 continue
2514
2515 quality = fmt.get('quality')
2516 if quality == 'tiny' or not quality:
2517 quality = fmt.get('audioQuality', '').lower() or quality
2518 if itag and quality:
2519 itag_qualities[itag] = quality
2520 # FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment
2521 # (adding `&sq=0` to the URL) and parsing emsg box to determine the
2522 # number of fragment that would subsequently requested with (`&sq=N`)
2523 if fmt.get('type') == 'FORMAT_STREAM_TYPE_OTF':
2524 continue
2525
2526 fmt_url = fmt.get('url')
2527 if not fmt_url:
2528 sc = compat_parse_qs(fmt.get('signatureCipher'))
2529 fmt_url = url_or_none(try_get(sc, lambda x: x['url'][0]))
2530 encrypted_sig = try_get(sc, lambda x: x['s'][0])
2531 if not (sc and fmt_url and encrypted_sig):
2532 continue
2533 if not player_url:
2534 continue
2535 signature = self._decrypt_signature(sc['s'][0], video_id, player_url)
2536 sp = try_get(sc, lambda x: x['sp'][0]) or 'signature'
2537 fmt_url += '&' + sp + '=' + signature
2538
2539 if itag:
2540 itags.append(itag)
2541 stream_ids.append(stream_id)
2542
2543 tbr = float_or_none(
2544 fmt.get('averageBitrate') or fmt.get('bitrate'), 1000)
2545 dct = {
2546 'asr': int_or_none(fmt.get('audioSampleRate')),
2547 'filesize': int_or_none(fmt.get('contentLength')),
2548 'format_id': itag,
2549 'format_note': audio_track.get('displayName') or fmt.get('qualityLabel') or quality,
2550 'fps': int_or_none(fmt.get('fps')),
2551 'height': int_or_none(fmt.get('height')),
2552 'quality': q(quality),
2553 'tbr': tbr,
2554 'url': fmt_url,
2555 'width': fmt.get('width'),
2556 'language': audio_track.get('id', '').split('.')[0],
2557 }
2558 mime_mobj = re.match(
2559 r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', fmt.get('mimeType') or '')
2560 if mime_mobj:
2561 dct['ext'] = mimetype2ext(mime_mobj.group(1))
2562 dct.update(parse_codecs(mime_mobj.group(2)))
2563 # The 3gp format in android client has a quality of "small",
2564 # but is actually worse than all other formats
2565 if dct['ext'] == '3gp':
2566 dct['quality'] = q('tiny')
2567 no_audio = dct.get('acodec') == 'none'
2568 no_video = dct.get('vcodec') == 'none'
2569 if no_audio:
2570 dct['vbr'] = tbr
2571 if no_video:
2572 dct['abr'] = tbr
2573 if no_audio or no_video:
2574 dct['downloader_options'] = {
2575 # Youtube throttles chunks >~10M
2576 'http_chunk_size': 10485760,
2577 }
2578 if dct.get('ext'):
2579 dct['container'] = dct['ext'] + '_dash'
2580 formats.append(dct)
2581
2582 skip_manifests = self._configuration_arg('skip')
2583 get_dash = 'dash' not in skip_manifests and self.get_param('youtube_include_dash_manifest', True)
2584 get_hls = 'hls' not in skip_manifests and self.get_param('youtube_include_hls_manifest', True)
2585
2586 for sd in (streaming_data, ytm_streaming_data):
2587 hls_manifest_url = get_hls and sd.get('hlsManifestUrl')
2588 if hls_manifest_url:
2589 for f in self._extract_m3u8_formats(
2590 hls_manifest_url, video_id, 'mp4', fatal=False):
2591 itag = self._search_regex(
2592 r'/itag/(\d+)', f['url'], 'itag', default=None)
2593 if itag:
2594 f['format_id'] = itag
2595 formats.append(f)
2596
2597 dash_manifest_url = get_dash and sd.get('dashManifestUrl')
2598 if dash_manifest_url:
2599 for f in self._extract_mpd_formats(
2600 dash_manifest_url, video_id, fatal=False):
2601 itag = f['format_id']
2602 if itag in itags:
2603 continue
2604 if itag in itag_qualities:
2605 f['quality'] = q(itag_qualities[itag])
2606 filesize = int_or_none(self._search_regex(
2607 r'/clen/(\d+)', f.get('fragment_base_url')
2608 or f['url'], 'file size', default=None))
2609 if filesize:
2610 f['filesize'] = filesize
2611 formats.append(f)
2612
2613 if not formats:
2614 if not self.get_param('allow_unplayable_formats') and streaming_data.get('licenseInfos'):
2615 self.raise_no_formats(
2616 'This video is DRM protected.', expected=True)
2617 pemr = try_get(
2618 playability_status,
2619 lambda x: x['errorScreen']['playerErrorMessageRenderer'],
2620 dict) or {}
2621 reason = get_text(pemr.get('reason')) or playability_status.get('reason')
2622 subreason = pemr.get('subreason')
2623 if subreason:
2624 subreason = clean_html(get_text(subreason))
2625 if subreason == 'The uploader has not made this video available in your country.':
2626 countries = microformat.get('availableCountries')
2627 if not countries:
2628 regions_allowed = search_meta('regionsAllowed')
2629 countries = regions_allowed.split(',') if regions_allowed else None
2630 self.raise_geo_restricted(subreason, countries, metadata_available=True)
2631 reason += '\n' + subreason
2632 if reason:
2633 self.raise_no_formats(reason, expected=True)
2634
2635 self._sort_formats(formats)
2636
2637 keywords = video_details.get('keywords') or []
2638 if not keywords and webpage:
2639 keywords = [
2640 unescapeHTML(m.group('content'))
2641 for m in re.finditer(self._meta_regex('og:video:tag'), webpage)]
2642 for keyword in keywords:
2643 if keyword.startswith('yt:stretch='):
2644 mobj = re.search(r'(\d+)\s*:\s*(\d+)', keyword)
2645 if mobj:
2646 # NB: float is intentional for forcing float division
2647 w, h = (float(v) for v in mobj.groups())
2648 if w > 0 and h > 0:
2649 ratio = w / h
2650 for f in formats:
2651 if f.get('vcodec') != 'none':
2652 f['stretched_ratio'] = ratio
2653 break
2654
2655 thumbnails = []
2656 for container in (video_details, microformat):
2657 for thumbnail in (try_get(
2658 container,
2659 lambda x: x['thumbnail']['thumbnails'], list) or []):
2660 thumbnail_url = thumbnail.get('url')
2661 if not thumbnail_url:
2662 continue
2663 # Sometimes youtube gives a wrong thumbnail URL. See:
2664 # https://github.com/yt-dlp/yt-dlp/issues/233
2665 # https://github.com/ytdl-org/youtube-dl/issues/28023
2666 if 'maxresdefault' in thumbnail_url:
2667 thumbnail_url = thumbnail_url.split('?')[0]
2668 thumbnails.append({
2669 'url': thumbnail_url,
2670 'height': int_or_none(thumbnail.get('height')),
2671 'width': int_or_none(thumbnail.get('width')),
2672 'preference': 1 if 'maxresdefault' in thumbnail_url else -1
2673 })
2674 thumbnail_url = search_meta(['og:image', 'twitter:image'])
2675 if thumbnail_url:
2676 thumbnails.append({
2677 'url': thumbnail_url,
2678 'preference': 1 if 'maxresdefault' in thumbnail_url else -1
2679 })
2680 # All videos have a maxresdefault thumbnail, but sometimes it does not appear in the webpage
2681 # See: https://github.com/ytdl-org/youtube-dl/issues/29049
2682 thumbnails.append({
2683 'url': 'https://i.ytimg.com/vi/%s/maxresdefault.jpg' % video_id,
2684 'preference': 1,
2685 })
2686 self._remove_duplicate_formats(thumbnails)
2687
2688 category = microformat.get('category') or search_meta('genre')
2689 channel_id = video_details.get('channelId') \
2690 or microformat.get('externalChannelId') \
2691 or search_meta('channelId')
2692 duration = int_or_none(
2693 video_details.get('lengthSeconds')
2694 or microformat.get('lengthSeconds')) \
2695 or parse_duration(search_meta('duration'))
2696 is_live = video_details.get('isLive')
2697 is_upcoming = video_details.get('isUpcoming')
2698 owner_profile_url = microformat.get('ownerProfileUrl')
2699
2700 info = {
2701 'id': video_id,
2702 'title': self._live_title(video_title) if is_live else video_title,
2703 'formats': formats,
2704 'thumbnails': thumbnails,
2705 'description': video_description,
2706 'upload_date': unified_strdate(
2707 microformat.get('uploadDate')
2708 or search_meta('uploadDate')),
2709 'uploader': video_details['author'],
2710 'uploader_id': self._search_regex(r'/(?:channel|user)/([^/?&#]+)', owner_profile_url, 'uploader id') if owner_profile_url else None,
2711 'uploader_url': owner_profile_url,
2712 'channel_id': channel_id,
2713 'channel_url': 'https://www.youtube.com/channel/' + channel_id if channel_id else None,
2714 'duration': duration,
2715 'view_count': int_or_none(
2716 video_details.get('viewCount')
2717 or microformat.get('viewCount')
2718 or search_meta('interactionCount')),
2719 'average_rating': float_or_none(video_details.get('averageRating')),
2720 'age_limit': 18 if (
2721 microformat.get('isFamilySafe') is False
2722 or search_meta('isFamilyFriendly') == 'false'
2723 or search_meta('og:restrictions:age') == '18+') else 0,
2724 'webpage_url': webpage_url,
2725 'categories': [category] if category else None,
2726 'tags': keywords,
2727 'is_live': is_live,
2728 'playable_in_embed': playability_status.get('playableInEmbed'),
2729 'was_live': video_details.get('isLiveContent'),
2730 }
2731
2732 pctr = try_get(
2733 player_response,
2734 lambda x: x['captions']['playerCaptionsTracklistRenderer'], dict)
2735 subtitles = {}
2736 if pctr:
2737 def process_language(container, base_url, lang_code, sub_name, query):
2738 lang_subs = container.setdefault(lang_code, [])
2739 for fmt in self._SUBTITLE_FORMATS:
2740 query.update({
2741 'fmt': fmt,
2742 })
2743 lang_subs.append({
2744 'ext': fmt,
2745 'url': update_url_query(base_url, query),
2746 'name': sub_name,
2747 })
2748
2749 for caption_track in (pctr.get('captionTracks') or []):
2750 base_url = caption_track.get('baseUrl')
2751 if not base_url:
2752 continue
2753 if caption_track.get('kind') != 'asr':
2754 lang_code = (
2755 remove_start(caption_track.get('vssId') or '', '.').replace('.', '-')
2756 or caption_track.get('languageCode'))
2757 if not lang_code:
2758 continue
2759 process_language(
2760 subtitles, base_url, lang_code,
2761 try_get(caption_track, lambda x: x['name']['simpleText']),
2762 {})
2763 continue
2764 automatic_captions = {}
2765 for translation_language in (pctr.get('translationLanguages') or []):
2766 translation_language_code = translation_language.get('languageCode')
2767 if not translation_language_code:
2768 continue
2769 process_language(
2770 automatic_captions, base_url, translation_language_code,
2771 try_get(translation_language, (
2772 lambda x: x['languageName']['simpleText'],
2773 lambda x: x['languageName']['runs'][0]['text'])),
2774 {'tlang': translation_language_code})
2775 info['automatic_captions'] = automatic_captions
2776 info['subtitles'] = subtitles
2777
2778 parsed_url = compat_urllib_parse_urlparse(url)
2779 for component in [parsed_url.fragment, parsed_url.query]:
2780 query = compat_parse_qs(component)
2781 for k, v in query.items():
2782 for d_k, s_ks in [('start', ('start', 't')), ('end', ('end',))]:
2783 d_k += '_time'
2784 if d_k not in info and k in s_ks:
2785 info[d_k] = parse_duration(query[k][0])
2786
2787 # Youtube Music Auto-generated description
2788 if video_description:
2789 mobj = re.search(r'(?s)(?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?.+\nAuto-generated by YouTube\.\s*$', video_description)
2790 if mobj:
2791 release_year = mobj.group('release_year')
2792 release_date = mobj.group('release_date')
2793 if release_date:
2794 release_date = release_date.replace('-', '')
2795 if not release_year:
2796 release_year = release_date[:4]
2797 info.update({
2798 'album': mobj.group('album'.strip()),
2799 'artist': mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·')),
2800 'track': mobj.group('track').strip(),
2801 'release_date': release_date,
2802 'release_year': int_or_none(release_year),
2803 })
2804
2805 initial_data = None
2806 if webpage:
2807 initial_data = self._extract_yt_initial_variable(
2808 webpage, self._YT_INITIAL_DATA_RE, video_id,
2809 'yt initial data')
2810 if not initial_data:
2811 initial_data = self._extract_response(
2812 item_id=video_id, ep='next', fatal=False,
2813 ytcfg=ytcfg, headers=headers, query={'videoId': video_id},
2814 note='Downloading initial data API JSON')
2815
2816 try:
2817 # This will error if there is no livechat
2818 initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
2819 info['subtitles']['live_chat'] = [{
2820 'url': 'https://www.youtube.com/watch?v=%s' % video_id, # url is needed to set cookies
2821 'video_id': video_id,
2822 'ext': 'json',
2823 'protocol': 'youtube_live_chat' if is_live or is_upcoming else 'youtube_live_chat_replay',
2824 }]
2825 except (KeyError, IndexError, TypeError):
2826 pass
2827
2828 if initial_data:
2829 chapters = self._extract_chapters_from_json(
2830 initial_data, video_id, duration)
2831 if not chapters:
2832 for engagment_pannel in (initial_data.get('engagementPanels') or []):
2833 contents = try_get(
2834 engagment_pannel, lambda x: x['engagementPanelSectionListRenderer']['content']['macroMarkersListRenderer']['contents'],
2835 list)
2836 if not contents:
2837 continue
2838
2839 def chapter_time(mmlir):
2840 return parse_duration(
2841 get_text(mmlir.get('timeDescription')))
2842
2843 chapters = []
2844 for next_num, content in enumerate(contents, start=1):
2845 mmlir = content.get('macroMarkersListItemRenderer') or {}
2846 start_time = chapter_time(mmlir)
2847 end_time = chapter_time(try_get(
2848 contents, lambda x: x[next_num]['macroMarkersListItemRenderer'])) \
2849 if next_num < len(contents) else duration
2850 if start_time is None or end_time is None:
2851 continue
2852 chapters.append({
2853 'start_time': start_time,
2854 'end_time': end_time,
2855 'title': get_text(mmlir.get('title')),
2856 })
2857 if chapters:
2858 break
2859 if chapters:
2860 info['chapters'] = chapters
2861
2862 contents = try_get(
2863 initial_data,
2864 lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'],
2865 list) or []
2866 for content in contents:
2867 vpir = content.get('videoPrimaryInfoRenderer')
2868 if vpir:
2869 stl = vpir.get('superTitleLink')
2870 if stl:
2871 stl = get_text(stl)
2872 if try_get(
2873 vpir,
2874 lambda x: x['superTitleIcon']['iconType']) == 'LOCATION_PIN':
2875 info['location'] = stl
2876 else:
2877 mobj = re.search(r'(.+?)\s*S(\d+)\s*•\s*E(\d+)', stl)
2878 if mobj:
2879 info.update({
2880 'series': mobj.group(1),
2881 'season_number': int(mobj.group(2)),
2882 'episode_number': int(mobj.group(3)),
2883 })
2884 for tlb in (try_get(
2885 vpir,
2886 lambda x: x['videoActions']['menuRenderer']['topLevelButtons'],
2887 list) or []):
2888 tbr = tlb.get('toggleButtonRenderer') or {}
2889 for getter, regex in [(
2890 lambda x: x['defaultText']['accessibility']['accessibilityData'],
2891 r'(?P<count>[\d,]+)\s*(?P<type>(?:dis)?like)'), ([
2892 lambda x: x['accessibility'],
2893 lambda x: x['accessibilityData']['accessibilityData'],
2894 ], r'(?P<type>(?:dis)?like) this video along with (?P<count>[\d,]+) other people')]:
2895 label = (try_get(tbr, getter, dict) or {}).get('label')
2896 if label:
2897 mobj = re.match(regex, label)
2898 if mobj:
2899 info[mobj.group('type') + '_count'] = str_to_int(mobj.group('count'))
2900 break
2901 sbr_tooltip = try_get(
2902 vpir, lambda x: x['sentimentBar']['sentimentBarRenderer']['tooltip'])
2903 if sbr_tooltip:
2904 like_count, dislike_count = sbr_tooltip.split(' / ')
2905 info.update({
2906 'like_count': str_to_int(like_count),
2907 'dislike_count': str_to_int(dislike_count),
2908 })
2909 vsir = content.get('videoSecondaryInfoRenderer')
2910 if vsir:
2911 info['channel'] = get_text(try_get(
2912 vsir,
2913 lambda x: x['owner']['videoOwnerRenderer']['title'],
2914 dict))
2915 rows = try_get(
2916 vsir,
2917 lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows'],
2918 list) or []
2919 multiple_songs = False
2920 for row in rows:
2921 if try_get(row, lambda x: x['metadataRowRenderer']['hasDividerLine']) is True:
2922 multiple_songs = True
2923 break
2924 for row in rows:
2925 mrr = row.get('metadataRowRenderer') or {}
2926 mrr_title = mrr.get('title')
2927 if not mrr_title:
2928 continue
2929 mrr_title = get_text(mrr['title'])
2930 mrr_contents_text = get_text(mrr['contents'][0])
2931 if mrr_title == 'License':
2932 info['license'] = mrr_contents_text
2933 elif not multiple_songs:
2934 if mrr_title == 'Album':
2935 info['album'] = mrr_contents_text
2936 elif mrr_title == 'Artist':
2937 info['artist'] = mrr_contents_text
2938 elif mrr_title == 'Song':
2939 info['track'] = mrr_contents_text
2940
2941 fallbacks = {
2942 'channel': 'uploader',
2943 'channel_id': 'uploader_id',
2944 'channel_url': 'uploader_url',
2945 }
2946 for to, frm in fallbacks.items():
2947 if not info.get(to):
2948 info[to] = info.get(frm)
2949
2950 for s_k, d_k in [('artist', 'creator'), ('track', 'alt_title')]:
2951 v = info.get(s_k)
2952 if v:
2953 info[d_k] = v
2954
2955 is_private = bool_or_none(video_details.get('isPrivate'))
2956 is_unlisted = bool_or_none(microformat.get('isUnlisted'))
2957 is_membersonly = None
2958 is_premium = None
2959 if initial_data and is_private is not None:
2960 is_membersonly = False
2961 is_premium = False
2962 contents = try_get(initial_data, lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'], list)
2963 for content in contents or []:
2964 badges = try_get(content, lambda x: x['videoPrimaryInfoRenderer']['badges'], list)
2965 for badge in badges or []:
2966 label = try_get(badge, lambda x: x['metadataBadgeRenderer']['label']) or ''
2967 if label.lower() == 'members only':
2968 is_membersonly = True
2969 break
2970 elif label.lower() == 'premium':
2971 is_premium = True
2972 break
2973 if is_membersonly or is_premium:
2974 break
2975
2976 # TODO: Add this for playlists
2977 info['availability'] = self._availability(
2978 is_private=is_private,
2979 needs_premium=is_premium,
2980 needs_subscription=is_membersonly,
2981 needs_auth=info['age_limit'] >= 18,
2982 is_unlisted=None if is_private is None else is_unlisted)
2983
2984 # get xsrf for annotations or comments
2985 get_annotations = self.get_param('writeannotations', False)
2986 get_comments = self.get_param('getcomments', False)
2987 if get_annotations or get_comments:
2988 xsrf_token = None
2989 ytcfg = self._extract_ytcfg(video_id, webpage)
2990 if ytcfg:
2991 xsrf_token = try_get(ytcfg, lambda x: x['XSRF_TOKEN'], compat_str)
2992 if not xsrf_token:
2993 xsrf_token = self._search_regex(
2994 r'([\'"])XSRF_TOKEN\1\s*:\s*([\'"])(?P<xsrf_token>(?:(?!\2).)+)\2',
2995 webpage, 'xsrf token', group='xsrf_token', fatal=False)
2996
2997 # annotations
2998 if get_annotations:
2999 invideo_url = try_get(
3000 player_response, lambda x: x['annotations'][0]['playerAnnotationsUrlsRenderer']['invideoUrl'], compat_str)
3001 if xsrf_token and invideo_url:
3002 xsrf_field_name = None
3003 if ytcfg:
3004 xsrf_field_name = try_get(ytcfg, lambda x: x['XSRF_FIELD_NAME'], compat_str)
3005 if not xsrf_field_name:
3006 xsrf_field_name = self._search_regex(
3007 r'([\'"])XSRF_FIELD_NAME\1\s*:\s*([\'"])(?P<xsrf_field_name>\w+)\2',
3008 webpage, 'xsrf field name',
3009 group='xsrf_field_name', default='session_token')
3010 info['annotations'] = self._download_webpage(
3011 self._proto_relative_url(invideo_url),
3012 video_id, note='Downloading annotations',
3013 errnote='Unable to download video annotations', fatal=False,
3014 data=urlencode_postdata({xsrf_field_name: xsrf_token}))
3015
3016 if get_comments:
3017 info['__post_extractor'] = lambda: self._extract_comments(ytcfg, video_id, contents, webpage)
3018
3019 self.mark_watched(video_id, player_response)
3020
3021 return info
3022
3023
3024class YoutubeTabIE(YoutubeBaseInfoExtractor):
3025 IE_DESC = 'YouTube.com tab'
3026 _VALID_URL = r'''(?x)
3027 https?://
3028 (?:\w+\.)?
3029 (?:
3030 youtube(?:kids)?\.com|
3031 invidio\.us
3032 )/
3033 (?:
3034 (?P<channel_type>channel|c|user|browse)/|
3035 (?P<not_channel>
3036 feed/|hashtag/|
3037 (?:playlist|watch)\?.*?\blist=
3038 )|
3039 (?!(?:%s)\b) # Direct URLs
3040 )
3041 (?P<id>[^/?\#&]+)
3042 ''' % YoutubeBaseInfoExtractor._RESERVED_NAMES
3043 IE_NAME = 'youtube:tab'
3044
3045 _TESTS = [{
3046 'note': 'playlists, multipage',
3047 'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid',
3048 'playlist_mincount': 94,
3049 'info_dict': {
3050 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
3051 'title': 'Игорь Клейнер - Playlists',
3052 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
3053 'uploader': 'Игорь Клейнер',
3054 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
3055 },
3056 }, {
3057 'note': 'playlists, multipage, different order',
3058 'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
3059 'playlist_mincount': 94,
3060 'info_dict': {
3061 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
3062 'title': 'Игорь Клейнер - Playlists',
3063 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
3064 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
3065 'uploader': 'Игорь Клейнер',
3066 },
3067 }, {
3068 'note': 'playlists, series',
3069 'url': 'https://www.youtube.com/c/3blue1brown/playlists?view=50&sort=dd&shelf_id=3',
3070 'playlist_mincount': 5,
3071 'info_dict': {
3072 'id': 'UCYO_jab_esuFRV4b17AJtAw',
3073 'title': '3Blue1Brown - Playlists',
3074 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
3075 'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
3076 'uploader': '3Blue1Brown',
3077 },
3078 }, {
3079 'note': 'playlists, singlepage',
3080 'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
3081 'playlist_mincount': 4,
3082 'info_dict': {
3083 'id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
3084 'title': 'ThirstForScience - Playlists',
3085 'description': 'md5:609399d937ea957b0f53cbffb747a14c',
3086 'uploader': 'ThirstForScience',
3087 'uploader_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
3088 }
3089 }, {
3090 'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',
3091 'only_matching': True,
3092 }, {
3093 'note': 'basic, single video playlist',
3094 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
3095 'info_dict': {
3096 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
3097 'uploader': 'Sergey M.',
3098 'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
3099 'title': 'youtube-dl public playlist',
3100 },
3101 'playlist_count': 1,
3102 }, {
3103 'note': 'empty playlist',
3104 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
3105 'info_dict': {
3106 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
3107 'uploader': 'Sergey M.',
3108 'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
3109 'title': 'youtube-dl empty playlist',
3110 },
3111 'playlist_count': 0,
3112 }, {
3113 'note': 'Home tab',
3114 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/featured',
3115 'info_dict': {
3116 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3117 'title': 'lex will - Home',
3118 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3119 'uploader': 'lex will',
3120 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3121 },
3122 'playlist_mincount': 2,
3123 }, {
3124 'note': 'Videos tab',
3125 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos',
3126 'info_dict': {
3127 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3128 'title': 'lex will - Videos',
3129 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3130 'uploader': 'lex will',
3131 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3132 },
3133 'playlist_mincount': 975,
3134 }, {
3135 'note': 'Videos tab, sorted by popular',
3136 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos?view=0&sort=p&flow=grid',
3137 'info_dict': {
3138 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3139 'title': 'lex will - Videos',
3140 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3141 'uploader': 'lex will',
3142 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3143 },
3144 'playlist_mincount': 199,
3145 }, {
3146 'note': 'Playlists tab',
3147 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/playlists',
3148 'info_dict': {
3149 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3150 'title': 'lex will - Playlists',
3151 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3152 'uploader': 'lex will',
3153 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3154 },
3155 'playlist_mincount': 17,
3156 }, {
3157 'note': 'Community tab',
3158 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community',
3159 'info_dict': {
3160 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3161 'title': 'lex will - Community',
3162 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3163 'uploader': 'lex will',
3164 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3165 },
3166 'playlist_mincount': 18,
3167 }, {
3168 'note': 'Channels tab',
3169 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels',
3170 'info_dict': {
3171 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3172 'title': 'lex will - Channels',
3173 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3174 'uploader': 'lex will',
3175 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3176 },
3177 'playlist_mincount': 12,
3178 }, {
3179 'note': 'Search tab',
3180 'url': 'https://www.youtube.com/c/3blue1brown/search?query=linear%20algebra',
3181 'playlist_mincount': 40,
3182 'info_dict': {
3183 'id': 'UCYO_jab_esuFRV4b17AJtAw',
3184 'title': '3Blue1Brown - Search - linear algebra',
3185 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
3186 'uploader': '3Blue1Brown',
3187 'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
3188 },
3189 }, {
3190 'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',
3191 'only_matching': True,
3192 }, {
3193 'url': 'https://www.youtubekids.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
3194 'only_matching': True,
3195 }, {
3196 'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
3197 'only_matching': True,
3198 }, {
3199 'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
3200 'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
3201 'info_dict': {
3202 'title': '29C3: Not my department',
3203 'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
3204 'uploader': 'Christiaan008',
3205 'uploader_id': 'UCEPzS1rYsrkqzSLNp76nrcg',
3206 'description': 'md5:a14dc1a8ef8307a9807fe136a0660268',
3207 },
3208 'playlist_count': 96,
3209 }, {
3210 'note': 'Large playlist',
3211 'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
3212 'info_dict': {
3213 'title': 'Uploads from Cauchemar',
3214 'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
3215 'uploader': 'Cauchemar',
3216 'uploader_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',
3217 },
3218 'playlist_mincount': 1123,
3219 }, {
3220 'note': 'even larger playlist, 8832 videos',
3221 'url': 'http://www.youtube.com/user/NASAgovVideo/videos',
3222 'only_matching': True,
3223 }, {
3224 'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
3225 'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
3226 'info_dict': {
3227 'title': 'Uploads from Interstellar Movie',
3228 'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
3229 'uploader': 'Interstellar Movie',
3230 'uploader_id': 'UCXw-G3eDE9trcvY2sBMM_aA',
3231 },
3232 'playlist_mincount': 21,
3233 }, {
3234 'note': 'Playlist with "show unavailable videos" button',
3235 'url': 'https://www.youtube.com/playlist?list=UUTYLiWFZy8xtPwxFwX9rV7Q',
3236 'info_dict': {
3237 'title': 'Uploads from Phim Siêu Nhân Nhật Bản',
3238 'id': 'UUTYLiWFZy8xtPwxFwX9rV7Q',
3239 'uploader': 'Phim Siêu Nhân Nhật Bản',
3240 'uploader_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',
3241 },
3242 'playlist_mincount': 200,
3243 }, {
3244 'note': 'Playlist with unavailable videos in page 7',
3245 'url': 'https://www.youtube.com/playlist?list=UU8l9frL61Yl5KFOl87nIm2w',
3246 'info_dict': {
3247 'title': 'Uploads from BlankTV',
3248 'id': 'UU8l9frL61Yl5KFOl87nIm2w',
3249 'uploader': 'BlankTV',
3250 'uploader_id': 'UC8l9frL61Yl5KFOl87nIm2w',
3251 },
3252 'playlist_mincount': 1000,
3253 }, {
3254 'note': 'https://github.com/ytdl-org/youtube-dl/issues/21844',
3255 'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
3256 'info_dict': {
3257 'title': 'Data Analysis with Dr Mike Pound',
3258 'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
3259 'uploader_id': 'UC9-y-6csu5WGm29I7JiwpnA',
3260 'uploader': 'Computerphile',
3261 'description': 'md5:7f567c574d13d3f8c0954d9ffee4e487',
3262 },
3263 'playlist_mincount': 11,
3264 }, {
3265 'url': 'https://invidio.us/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
3266 'only_matching': True,
3267 }, {
3268 'note': 'Playlist URL that does not actually serve a playlist',
3269 'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
3270 'info_dict': {
3271 'id': 'FqZTN594JQw',
3272 'ext': 'webm',
3273 'title': "Smiley's People 01 detective, Adventure Series, Action",
3274 'uploader': 'STREEM',
3275 'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
3276 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
3277 'upload_date': '20150526',
3278 'license': 'Standard YouTube License',
3279 'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
3280 'categories': ['People & Blogs'],
3281 'tags': list,
3282 'view_count': int,
3283 'like_count': int,
3284 'dislike_count': int,
3285 },
3286 'params': {
3287 'skip_download': True,
3288 },
3289 'skip': 'This video is not available.',
3290 'add_ie': [YoutubeIE.ie_key()],
3291 }, {
3292 'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',
3293 'only_matching': True,
3294 }, {
3295 'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
3296 'only_matching': True,
3297 }, {
3298 'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',
3299 'info_dict': {
3300 'id': 'X1whbWASnNQ', # This will keep changing
3301 'ext': 'mp4',
3302 'title': compat_str,
3303 'uploader': 'Sky News',
3304 'uploader_id': 'skynews',
3305 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/skynews',
3306 'upload_date': r're:\d{8}',
3307 'description': compat_str,
3308 'categories': ['News & Politics'],
3309 'tags': list,
3310 'like_count': int,
3311 'dislike_count': int,
3312 },
3313 'params': {
3314 'skip_download': True,
3315 },
3316 'expected_warnings': ['Downloading just video ', 'Ignoring subtitle tracks found in '],
3317 }, {
3318 'url': 'https://www.youtube.com/user/TheYoungTurks/live',
3319 'info_dict': {
3320 'id': 'a48o2S1cPoo',
3321 'ext': 'mp4',
3322 'title': 'The Young Turks - Live Main Show',
3323 'uploader': 'The Young Turks',
3324 'uploader_id': 'TheYoungTurks',
3325 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
3326 'upload_date': '20150715',
3327 'license': 'Standard YouTube License',
3328 'description': 'md5:438179573adcdff3c97ebb1ee632b891',
3329 'categories': ['News & Politics'],
3330 'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
3331 'like_count': int,
3332 'dislike_count': int,
3333 },
3334 'params': {
3335 'skip_download': True,
3336 },
3337 'only_matching': True,
3338 }, {
3339 'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
3340 'only_matching': True,
3341 }, {
3342 'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
3343 'only_matching': True,
3344 }, {
3345 'note': 'A channel that is not live. Should raise error',
3346 'url': 'https://www.youtube.com/user/numberphile/live',
3347 'only_matching': True,
3348 }, {
3349 'url': 'https://www.youtube.com/feed/trending',
3350 'only_matching': True,
3351 }, {
3352 'url': 'https://www.youtube.com/feed/library',
3353 'only_matching': True,
3354 }, {
3355 'url': 'https://www.youtube.com/feed/history',
3356 'only_matching': True,
3357 }, {
3358 'url': 'https://www.youtube.com/feed/subscriptions',
3359 'only_matching': True,
3360 }, {
3361 'url': 'https://www.youtube.com/feed/watch_later',
3362 'only_matching': True,
3363 }, {
3364 'note': 'Recommended - redirects to home page',
3365 'url': 'https://www.youtube.com/feed/recommended',
3366 'only_matching': True,
3367 }, {
3368 'note': 'inline playlist with not always working continuations',
3369 'url': 'https://www.youtube.com/watch?v=UC6u0Tct-Fo&list=PL36D642111D65BE7C',
3370 'only_matching': True,
3371 }, {
3372 'url': 'https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8',
3373 'only_matching': True,
3374 }, {
3375 'url': 'https://www.youtube.com/course',
3376 'only_matching': True,
3377 }, {
3378 'url': 'https://www.youtube.com/zsecurity',
3379 'only_matching': True,
3380 }, {
3381 'url': 'http://www.youtube.com/NASAgovVideo/videos',
3382 'only_matching': True,
3383 }, {
3384 'url': 'https://www.youtube.com/TheYoungTurks/live',
3385 'only_matching': True,
3386 }, {
3387 'url': 'https://www.youtube.com/hashtag/cctv9',
3388 'info_dict': {
3389 'id': 'cctv9',
3390 'title': '#cctv9',
3391 },
3392 'playlist_mincount': 350,
3393 }, {
3394 'url': 'https://www.youtube.com/watch?list=PLW4dVinRY435CBE_JD3t-0SRXKfnZHS1P&feature=youtu.be&v=M9cJMXmQ_ZU',
3395 'only_matching': True,
3396 }, {
3397 'note': 'Requires Premium: should request additional YTM-info webpage (and have format 141) for videos in playlist',
3398 'url': 'https://music.youtube.com/playlist?list=PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
3399 'only_matching': True
3400 }, {
3401 'note': '/browse/ should redirect to /channel/',
3402 'url': 'https://music.youtube.com/browse/UC1a8OFewdjuLq6KlF8M_8Ng',
3403 'only_matching': True
3404 }, {
3405 'note': 'VLPL, should redirect to playlist?list=PL...',
3406 'url': 'https://music.youtube.com/browse/VLPLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
3407 'info_dict': {
3408 'id': 'PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
3409 'uploader': 'NoCopyrightSounds',
3410 'description': 'Providing you with copyright free / safe music for gaming, live streaming, studying and more!',
3411 'uploader_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',
3412 'title': 'NCS Releases',
3413 },
3414 'playlist_mincount': 166,
3415 }, {
3416 'note': 'Topic, should redirect to playlist?list=UU...',
3417 'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',
3418 'info_dict': {
3419 'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',
3420 'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
3421 'title': 'Uploads from Royalty Free Music - Topic',
3422 'uploader': 'Royalty Free Music - Topic',
3423 },
3424 'expected_warnings': [
3425 'A channel/user page was given',
3426 'The URL does not have a videos tab',
3427 ],
3428 'playlist_mincount': 101,
3429 }, {
3430 'note': 'Topic without a UU playlist',
3431 'url': 'https://www.youtube.com/channel/UCtFRv9O2AHqOZjjynzrv-xg',
3432 'info_dict': {
3433 'id': 'UCtFRv9O2AHqOZjjynzrv-xg',
3434 'title': 'UCtFRv9O2AHqOZjjynzrv-xg',
3435 },
3436 'expected_warnings': [
3437 'A channel/user page was given',
3438 'The URL does not have a videos tab',
3439 'Falling back to channel URL',
3440 ],
3441 'playlist_mincount': 9,
3442 }, {
3443 'note': 'Youtube music Album',
3444 'url': 'https://music.youtube.com/browse/MPREb_gTAcphH99wE',
3445 'info_dict': {
3446 'id': 'OLAK5uy_l1m0thk3g31NmIIz_vMIbWtyv7eZixlH0',
3447 'title': 'Album - Royalty Free Music Library V2 (50 Songs)',
3448 },
3449 'playlist_count': 50,
3450 }]
3451
3452 @classmethod
3453 def suitable(cls, url):
3454 return False if YoutubeIE.suitable(url) else super(
3455 YoutubeTabIE, cls).suitable(url)
3456
3457 def _extract_channel_id(self, webpage):
3458 channel_id = self._html_search_meta(
3459 'channelId', webpage, 'channel id', default=None)
3460 if channel_id:
3461 return channel_id
3462 channel_url = self._html_search_meta(
3463 ('og:url', 'al:ios:url', 'al:android:url', 'al:web:url',
3464 'twitter:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad',
3465 'twitter:app:url:googleplay'), webpage, 'channel url')
3466 return self._search_regex(
3467 r'https?://(?:www\.)?youtube\.com/channel/([^/?#&])+',
3468 channel_url, 'channel id')
3469
3470 @staticmethod
3471 def _extract_basic_item_renderer(item):
3472 # Modified from _extract_grid_item_renderer
3473 known_basic_renderers = (
3474 'playlistRenderer', 'videoRenderer', 'channelRenderer', 'showRenderer'
3475 )
3476 for key, renderer in item.items():
3477 if not isinstance(renderer, dict):
3478 continue
3479 elif key in known_basic_renderers:
3480 return renderer
3481 elif key.startswith('grid') and key.endswith('Renderer'):
3482 return renderer
3483
    def _grid_entries(self, grid_renderer):
        """Yield url_result/video entries for each item of a grid renderer.

        Each item may be a playlist, video, channel or a generic endpoint;
        the first matching kind wins per item.
        """
        for item in grid_renderer['items']:
            if not isinstance(item, dict):
                continue
            renderer = self._extract_basic_item_renderer(item)
            if not isinstance(renderer, dict):
                continue
            title = try_get(
                renderer, (lambda x: x['title']['runs'][0]['text'],
                           lambda x: x['title']['simpleText']), compat_str)
            # playlist
            playlist_id = renderer.get('playlistId')
            if playlist_id:
                yield self.url_result(
                    'https://www.youtube.com/playlist?list=%s' % playlist_id,
                    ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
                    video_title=title)
                continue
            # video
            video_id = renderer.get('videoId')
            if video_id:
                yield self._extract_video(renderer)
                continue
            # channel
            channel_id = renderer.get('channelId')
            if channel_id:
                # channel renderers only carry the simpleText title form
                title = try_get(
                    renderer, lambda x: x['title']['simpleText'], compat_str)
                yield self.url_result(
                    'https://www.youtube.com/channel/%s' % channel_id,
                    ie=YoutubeTabIE.ie_key(), video_title=title)
                continue
            # generic endpoint URL support: hand the URL to the first IE
            # (tab, playlist, video — in that order) that claims it
            ep_url = urljoin('https://www.youtube.com/', try_get(
                renderer, lambda x: x['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'],
                compat_str))
            if ep_url:
                for ie in (YoutubeTabIE, YoutubePlaylistIE, YoutubeIE):
                    if ie.suitable(ep_url):
                        yield self.url_result(
                            ep_url, ie=ie.ie_key(), video_id=ie._match_id(ep_url), video_title=title)
                        break
3526
3527 def _shelf_entries_from_content(self, shelf_renderer):
3528 content = shelf_renderer.get('content')
3529 if not isinstance(content, dict):
3530 return
3531 renderer = content.get('gridRenderer') or content.get('expandedShelfContentsRenderer')
3532 if renderer:
3533 # TODO: add support for nested playlists so each shelf is processed
3534 # as separate playlist
3535 # TODO: this includes only first N items
3536 for entry in self._grid_entries(renderer):
3537 yield entry
3538 renderer = content.get('horizontalListRenderer')
3539 if renderer:
3540 # TODO
3541 pass
3542
    def _shelf_entries(self, shelf_renderer, skip_channels=False):
        """Yield a url_result for a shelf's endpoint URL, plus its nested content.

        When skip_channels is True, shelves that link to other channels
        ('/channels?' URLs) are skipped entirely.
        """
        ep = try_get(
            shelf_renderer, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
            compat_str)
        shelf_url = urljoin('https://www.youtube.com', ep)
        if shelf_url:
            # Skipping links to other channels; note that checking for
            # endpoint.commandMetadata.webCommandMetadata.webPageType == WEB_PAGE_TYPE_CHANNEL
            # will not work
            if skip_channels and '/channels?' in shelf_url:
                return
            title = try_get(
                shelf_renderer, lambda x: x['title']['runs'][0]['text'], compat_str)
            yield self.url_result(shelf_url, video_title=title)
        # Shelf may not contain shelf URL, fallback to extraction from content
        for entry in self._shelf_entries_from_content(shelf_renderer):
            yield entry
3560
3561 def _playlist_entries(self, video_list_renderer):
3562 for content in video_list_renderer['contents']:
3563 if not isinstance(content, dict):
3564 continue
3565 renderer = content.get('playlistVideoRenderer') or content.get('playlistPanelVideoRenderer')
3566 if not isinstance(renderer, dict):
3567 continue
3568 video_id = renderer.get('videoId')
3569 if not video_id:
3570 continue
3571 yield self._extract_video(renderer)
3572
3573 def _rich_entries(self, rich_grid_renderer):
3574 renderer = try_get(
3575 rich_grid_renderer, lambda x: x['content']['videoRenderer'], dict) or {}
3576 video_id = renderer.get('videoId')
3577 if not video_id:
3578 return
3579 yield self._extract_video(renderer)
3580
3581 def _video_entry(self, video_renderer):
3582 video_id = video_renderer.get('videoId')
3583 if video_id:
3584 return self._extract_video(video_renderer)
3585
    def _post_thread_entries(self, post_thread_renderer):
        """Yield entries found in a community (backstage) post.

        A post may carry an attached video, an attached playlist, and/or
        inline video links inside the post text; all are yielded.
        """
        post_renderer = try_get(
            post_thread_renderer, lambda x: x['post']['backstagePostRenderer'], dict)
        if not post_renderer:
            return
        # video attachment
        video_renderer = try_get(
            post_renderer, lambda x: x['backstageAttachment']['videoRenderer'], dict) or {}
        video_id = video_renderer.get('videoId')
        if video_id:
            entry = self._extract_video(video_renderer)
            if entry:
                yield entry
        # playlist attachment
        playlist_id = try_get(
            post_renderer, lambda x: x['backstageAttachment']['playlistRenderer']['playlistId'], compat_str)
        if playlist_id:
            yield self.url_result(
                'https://www.youtube.com/playlist?list=%s' % playlist_id,
                ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
        # inline video links
        runs = try_get(post_renderer, lambda x: x['contentText']['runs'], list) or []
        for run in runs:
            if not isinstance(run, dict):
                continue
            ep_url = try_get(
                run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'], compat_str)
            if not ep_url:
                continue
            if not YoutubeIE.suitable(ep_url):
                continue
            ep_video_id = YoutubeIE._match_id(ep_url)
            # skip links that duplicate the attached video
            if video_id == ep_video_id:
                continue
            yield self.url_result(ep_url, ie=YoutubeIE.ie_key(), video_id=ep_video_id)
3621
3622 def _post_thread_continuation_entries(self, post_thread_continuation):
3623 contents = post_thread_continuation.get('contents')
3624 if not isinstance(contents, list):
3625 return
3626 for content in contents:
3627 renderer = content.get('backstagePostThreadRenderer')
3628 if not isinstance(renderer, dict):
3629 continue
3630 for entry in self._post_thread_entries(renderer):
3631 yield entry
3632
3633 r''' # unused
3634 def _rich_grid_entries(self, contents):
3635 for content in contents:
3636 video_renderer = try_get(content, lambda x: x['richItemRenderer']['content']['videoRenderer'], dict)
3637 if video_renderer:
3638 entry = self._video_entry(video_renderer)
3639 if entry:
3640 yield entry
3641 '''
    def _entries(self, tab, item_id, identity_token, account_syncid, ytcfg):
        """Yield all entries of a tab, following API continuations page by page.

        First extracts the entries present in the initial tab data, then
        repeatedly calls the browse API with the last-seen continuation
        token until none remains. continuation_list is a 1-element list
        used as a mutable cell because Python 2 lacks `nonlocal`.
        """

        def extract_entries(parent_renderer):  # this needs to called again for continuation to work with feeds
            contents = try_get(parent_renderer, lambda x: x['contents'], list) or []
            for content in contents:
                if not isinstance(content, dict):
                    continue
                is_renderer = try_get(content, lambda x: x['itemSectionRenderer'], dict)
                if not is_renderer:
                    # not an item section: may still be a rich grid item
                    renderer = content.get('richItemRenderer')
                    if renderer:
                        for entry in self._rich_entries(renderer):
                            yield entry
                        continuation_list[0] = self._extract_continuation(parent_renderer)
                    continue
                isr_contents = try_get(is_renderer, lambda x: x['contents'], list) or []
                for isr_content in isr_contents:
                    if not isinstance(isr_content, dict):
                        continue

                    known_renderers = {
                        'playlistVideoListRenderer': self._playlist_entries,
                        'gridRenderer': self._grid_entries,
                        'shelfRenderer': lambda x: self._shelf_entries(x, tab.get('title') != 'Channels'),
                        'backstagePostThreadRenderer': self._post_thread_entries,
                        'videoRenderer': lambda x: [self._video_entry(x)],
                    }
                    # dispatch on the first recognized renderer key
                    for key, renderer in isr_content.items():
                        if key not in known_renderers:
                            continue
                        for entry in known_renderers[key](renderer):
                            if entry:
                                yield entry
                        continuation_list[0] = self._extract_continuation(renderer)
                        break

                if not continuation_list[0]:
                    continuation_list[0] = self._extract_continuation(is_renderer)

            if not continuation_list[0]:
                continuation_list[0] = self._extract_continuation(parent_renderer)

        continuation_list = [None]  # Python 2 does not support nonlocal
        tab_content = try_get(tab, lambda x: x['content'], dict)
        if not tab_content:
            return
        parent_renderer = (
            try_get(tab_content, lambda x: x['sectionListRenderer'], dict)
            or try_get(tab_content, lambda x: x['richGridRenderer'], dict) or {})
        for entry in extract_entries(parent_renderer):
            yield entry
        continuation = continuation_list[0]
        context = self._extract_context(ytcfg)
        visitor_data = try_get(context, lambda x: x['client']['visitorData'], compat_str)

        for page_num in itertools.count(1):
            if not continuation:
                break
            query = {
                'continuation': continuation['continuation'],
                'clickTracking': {'clickTrackingParams': continuation['itct']}
            }
            headers = self._generate_api_headers(ytcfg, identity_token, account_syncid, visitor_data)
            response = self._extract_response(
                item_id='%s page %s' % (item_id, page_num),
                query=query, headers=headers, ytcfg=ytcfg,
                check_get_keys=('continuationContents', 'onResponseReceivedActions', 'onResponseReceivedEndpoints'))

            if not response:
                break
            # carry forward visitorData returned by the API, if any
            visitor_data = try_get(
                response, lambda x: x['responseContext']['visitorData'], compat_str) or visitor_data

            # older-style continuation responses ('continuationContents')
            known_continuation_renderers = {
                'playlistVideoListContinuation': self._playlist_entries,
                'gridContinuation': self._grid_entries,
                'itemSectionContinuation': self._post_thread_continuation_entries,
                'sectionListContinuation': extract_entries,  # for feeds
            }
            continuation_contents = try_get(
                response, lambda x: x['continuationContents'], dict) or {}
            continuation_renderer = None
            for key, value in continuation_contents.items():
                if key not in known_continuation_renderers:
                    continue
                continuation_renderer = value
                continuation_list = [None]
                for entry in known_continuation_renderers[key](continuation_renderer):
                    yield entry
                continuation = continuation_list[0] or self._extract_continuation(continuation_renderer)
                break
            if continuation_renderer:
                continue

            # newer-style responses ('onResponseReceived...'): the renderer
            # key of the first item decides how all continuation items are
            # wrapped and dispatched
            known_renderers = {
                'gridPlaylistRenderer': (self._grid_entries, 'items'),
                'gridVideoRenderer': (self._grid_entries, 'items'),
                'playlistVideoRenderer': (self._playlist_entries, 'contents'),
                'itemSectionRenderer': (extract_entries, 'contents'),  # for feeds
                'richItemRenderer': (extract_entries, 'contents'),  # for hashtag
                'backstagePostThreadRenderer': (self._post_thread_continuation_entries, 'contents')
            }
            on_response_received = dict_get(response, ('onResponseReceivedActions', 'onResponseReceivedEndpoints'))
            continuation_items = try_get(
                on_response_received, lambda x: x[0]['appendContinuationItemsAction']['continuationItems'], list)
            continuation_item = try_get(continuation_items, lambda x: x[0], dict) or {}
            video_items_renderer = None
            for key, value in continuation_item.items():
                if key not in known_renderers:
                    continue
                video_items_renderer = {known_renderers[key][1]: continuation_items}
                continuation_list = [None]
                for entry in known_renderers[key][0](video_items_renderer):
                    yield entry
                continuation = continuation_list[0] or self._extract_continuation(video_items_renderer)
                break
            if video_items_renderer:
                continue
            break
3761
3762 @staticmethod
3763 def _extract_selected_tab(tabs):
3764 for tab in tabs:
3765 renderer = dict_get(tab, ('tabRenderer', 'expandableTabRenderer')) or {}
3766 if renderer.get('selected') is True:
3767 return renderer
3768 else:
3769 raise ExtractorError('Unable to find selected tab')
3770
3771 @staticmethod
3772 def _extract_uploader(data):
3773 uploader = {}
3774 sidebar_renderer = try_get(
3775 data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list)
3776 if sidebar_renderer:
3777 for item in sidebar_renderer:
3778 if not isinstance(item, dict):
3779 continue
3780 renderer = item.get('playlistSidebarSecondaryInfoRenderer')
3781 if not isinstance(renderer, dict):
3782 continue
3783 owner = try_get(
3784 renderer, lambda x: x['videoOwner']['videoOwnerRenderer']['title']['runs'][0], dict)
3785 if owner:
3786 uploader['uploader'] = owner.get('text')
3787 uploader['uploader_id'] = try_get(
3788 owner, lambda x: x['navigationEndpoint']['browseEndpoint']['browseId'], compat_str)
3789 uploader['uploader_url'] = urljoin(
3790 'https://www.youtube.com/',
3791 try_get(owner, lambda x: x['navigationEndpoint']['browseEndpoint']['canonicalBaseUrl'], compat_str))
3792 return {k: v for k, v in uploader.items() if v is not None}
3793
    def _extract_from_tabs(self, item_id, webpage, data, tabs):
        """Build a playlist result for a tabbed page (channel/playlist/hashtag).

        Collects metadata from channelMetadataRenderer or
        playlistMetadataRenderer, then yields entries lazily via _entries().
        """
        playlist_id = title = description = channel_url = channel_name = channel_id = None
        thumbnails_list = tags = []

        selected_tab = self._extract_selected_tab(tabs)
        renderer = try_get(
            data, lambda x: x['metadata']['channelMetadataRenderer'], dict)
        if renderer:
            channel_name = renderer.get('title')
            channel_url = renderer.get('channelUrl')
            channel_id = renderer.get('externalId')
        else:
            # playlist pages carry playlistMetadataRenderer instead
            renderer = try_get(
                data, lambda x: x['metadata']['playlistMetadataRenderer'], dict)

        if renderer:
            title = renderer.get('title')
            description = renderer.get('description', '')
            playlist_id = channel_id
            tags = renderer.get('keywords', '').split()
            thumbnails_list = (
                try_get(renderer, lambda x: x['avatar']['thumbnails'], list)
                or try_get(
                    data,
                    lambda x: x['sidebar']['playlistSidebarRenderer']['items'][0]['playlistSidebarPrimaryInfoRenderer']['thumbnailRenderer']['playlistVideoThumbnailRenderer']['thumbnail']['thumbnails'],
                    list)
                or [])

        thumbnails = []
        for t in thumbnails_list:
            if not isinstance(t, dict):
                continue
            thumbnail_url = url_or_none(t.get('url'))
            if not thumbnail_url:
                continue
            thumbnails.append({
                'url': thumbnail_url,
                'width': int_or_none(t.get('width')),
                'height': int_or_none(t.get('height')),
            })
        if playlist_id is None:
            playlist_id = item_id
        if title is None:
            # hashtag pages put the title in the header instead
            title = (
                try_get(data, lambda x: x['header']['hashtagHeaderRenderer']['hashtag']['simpleText'])
                or playlist_id)
        title += format_field(selected_tab, 'title', ' - %s')
        title += format_field(selected_tab, 'expandedText', ' - %s')

        metadata = {
            'playlist_id': playlist_id,
            'playlist_title': title,
            'playlist_description': description,
            'uploader': channel_name,
            'uploader_id': channel_id,
            'uploader_url': channel_url,
            'thumbnails': thumbnails,
            'tags': tags,
        }
        if not channel_id:
            # fall back to sidebar owner info when channel metadata is absent
            metadata.update(self._extract_uploader(data))
        metadata.update({
            'channel': metadata['uploader'],
            'channel_id': metadata['uploader_id'],
            'channel_url': metadata['uploader_url']})
        return self.playlist_result(
            self._entries(
                selected_tab, playlist_id,
                self._extract_identity_token(webpage, item_id),
                self._extract_account_syncid(data),
                self._extract_ytcfg(item_id, webpage)),
            **metadata)
3866
    def _extract_mix_playlist(self, playlist, playlist_id, data, webpage):
        """Yield videos of an auto-generated Mix, paging via the 'next' API.

        Mixes have no fixed end: each 'next' response repeats earlier items,
        so videos are de-duplicated against last_id and extraction stops
        when the first video comes around again.
        """
        first_id = last_id = None
        ytcfg = self._extract_ytcfg(playlist_id, webpage)
        headers = self._generate_api_headers(
            ytcfg, account_syncid=self._extract_account_syncid(data),
            identity_token=self._extract_identity_token(webpage, item_id=playlist_id),
            visitor_data=try_get(self._extract_context(ytcfg), lambda x: x['client']['visitorData'], compat_str))
        for page_num in itertools.count(1):
            videos = list(self._playlist_entries(playlist))
            if not videos:
                return
            # resume right after the last video seen on the previous page
            start = next((i for i, v in enumerate(videos) if v['id'] == last_id), -1) + 1
            if start >= len(videos):
                return
            for video in videos[start:]:
                if video['id'] == first_id:
                    self.to_screen('First video %s found again; Assuming end of Mix' % first_id)
                    return
                yield video
            first_id = first_id or videos[0]['id']
            last_id = videos[-1]['id']
            # NOTE(review): watch_endpoint may be None when the lookup
            # fails, which would make the .get() calls below raise — confirm
            watch_endpoint = try_get(
                playlist, lambda x: x['contents'][-1]['playlistPanelVideoRenderer']['navigationEndpoint']['watchEndpoint'])
            query = {
                'playlistId': playlist_id,
                'videoId': watch_endpoint.get('videoId') or last_id,
                'index': watch_endpoint.get('index') or len(videos),
                'params': watch_endpoint.get('params') or 'OAE%3D'
            }
            response = self._extract_response(
                item_id='%s page %d' % (playlist_id, page_num),
                query=query,
                ep='next',
                headers=headers,
                check_get_keys='contents'
            )
            playlist = try_get(
                response, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
3905
    def _extract_from_playlist(self, item_id, url, data, playlist, webpage):
        """Extract a playlist found on a watch page.

        Regular playlists are delegated to the tab-based playlist URL;
        only Mix playlists (whose endpoint URL equals the current URL or
        is missing) are extracted inline via _extract_mix_playlist.
        """
        title = playlist.get('title') or try_get(
            data, lambda x: x['titleText']['simpleText'], compat_str)
        playlist_id = playlist.get('playlistId') or item_id

        # Delegating everything except mix playlists to regular tab-based playlist URL
        playlist_url = urljoin(url, try_get(
            playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
            compat_str))
        if playlist_url and playlist_url != url:
            return self.url_result(
                playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
                video_title=title)

        return self.playlist_result(
            self._extract_mix_playlist(playlist, playlist_id, data, webpage),
            playlist_id=playlist_id, playlist_title=title)
3923
    def _reload_with_unavailable_videos(self, item_id, data, webpage):
        """
        Get playlist with unavailable videos if the 'show unavailable videos' button exists.

        Scans the sidebar menu for that button, then re-requests the browse
        API with its browseId/params. Returns the new API response, or None
        when there is no sidebar (the request itself is non-fatal).
        """
        sidebar_renderer = try_get(
            data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list)
        if not sidebar_renderer:
            return
        browse_id = params = None
        for item in sidebar_renderer:
            if not isinstance(item, dict):
                continue
            renderer = item.get('playlistSidebarPrimaryInfoRenderer')
            menu_renderer = try_get(
                renderer, lambda x: x['menu']['menuRenderer']['items'], list) or []
            for menu_item in menu_renderer:
                if not isinstance(menu_item, dict):
                    continue
                nav_item_renderer = menu_item.get('menuNavigationItemRenderer')
                text = try_get(
                    nav_item_renderer, lambda x: x['text']['simpleText'], compat_str)
                if not text or text.lower() != 'show unavailable videos':
                    continue
                browse_endpoint = try_get(
                    nav_item_renderer, lambda x: x['navigationEndpoint']['browseEndpoint'], dict) or {}
                browse_id = browse_endpoint.get('browseId')
                params = browse_endpoint.get('params')
                break

        ytcfg = self._extract_ytcfg(item_id, webpage)
        headers = self._generate_api_headers(
            ytcfg, account_syncid=self._extract_account_syncid(ytcfg),
            identity_token=self._extract_identity_token(webpage, item_id=item_id),
            visitor_data=try_get(
                self._extract_context(ytcfg), lambda x: x['client']['visitorData'], compat_str))
        query = {
            # 'wgYCCAA=' / 'VL<id>': defaults used when the button was not
            # found — presumably the generic 'include unavailable' request;
            # TODO confirm against the web client
            'params': params or 'wgYCCAA=',
            'browseId': browse_id or 'VL%s' % item_id
        }
        return self._extract_response(
            item_id=item_id, headers=headers, query=query,
            check_get_keys='contents', fatal=False,
            note='Downloading API JSON with unavailable videos')
3967
3968 def _extract_webpage(self, url, item_id):
3969 retries = self.get_param('extractor_retries', 3)
3970 count = -1
3971 last_error = 'Incomplete yt initial data recieved'
3972 while count < retries:
3973 count += 1
3974 # Sometimes youtube returns a webpage with incomplete ytInitialData
3975 # See: https://github.com/yt-dlp/yt-dlp/issues/116
3976 if count:
3977 self.report_warning('%s. Retrying ...' % last_error)
3978 webpage = self._download_webpage(
3979 url, item_id,
3980 'Downloading webpage%s' % (' (retry #%d)' % count if count else ''))
3981 data = self._extract_yt_initial_data(item_id, webpage)
3982 if data.get('contents') or data.get('currentVideoEndpoint'):
3983 break
3984 # Extract alerts here only when there is error
3985 self._extract_and_report_alerts(data)
3986 if count >= retries:
3987 raise ExtractorError(last_error)
3988 return webpage, data
3989
3990 @staticmethod
3991 def _smuggle_data(entries, data):
3992 for entry in entries:
3993 if data:
3994 entry['url'] = smuggle_url(entry['url'], data)
3995 yield entry
3996
    def _real_extract(self, url):
        # Recover any data smuggled into the URL by other extractors
        url, smuggled_data = unsmuggle_url(url, {})
        if self.is_music_url(url):
            smuggled_data['is_music_url'] = True
        info_dict = self.__real_extract(url, smuggled_data)
        if info_dict.get('entries'):
            # Propagate the smuggled data into every playlist entry URL
            info_dict['entries'] = self._smuggle_data(info_dict['entries'], smuggled_data)
        return info_dict
4005
    # Splits a URL into the span matched by _VALID_URL (pre), an optional
    # /<tab> path segment (only attempted when the channel_type group
    # matched, via the (?(channel_type)...) conditional), and the rest (post)
    _url_re = re.compile(r'(?P<pre>%s)(?(channel_type)(?P<tab>/\w+))?(?P<post>.*)$' % _VALID_URL)
4007
    def __real_extract(self, url, smuggled_data):
        """Core tab-page extraction.

        Normalizes the URL (www.youtube.com host, music redirects, channel
        home -> /videos), then dispatches the downloaded data to the tab,
        watch-playlist or single-video handler.
        """
        item_id = self._match_id(url)
        url = compat_urlparse.urlunparse(
            compat_urlparse.urlparse(url)._replace(netloc='www.youtube.com'))
        compat_opts = self.get_param('compat_opts', [])

        def get_mobj(url):
            # groupdict of _url_re with None values normalized to ''
            mobj = self._url_re.match(url).groupdict()
            mobj.update((k, '') for k, v in mobj.items() if v is None)
            return mobj

        mobj = get_mobj(url)
        # Youtube returns incomplete data if tabname is not lower case
        pre, tab, post, is_channel = mobj['pre'], mobj['tab'].lower(), mobj['post'], not mobj['not_channel']

        if is_channel:
            if smuggled_data.get('is_music_url'):
                if item_id[:2] == 'VL':
                    # Youtube music VL channels have an equivalent playlist
                    item_id = item_id[2:]
                    pre, tab, post, is_channel = 'https://www.youtube.com/playlist?list=%s' % item_id, '', '', False
                elif item_id[:2] == 'MP':
                    # Youtube music albums (/channel/MP...) have a OLAK playlist that can be extracted from the webpage
                    item_id = self._search_regex(
                        r'\\x22audioPlaylistId\\x22:\\x22([0-9A-Za-z_-]+)\\x22',
                        self._download_webpage('https://music.youtube.com/channel/%s' % item_id, item_id),
                        'playlist id')
                    pre, tab, post, is_channel = 'https://www.youtube.com/playlist?list=%s' % item_id, '', '', False
                elif mobj['channel_type'] == 'browse':
                    # Youtube music /browse/ should be changed to /channel/
                    pre = 'https://www.youtube.com/channel/%s' % item_id
        if is_channel and not tab and 'no-youtube-channel-redirect' not in compat_opts:
            # Home URLs should redirect to /videos/
            self.report_warning(
                'A channel/user page was given. All the channel\'s videos will be downloaded. '
                'To download only the videos in the home page, add a "/featured" to the URL')
            tab = '/videos'

        url = ''.join((pre, tab, post))
        mobj = get_mobj(url)

        # Handle both video/playlist URLs
        qs = parse_qs(url)
        video_id = qs.get('v', [None])[0]
        playlist_id = qs.get('list', [None])[0]

        if not video_id and mobj['not_channel'].startswith('watch'):
            if not playlist_id:
                # If there is neither video or playlist ids, youtube redirects to home page, which is undesirable
                raise ExtractorError('Unable to recognize tab page')
            # Common mistake: https://www.youtube.com/watch?list=playlist_id
            self.report_warning('A video URL was given without video ID. Trying to download playlist %s' % playlist_id)
            url = 'https://www.youtube.com/playlist?list=%s' % playlist_id
            mobj = get_mobj(url)

        if video_id and playlist_id:
            if self.get_param('noplaylist'):
                self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
                return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)
            self.to_screen('Downloading playlist %s; add --no-playlist to just download video %s' % (playlist_id, video_id))

        webpage, data = self._extract_webpage(url, item_id)

        tabs = try_get(
            data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
        if tabs:
            selected_tab = self._extract_selected_tab(tabs)
            tab_name = selected_tab.get('title', '')
            if 'no-youtube-channel-redirect' not in compat_opts:
                if mobj['tab'] == '/live':
                    # Live tab should have redirected to the video
                    raise ExtractorError('The channel is not currently live', expected=True)
                if mobj['tab'] == '/videos' and tab_name.lower() != mobj['tab'][1:]:
                    if not mobj['not_channel'] and item_id[:2] == 'UC':
                        # Topic channels don't have /videos. Use the equivalent playlist instead
                        self.report_warning('The URL does not have a %s tab. Trying to redirect to playlist UU%s instead' % (mobj['tab'][1:], item_id[2:]))
                        pl_id = 'UU%s' % item_id[2:]
                        pl_url = 'https://www.youtube.com/playlist?list=%s%s' % (pl_id, mobj['post'])
                        try:
                            pl_webpage, pl_data = self._extract_webpage(pl_url, pl_id)
                            for alert_type, alert_message in self._extract_alerts(pl_data):
                                if alert_type == 'error':
                                    raise ExtractorError('Youtube said: %s' % alert_message)
                            item_id, url, webpage, data = pl_id, pl_url, pl_webpage, pl_data
                        except ExtractorError:
                            self.report_warning('The playlist gave error. Falling back to channel URL')
                    else:
                        self.report_warning('The URL does not have a %s tab. %s is being downloaded instead' % (mobj['tab'][1:], tab_name))

        self.write_debug('Final URL: %s' % url)

        # YouTube sometimes provides a button to reload playlist with unavailable videos.
        if 'no-youtube-unavailable-videos' not in compat_opts:
            data = self._reload_with_unavailable_videos(item_id, data, webpage) or data
        self._extract_and_report_alerts(data)

        tabs = try_get(
            data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
        if tabs:
            return self._extract_from_tabs(item_id, webpage, data, tabs)

        playlist = try_get(
            data, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
        if playlist:
            return self._extract_from_playlist(item_id, url, data, playlist, webpage)

        video_id = try_get(
            data, lambda x: x['currentVideoEndpoint']['watchEndpoint']['videoId'],
            compat_str) or video_id
        if video_id:
            if mobj['tab'] != '/live':  # live tab is expected to redirect to video
                self.report_warning('Unable to recognize playlist. Downloading just video %s' % video_id)
            return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)

        raise ExtractorError('Unable to recognize tab page')
4123
4124
class YoutubePlaylistIE(InfoExtractor):
    """Thin extractor for bare playlist IDs and legacy playlist URLs.

    Matches a playlist id (optionally embedded in a youtube/invidious URL)
    and delegates the actual extraction to YoutubeTabIE via a canonical
    https://www.youtube.com/playlist URL.
    """
    IE_DESC = 'YouTube.com playlists'
    _VALID_URL = r'''(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        (?:
                            (?:
                                youtube(?:kids)?\.com|
                                invidio\.us
                            )
                            /.*?\?.*?\blist=
                        )?
                        (?P<id>%(playlist_id)s)
                     )''' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    IE_NAME = 'youtube:playlist'
    _TESTS = [{
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
            'uploader': 'Wickydoo',
            'uploader_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
        },
        'playlist_mincount': 29,
    }, {
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        },
        'playlist_count': 2,
        'skip': 'This playlist is private',
    }, {
        'note': 'embedded',
        'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA15',
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
            'uploader': 'milan',
            'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
        }
    }, {
        'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
        'playlist_mincount': 982,
        'info_dict': {
            'title': '2018 Chinese New Singles (11/6 updated)',
            'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
            'uploader': 'LBK',
            'uploader_id': 'UC21nz3_MesPLqtDqwdvnoxA',
        }
    }, {
        'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
        'only_matching': True,
    }, {
        # music album playlist
        'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        if YoutubeTabIE.suitable(url):
            return False
        # Hack for lazy extractors until more generic solution is implemented
        # (see #28780)
        from .youtube import parse_qs
        qs = parse_qs(url)
        # watch URLs with a video id belong to YoutubeIE
        if qs.get('v', [None])[0]:
            return False
        return super(YoutubePlaylistIE, cls).suitable(url)

    def _real_extract(self, url):
        playlist_id = self._match_id(url)
        is_music_url = YoutubeBaseInfoExtractor.is_music_url(url)
        # Build the canonical playlist URL, preserving any query parameters
        url = update_url_query(
            'https://www.youtube.com/playlist',
            parse_qs(url) or {'list': playlist_id})
        if is_music_url:
            url = smuggle_url(url, {'is_music_url': True})
        return self.url_result(url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
4207
4208
class YoutubeYtBeIE(InfoExtractor):
    """Handle youtu.be short links that also carry a list= parameter."""
    IE_DESC = 'youtu.be'
    _VALID_URL = r'https?://youtu\.be/(?P<id>[0-9A-Za-z_-]{11})/*?.*?\blist=(?P<playlist_id>%(playlist_id)s)' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TESTS = [{
        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
        'info_dict': {
            'id': 'yeWKywCrFtk',
            'ext': 'mp4',
            'title': 'Small Scale Baler and Braiding Rugs',
            'uploader': 'Backus-Page House Museum',
            'uploader_id': 'backuspagemuseum',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
            'upload_date': '20161008',
            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
            'categories': ['Nonprofits & Activism'],
            'tags': list,
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
    }, {
        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # Rewrite to a canonical watch URL and let the tab extractor decide
        # between the single video and the playlist.
        video_id, playlist_id = re.match(self._VALID_URL, url).group('id', 'playlist_id')
        watch_url = update_url_query('https://www.youtube.com/watch', {
            'v': video_id,
            'list': playlist_id,
            'feature': 'youtu.be',
        })
        return self.url_result(watch_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
4247
4248
class YoutubeYtUserIE(InfoExtractor):
    """Resolve 'ytuser:NAME' pseudo-URLs to the corresponding /user/ page."""
    IE_DESC = 'YouTube.com user videos, URL or "ytuser" keyword'
    _VALID_URL = r'ytuser:(?P<id>.+)'
    _TESTS = [{
        'url': 'ytuser:phihag',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        user_id = self._match_id(url)
        user_url = 'https://www.youtube.com/user/%s' % user_id
        return self.url_result(user_url, ie=YoutubeTabIE.ie_key(), video_id=user_id)
4262
4263
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    """Resolve ':ytfav' (and spelling variants) to the liked-videos playlist."""
    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube.com liked videos, ":ytfav" for short (requires authentication)'
    _VALID_URL = r':ytfav(?:ou?rite)?s?'
    _LOGIN_REQUIRED = True
    _TESTS = [{
        'url': ':ytfav',
        'only_matching': True,
    }, {
        'url': ':ytfavorites',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # 'LL' is the liked-videos list id; hand off to the tab extractor
        liked_url = 'https://www.youtube.com/playlist?list=LL'
        return self.url_result(liked_url, ie=YoutubeTabIE.ie_key())
4281
4282
class YoutubeSearchIE(SearchInfoExtractor, YoutubeTabIE):
    IE_DESC = 'YouTube.com searches, "ytsearch" keyword'
    # there doesn't appear to be a real limit, for example if you search for
    # 'python' you get more than 8.000.000 results
    _MAX_RESULTS = float('inf')
    IE_NAME = 'youtube:search'
    _SEARCH_KEY = 'ytsearch'
    _SEARCH_PARAMS = None
    _TESTS = []

    def _entries(self, query, n):
        """Yield up to *n* search results for *query*, following continuation pages."""
        request_data = {'query': query}
        if self._SEARCH_PARAMS:
            request_data['params'] = self._SEARCH_PARAMS
        yielded = 0
        for page_num in itertools.count(1):
            search = self._extract_response(
                item_id='query "%s" page %s' % (query, page_num), ep='search',
                query=request_data,
                check_get_keys=('contents', 'onResponseReceivedCommands'))
            if not search:
                break
            # First page and continuation pages nest the section list differently
            slr_contents = try_get(
                search,
                (lambda x: x['contents']['twoColumnSearchResultsRenderer']['primaryContents']['sectionListRenderer']['contents'],
                 lambda x: x['onResponseReceivedCommands'][0]['appendContinuationItemsAction']['continuationItems']),
                list)
            if not slr_contents:
                break

            # Youtube sometimes adds promoted content to searches,
            # changing the index location of videos and token.
            # So we search through all entries till we find them.
            continuation_token = None
            for slr_content in slr_contents:
                if continuation_token is None:
                    continuation_token = try_get(
                        slr_content,
                        lambda x: x['continuationItemRenderer']['continuationEndpoint']['continuationCommand']['token'],
                        compat_str)

                isr_contents = try_get(
                    slr_content, lambda x: x['itemSectionRenderer']['contents'], list)
                for content in isr_contents or []:
                    # Skip anything that is not a plain video entry
                    video = content.get('videoRenderer') if isinstance(content, dict) else None
                    if not isinstance(video, dict) or not video.get('videoId'):
                        continue
                    yield self._extract_video(video)
                    yielded += 1
                    if yielded == n:
                        return

            if not continuation_token:
                break
            request_data['continuation'] = continuation_token

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query"""
        return self.playlist_result(self._entries(query, n), query)
4352
4353
class YoutubeSearchDateIE(YoutubeSearchIE):
    """Search extractor returning newest uploads first (``ytsearchdate:`` keyword)."""
    IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
    _SEARCH_KEY = 'ytsearchdate'
    IE_DESC = 'YouTube.com searches, newest videos first, "ytsearchdate" keyword'
    # URL-encoded search-filter param selecting upload-date ordering
    # (presumably a base64-encoded protobuf filter -- confirm against InnerTube)
    _SEARCH_PARAMS = 'CAI%3D'
4359
4360
class YoutubeSearchURLIE(YoutubeSearchIE):
    IE_DESC = 'YouTube.com search URLs'
    IE_NAME = YoutubeSearchIE.IE_NAME + '_url'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?:[^&]+)(?:[&]|$)'
    # _MAX_RESULTS = 100
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'title': 'youtube-dl test video',
        }
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    @classmethod
    def _make_valid_url(cls):
        # SearchInfoExtractor builds its URL pattern from the search key;
        # this IE matches real /results URLs instead.
        return cls._VALID_URL

    def _real_extract(self, url):
        """Run a search for the query carried by a /results URL.

        Uses the module-level parse_qs helper for consistency with the rest
        of this file instead of composing compat_parse_qs and
        compat_urllib_parse_urlparse by hand.
        """
        qs = parse_qs(url)
        # _VALID_URL guarantees at least one of search_query/q is present
        query = (qs.get('search_query') or qs.get('q'))[0]
        # 'sp' carries the encoded search filters, if any
        self._SEARCH_PARAMS = qs.get('sp', ('',))[0]
        return self._get_n_results(query, self._MAX_RESULTS)
4386
4387
class YoutubeFeedsInfoExtractor(YoutubeTabIE):
    """
    Base class for feed extractors
    Subclasses must define the _FEED_NAME property.
    """
    _LOGIN_REQUIRED = True
    _TESTS = []

    @property
    def IE_NAME(self):
        # Derive the IE name from the subclass-provided feed name
        return 'youtube:' + self._FEED_NAME

    def _real_extract(self, url):
        feed_url = 'https://www.youtube.com/feed/%s' % self._FEED_NAME
        return self.url_result(feed_url, ie=YoutubeTabIE.ie_key())
4404
4405
class YoutubeWatchLaterIE(InfoExtractor):
    """Resolve ':ytwatchlater' to the authenticated watch-later playlist."""
    IE_NAME = 'youtube:watchlater'
    IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
    _VALID_URL = r':ytwatchlater'
    _TESTS = [{
        'url': ':ytwatchlater',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # 'WL' is the watch-later list id; hand off to the tab extractor
        watch_later_url = 'https://www.youtube.com/playlist?list=WL'
        return self.url_result(watch_later_url, ie=YoutubeTabIE.ie_key())
4418
4419
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    """Recommended-videos feed (':ytrec', or the bare youtube.com front page)."""
    IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/?(?:[?#]|$)|:ytrec(?:ommended)?'
    _FEED_NAME = 'recommended'
    _LOGIN_REQUIRED = False  # overrides the base class: this feed also works anonymously
    _TESTS = [{
        'url': ':ytrec',
        'only_matching': True,
    }, {
        'url': ':ytrecommended',
        'only_matching': True,
    }, {
        'url': 'https://youtube.com',
        'only_matching': True,
    }]
4435
4436
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    """Subscriptions feed (':ytsubs'); login required via the base class."""
    IE_DESC = 'YouTube.com subscriptions feed, ":ytsubs" for short (requires authentication)'
    _VALID_URL = r':ytsub(?:scription)?s?'
    _FEED_NAME = 'subscriptions'
    _TESTS = [{
        'url': ':ytsubs',
        'only_matching': True,
    }, {
        'url': ':ytsubscriptions',
        'only_matching': True,
    }]
4448
4449
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    """Watch-history feed (':ythis'); login required via the base class."""
    IE_DESC = 'Youtube watch history, ":ythis" for short (requires authentication)'
    _VALID_URL = r':ythis(?:tory)?'
    _FEED_NAME = 'history'
    _TESTS = [{
        'url': ':ythistory',
        'only_matching': True,
    }]
4458
4459
class YoutubeTruncatedURLIE(InfoExtractor):
    """Catch watch URLs whose v= parameter was lost (usually unquoted shell '&')."""
    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # Always fail with a hint: these URLs cannot identify a video
        message = (
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like youtube-dl '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply youtube-dl BaW_jenozKc .')
        raise ExtractorError(message, expected=True)
4507
4508
class YoutubeTruncatedIDIE(InfoExtractor):
    """Catch watch URLs whose video id is shorter than the 11 required chars."""
    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # Always fail: a short id means the URL was cut off somewhere
        video_id = self._match_id(url)
        message = 'Incomplete YouTube ID %s. URL %s looks truncated.' % (video_id, url)
        raise ExtractorError(message, expected=True)