]> jfr.im git - yt-dlp.git/blame_incremental - yt_dlp/extractor/youtube.py
[utils] Fix `PagedList`
[yt-dlp.git] / yt_dlp / extractor / youtube.py
... / ...
CommitLineData
1# coding: utf-8
2
3from __future__ import unicode_literals
4
5import base64
6import calendar
7import copy
8import datetime
9import hashlib
10import itertools
11import json
12import os.path
13import random
14import re
15import time
16import traceback
17
18from .common import InfoExtractor, SearchInfoExtractor
19from ..compat import (
20 compat_chr,
21 compat_HTTPError,
22 compat_parse_qs,
23 compat_str,
24 compat_urllib_parse_unquote_plus,
25 compat_urllib_parse_urlencode,
26 compat_urllib_parse_urlparse,
27 compat_urlparse,
28)
29from ..jsinterp import JSInterpreter
30from ..utils import (
31 bytes_to_intlist,
32 clean_html,
33 datetime_from_str,
34 dict_get,
35 error_to_compat_str,
36 ExtractorError,
37 float_or_none,
38 format_field,
39 int_or_none,
40 intlist_to_bytes,
41 is_html,
42 join_nonempty,
43 mimetype2ext,
44 network_exceptions,
45 orderedSet,
46 parse_codecs,
47 parse_count,
48 parse_duration,
49 parse_iso8601,
50 parse_qs,
51 qualities,
52 remove_end,
53 remove_start,
54 smuggle_url,
55 str_or_none,
56 str_to_int,
57 traverse_obj,
58 try_get,
59 unescapeHTML,
60 unified_strdate,
61 unsmuggle_url,
62 update_url_query,
63 url_or_none,
64 urljoin,
65 variadic,
66)
67
68
# Per-client Innertube configuration, keyed by the client name.
# Any clients starting with _ cannot be explicitly requested by the user.
# Entries that omit INNERTUBE_API_KEY, INNERTUBE_HOST or REQUIRE_JS_PLAYER
# get their defaults filled in by build_innertube_clients().
INNERTUBE_CLIENTS = {
    'web': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB',
                'clientVersion': '2.20210622.10.00',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 1,
    },
    'web_embedded': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB_EMBEDDED_PLAYER',
                'clientVersion': '1.20210620.0.1',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 56,
    },
    'web_music': {
        'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
        'INNERTUBE_HOST': 'music.youtube.com',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB_REMIX',
                'clientVersion': '1.20210621.00.00',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 67,
    },
    'web_creator': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB_CREATOR',
                'clientVersion': '1.20210621.00.00',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 62,
    },
    'android': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID',
                'clientVersion': '16.20',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 3,
        'REQUIRE_JS_PLAYER': False,
    },
    'android_embedded': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID_EMBEDDED_PLAYER',
                'clientVersion': '16.20',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 55,
        'REQUIRE_JS_PLAYER': False,
    },
    'android_music': {
        'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
        'INNERTUBE_HOST': 'music.youtube.com',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID_MUSIC',
                'clientVersion': '4.32',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 21,
        'REQUIRE_JS_PLAYER': False,
    },
    'android_creator': {
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID_CREATOR',
                'clientVersion': '21.24.100',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 14,
        'REQUIRE_JS_PLAYER': False,
    },
    # ios has HLS live streams
    # See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/680
    'ios': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS',
                'clientVersion': '16.20',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 5,
        'REQUIRE_JS_PLAYER': False,
    },
    'ios_embedded': {
        'INNERTUBE_API_KEY': 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS_MESSAGES_EXTENSION',
                'clientVersion': '16.20',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 66,
        'REQUIRE_JS_PLAYER': False,
    },
    'ios_music': {
        'INNERTUBE_API_KEY': 'AIzaSyDK3iBpDP9nHVTk2qL73FLJICfOC3c51Og',
        'INNERTUBE_HOST': 'music.youtube.com',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS_MUSIC',
                'clientVersion': '4.32',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 26,
        'REQUIRE_JS_PLAYER': False,
    },
    'ios_creator': {
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS_CREATOR',
                'clientVersion': '21.24.100',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 15,
        'REQUIRE_JS_PLAYER': False,
    },
    # mweb has 'ultralow' formats
    # See: https://github.com/yt-dlp/yt-dlp/pull/557
    'mweb': {
        'INNERTUBE_API_KEY': 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'MWEB',
                'clientVersion': '2.20210721.07.00',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 2,
    },
}
215
216
def build_innertube_clients():
    """Fill in defaults and derived entries of INNERTUBE_CLIENTS in place.

    Every client gets a default API key, host, REQUIRE_JS_PLAYER flag,
    'hl' language and a 'priority' derived from its base client name.
    Each base client additionally gets an '<name>_agegate' copy that uses
    the EMBED client screen and a thirdParty embed context.
    """
    embed_context = {
        'embedUrl': 'https://google.com',  # Can be any valid URL
    }
    primary_clients = ('android', 'web', 'ios', 'mweb')
    # Earlier entries of primary_clients rank higher
    rank_of = qualities(primary_clients[::-1])

    # Iterate over a snapshot since agegate variants are inserted in the loop
    for name, cfg in tuple(INNERTUBE_CLIENTS.items()):
        cfg.setdefault('INNERTUBE_API_KEY', 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8')
        cfg.setdefault('INNERTUBE_HOST', 'www.youtube.com')
        cfg.setdefault('REQUIRE_JS_PLAYER', True)
        cfg['INNERTUBE_CONTEXT']['client'].setdefault('hl', 'en')
        base_name = name.split('_', 1)[0]
        cfg['priority'] = 10 * rank_of(base_name)

        if name in primary_clients:
            variant = copy.deepcopy(cfg)
            variant['INNERTUBE_CONTEXT']['client']['clientScreen'] = 'EMBED'
            variant['INNERTUBE_CONTEXT']['thirdParty'] = embed_context
            variant['priority'] -= 1
            INNERTUBE_CLIENTS[f'{name}_agegate'] = variant
        elif name.endswith('_embedded'):
            cfg['INNERTUBE_CONTEXT']['thirdParty'] = embed_context
            cfg['priority'] -= 2
        else:
            cfg['priority'] -= 3
241
242
243build_innertube_clients()
244
245
class YoutubeBaseInfoExtractor(InfoExtractor):
    """Provide base functions for Youtube extractors"""

    _RESERVED_NAMES = (
        r'channel|c|user|playlist|watch|w|v|embed|e|watch_popup|clip|'
        r'shorts|movies|results|shared|hashtag|trending|feed|feeds|'
        r'browse|oembed|get_video_info|iframe_api|s/player|'
        r'storefront|oops|index|account|reporthistory|t/terms|about|upload|signin|logout')

    _PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)'

    _NETRC_MACHINE = 'youtube'

    # If True it will raise an error if no login info is provided
    _LOGIN_REQUIRED = False

    _INVIDIOUS_SITES = (
        # invidious-redirect websites
        r'(?:www\.)?redirect\.invidious\.io',
        r'(?:(?:www|dev)\.)?invidio\.us',
        # Invidious instances taken from https://github.com/iv-org/documentation/blob/master/Invidious-Instances.md
        r'(?:www\.)?invidious\.pussthecat\.org',
        r'(?:www\.)?invidious\.zee\.li',
        r'(?:www\.)?invidious\.ethibox\.fr',
        r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',
        # youtube-dl invidious instances list
        r'(?:(?:www|no)\.)?invidiou\.sh',
        r'(?:(?:www|fi)\.)?invidious\.snopyta\.org',
        r'(?:www\.)?invidious\.kabi\.tk',
        r'(?:www\.)?invidious\.mastodon\.host',
        r'(?:www\.)?invidious\.zapashcanon\.fr',
        r'(?:www\.)?(?:invidious(?:-us)?|piped)\.kavin\.rocks',
        r'(?:www\.)?invidious\.tinfoil-hat\.net',
        r'(?:www\.)?invidious\.himiko\.cloud',
        r'(?:www\.)?invidious\.reallyancient\.tech',
        r'(?:www\.)?invidious\.tube',
        r'(?:www\.)?invidiou\.site',
        r'(?:www\.)?invidious\.site',
        r'(?:www\.)?invidious\.xyz',
        r'(?:www\.)?invidious\.nixnet\.xyz',
        r'(?:www\.)?invidious\.048596\.xyz',
        r'(?:www\.)?invidious\.drycat\.fr',
        r'(?:www\.)?inv\.skyn3t\.in',
        r'(?:www\.)?tube\.poal\.co',
        r'(?:www\.)?tube\.connect\.cafe',
        r'(?:www\.)?vid\.wxzm\.sx',
        r'(?:www\.)?vid\.mint\.lgbt',
        r'(?:www\.)?vid\.puffyan\.us',
        r'(?:www\.)?yewtu\.be',
        r'(?:www\.)?yt\.elukerio\.org',
        r'(?:www\.)?yt\.lelux\.fi',
        r'(?:www\.)?invidious\.ggc-project\.de',
        r'(?:www\.)?yt\.maisputain\.ovh',
        r'(?:www\.)?ytprivate\.com',
        r'(?:www\.)?invidious\.13ad\.de',
        r'(?:www\.)?invidious\.toot\.koeln',
        r'(?:www\.)?invidious\.fdn\.fr',
        r'(?:www\.)?watch\.nettohikari\.com',
        r'(?:www\.)?invidious\.namazso\.eu',
        r'(?:www\.)?invidious\.silkky\.cloud',
        r'(?:www\.)?invidious\.exonip\.de',
        r'(?:www\.)?invidious\.riverside\.rocks',
        r'(?:www\.)?invidious\.blamefran\.net',
        r'(?:www\.)?invidious\.moomoo\.de',
        r'(?:www\.)?ytb\.trom\.tf',
        r'(?:www\.)?yt\.cyberhost\.uk',
        r'(?:www\.)?kgg2m7yk5aybusll\.onion',
        r'(?:www\.)?qklhadlycap4cnod\.onion',
        r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',
        r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',
        r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',
        r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',
        r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',
        r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',
        r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',
        r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',
        r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',
        r'(?:www\.)?hpniueoejy4opn7bc4ftgazyqjoeqwlvh2uiku2xqku6zpoa4bf5ruid\.onion',
    )

    def _login(self):
        """
        Attempt to log in to YouTube.
        If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.
        """

        if (self._LOGIN_REQUIRED
                and self.get_param('cookiefile') is None
                and self.get_param('cookiesfrombrowser') is None):
            self.raise_login_required(
                'Login details are needed to download this content', method='cookies')
        username, password = self._get_login_info()
        if username:
            self.report_warning(f'Cannot login to YouTube using username and password. {self._LOGIN_HINTS["cookies"]}')

    def _initialize_consent(self):
        """Set a 'YES' CONSENT cookie unless a session or consent cookie already exists."""
        cookies = self._get_cookies('https://www.youtube.com/')
        if cookies.get('__Secure-3PSID'):
            return
        consent_id = None
        consent = cookies.get('CONSENT')
        if consent:
            if 'YES' in consent.value:
                return
            # Reuse the id of a pending consent cookie when there is one
            consent_id = self._search_regex(
                r'PENDING\+(\d+)', consent.value, 'consent', default=None)
        if not consent_id:
            consent_id = random.randint(100, 999)
        self._set_cookie('.youtube.com', 'CONSENT', 'YES+cb.20210328-17-p0.en+FX+%s' % consent_id)

    def _real_initialize(self):
        self._initialize_consent()
        self._login()

    _YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+?})\s*;'
    _YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*=\s*({.+?})\s*;'
    _YT_INITIAL_BOUNDARY_RE = r'(?:var\s+meta|</script|\n)'

    def _get_default_ytcfg(self, client='web'):
        """Return a deep copy of the built-in ytcfg of the given client."""
        return copy.deepcopy(INNERTUBE_CLIENTS[client])

    def _get_innertube_host(self, client='web'):
        """Return the INNERTUBE_HOST of the given built-in client."""
        return INNERTUBE_CLIENTS[client]['INNERTUBE_HOST']

    def _ytcfg_get_safe(self, ytcfg, getter, expected_type=None, default_client='web'):
        # try_get but with fallback to default ytcfg client values when present
        _func = lambda y: try_get(y, getter, expected_type)
        return _func(ytcfg) or _func(self._get_default_ytcfg(default_client))

    def _extract_client_name(self, ytcfg, default_client='web'):
        return self._ytcfg_get_safe(
            ytcfg, (lambda x: x['INNERTUBE_CLIENT_NAME'],
                    lambda x: x['INNERTUBE_CONTEXT']['client']['clientName']), compat_str, default_client)

    def _extract_client_version(self, ytcfg, default_client='web'):
        return self._ytcfg_get_safe(
            ytcfg, (lambda x: x['INNERTUBE_CLIENT_VERSION'],
                    lambda x: x['INNERTUBE_CONTEXT']['client']['clientVersion']), compat_str, default_client)

    def _extract_api_key(self, ytcfg=None, default_client='web'):
        return self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_API_KEY'], compat_str, default_client)

    def _extract_context(self, ytcfg=None, default_client='web'):
        """
        Return the INNERTUBE_CONTEXT dict of ytcfg when it has one.
        Otherwise build one from the default client's context, updated with
        the client name, client version and visitor data found in ytcfg.
        """
        _get_context = lambda y: try_get(y, lambda x: x['INNERTUBE_CONTEXT'], dict)
        context = _get_context(ytcfg)
        if context:
            return context

        context = _get_context(self._get_default_ytcfg(default_client))
        if not ytcfg:
            return context

        # Recreate the client context (required)
        context['client'].update({
            'clientVersion': self._extract_client_version(ytcfg, default_client),
            'clientName': self._extract_client_name(ytcfg, default_client),
        })
        visitor_data = try_get(ytcfg, lambda x: x['VISITOR_DATA'], compat_str)
        if visitor_data:
            context['client']['visitorData'] = visitor_data
        return context

    # None: cookies not inspected yet; False: no usable cookie was found
    _SAPISID = None

    def _generate_sapisidhash_header(self, origin='https://www.youtube.com'):
        """
        Build the 'SAPISIDHASH ...' Authorization header value for origin.
        Returns None when no SAPISID cookie value is available.
        The cookie lookup is done once and its result kept in self._SAPISID.
        """
        time_now = round(time.time())
        if self._SAPISID is None:
            yt_cookies = self._get_cookies('https://www.youtube.com')
            # Sometimes SAPISID cookie isn't present but __Secure-3PAPISID is.
            # See: https://github.com/yt-dlp/yt-dlp/issues/393
            sapisid_cookie = dict_get(
                yt_cookies, ('__Secure-3PAPISID', 'SAPISID'))
            if sapisid_cookie and sapisid_cookie.value:
                self._SAPISID = sapisid_cookie.value
                self.write_debug('Extracted SAPISID cookie')
                # SAPISID cookie is required if not already present
                if not yt_cookies.get('SAPISID'):
                    self.write_debug('Copying __Secure-3PAPISID cookie to SAPISID cookie')
                    self._set_cookie(
                        '.youtube.com', 'SAPISID', self._SAPISID, secure=True, expire_time=time_now + 3600)
            else:
                self._SAPISID = False
        if not self._SAPISID:
            return None
        # SAPISIDHASH algorithm from https://stackoverflow.com/a/32065323
        sapisidhash = hashlib.sha1(
            f'{time_now} {self._SAPISID} {origin}'.encode('utf-8')).hexdigest()
        return f'SAPISIDHASH {time_now}_{sapisidhash}'

    def _call_api(self, ep, query, video_id, fatal=True, headers=None,
                  note='Downloading API JSON', errnote='Unable to download API page',
                  context=None, api_key=None, api_hostname=None, default_client='web'):
        """
        POST query (merged with the client context) as JSON to the
        /youtubei/v1/<ep> endpoint and return the parsed JSON response.

        context, api_key and api_hostname fall back to the values of
        default_client when not given. Entries of headers override the
        generated API headers.
        """
        data = {'context': context or self._extract_context(default_client=default_client)}
        data.update(query)
        real_headers = self.generate_api_headers(default_client=default_client)
        real_headers.update({'content-type': 'application/json'})
        if headers:
            real_headers.update(headers)
        return self._download_json(
            'https://%s/youtubei/v1/%s' % (api_hostname or self._get_innertube_host(default_client), ep),
            video_id=video_id, fatal=fatal, note=note, errnote=errnote,
            data=json.dumps(data).encode('utf8'), headers=real_headers,
            # Use the key of the same client as the context, host and headers
            query={'key': api_key or self._extract_api_key(default_client=default_client)})

    def extract_yt_initial_data(self, item_id, webpage, fatal=True):
        """Find the ytInitialData JSON object in webpage and return it parsed."""
        data = self._search_regex(
            (r'%s\s*%s' % (self._YT_INITIAL_DATA_RE, self._YT_INITIAL_BOUNDARY_RE),
             self._YT_INITIAL_DATA_RE), webpage, 'yt initial data', fatal=fatal)
        if data:
            return self._parse_json(data, item_id, fatal=fatal)

    @staticmethod
    def _extract_session_index(*data):
        """
        Index of current account in account list.
        See: https://github.com/yt-dlp/yt-dlp/pull/519
        """
        for ytcfg in data:
            session_index = int_or_none(try_get(ytcfg, lambda x: x['SESSION_INDEX']))
            if session_index is not None:
                return session_index

    # Deprecated?
    def _extract_identity_token(self, ytcfg=None, webpage=None):
        """Return ID_TOKEN from ytcfg, else search webpage for it; None if not found."""
        if ytcfg:
            token = try_get(ytcfg, lambda x: x['ID_TOKEN'], compat_str)
            if token:
                return token
        if webpage:
            return self._search_regex(
                r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
                'identity token', default=None, fatal=False)

    @staticmethod
    def _extract_account_syncid(*args):
        """
        Extract syncId required to download private playlists of secondary channels
        @params response and/or ytcfg
        """
        for data in args:
            # ytcfg includes channel_syncid if on secondary channel
            delegated_sid = try_get(data, lambda x: x['DELEGATED_SESSION_ID'], compat_str)
            if delegated_sid:
                return delegated_sid
            sync_ids = (try_get(
                data, (lambda x: x['responseContext']['mainAppWebResponseContext']['datasyncId'],
                       lambda x: x['DATASYNC_ID']), compat_str) or '').split('||')
            if len(sync_ids) >= 2 and sync_ids[1]:
                # datasyncid is of the form "channel_syncid||user_syncid" for secondary channel
                # and just "user_syncid||" for primary channel. We only want the channel_syncid
                return sync_ids[0]

    @staticmethod
    def _extract_visitor_data(*args):
        """
        Extracts visitorData from an API response or ytcfg
        Appears to be used to track session state
        """
        return traverse_obj(
            args, (..., ('VISITOR_DATA', ('INNERTUBE_CONTEXT', 'client', 'visitorData'), ('responseContext', 'visitorData'))),
            expected_type=compat_str, get_all=False)

    @property
    def is_authenticated(self):
        """True when a SAPISIDHASH authorization header can be generated."""
        return bool(self._generate_sapisidhash_header())

    def extract_ytcfg(self, video_id, webpage):
        """Return the dict passed to ytcfg.set(...) in webpage, or {} if unavailable."""
        if not webpage:
            return {}
        return self._parse_json(
            self._search_regex(
                r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg',
                default='{}'), video_id, fatal=False) or {}

    def generate_api_headers(
            self, *, ytcfg=None, account_syncid=None, session_index=None,
            visitor_data=None, identity_token=None, api_hostname=None, default_client='web'):
        """
        Build the HTTP headers for an Innertube API request.

        Explicitly passed values take precedence over those extracted from
        ytcfg. Headers whose value is None are left out of the result.
        """
        origin = 'https://' + (api_hostname if api_hostname else self._get_innertube_host(default_client))
        headers = {
            'X-YouTube-Client-Name': compat_str(
                self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_CONTEXT_CLIENT_NAME'], default_client=default_client)),
            'X-YouTube-Client-Version': self._extract_client_version(ytcfg, default_client),
            'Origin': origin,
            'X-Youtube-Identity-Token': identity_token or self._extract_identity_token(ytcfg),
            'X-Goog-PageId': account_syncid or self._extract_account_syncid(ytcfg),
            'X-Goog-Visitor-Id': visitor_data or self._extract_visitor_data(ytcfg)
        }
        if session_index is None:
            session_index = self._extract_session_index(ytcfg)
        if account_syncid or session_index is not None:
            headers['X-Goog-AuthUser'] = session_index if session_index is not None else 0

        auth = self._generate_sapisidhash_header(origin)
        if auth is not None:
            headers['Authorization'] = auth
            headers['X-Origin'] = origin
        return {h: v for h, v in headers.items() if v is not None}

    @staticmethod
    def _build_api_continuation_query(continuation, ctp=None):
        """Build the API query dict for a continuation token and optional clickTrackingParams."""
        query = {
            'continuation': continuation
        }
        # TODO: Inconsistency with clickTrackingParams.
        # Currently we have a fixed ctp contained within context (from ytcfg)
        # and a ctp in root query for continuation.
        if ctp:
            query['clickTracking'] = {'clickTrackingParams': ctp}
        return query

    @classmethod
    def _extract_next_continuation_data(cls, renderer):
        """Return a continuation query from next/reload continuation data of renderer, if any."""
        next_continuation = try_get(
            renderer, (lambda x: x['continuations'][0]['nextContinuationData'],
                       lambda x: x['continuation']['reloadContinuationData']), dict)
        if not next_continuation:
            return
        continuation = next_continuation.get('continuation')
        if not continuation:
            return
        ctp = next_continuation.get('clickTrackingParams')
        return cls._build_api_continuation_query(continuation, ctp)

    @classmethod
    def _extract_continuation_ep_data(cls, continuation_ep: dict):
        """Return a continuation query from a continuation endpoint dict, if it carries a token."""
        if isinstance(continuation_ep, dict):
            continuation = try_get(
                continuation_ep, lambda x: x['continuationCommand']['token'], compat_str)
            if not continuation:
                return
            ctp = continuation_ep.get('clickTrackingParams')
            return cls._build_api_continuation_query(continuation, ctp)

    @classmethod
    def _extract_continuation(cls, renderer):
        """
        Return the continuation query for renderer, or None.
        Next/reload continuation data is tried first, then the
        continuationItemRenderer entries of its 'contents' and 'items'.
        """
        next_continuation = cls._extract_next_continuation_data(renderer)
        if next_continuation:
            return next_continuation

        contents = []
        for key in ('contents', 'items'):
            contents.extend(try_get(renderer, lambda x: x[key], list) or [])

        for content in contents:
            if not isinstance(content, dict):
                continue
            continuation_ep = try_get(
                content, (lambda x: x['continuationItemRenderer']['continuationEndpoint'],
                          lambda x: x['continuationItemRenderer']['button']['buttonRenderer']['command']),
                dict)
            continuation = cls._extract_continuation_ep_data(continuation_ep)
            if continuation:
                return continuation

    @classmethod
    def _extract_alerts(cls, data):
        """Yield (type, message) for every alert in data['alerts'] that has both."""
        for alert_dict in try_get(data, lambda x: x['alerts'], list) or []:
            if not isinstance(alert_dict, dict):
                continue
            for alert in alert_dict.values():
                # Skip malformed entries instead of failing on .get()
                if not isinstance(alert, dict):
                    continue
                alert_type = alert.get('type')
                if not alert_type:
                    continue
                message = cls._get_text(alert, 'text')
                if message:
                    yield alert_type, message

    def _report_alerts(self, alerts, expected=True, fatal=True, only_once=False):
        """
        Report (type, message) alerts as warnings.
        When fatal is set, alerts of type 'error' are collected instead and
        the last one is raised as an ExtractorError after the others have
        been reported as warnings.
        """
        errors = []
        warnings = []
        for alert_type, alert_message in alerts:
            if alert_type.lower() == 'error' and fatal:
                errors.append([alert_type, alert_message])
            else:
                warnings.append([alert_type, alert_message])

        for alert_type, alert_message in (warnings + errors[:-1]):
            self.report_warning('YouTube said: %s - %s' % (alert_type, alert_message), only_once=only_once)
        if errors:
            raise ExtractorError('YouTube said: %s' % errors[-1][1], expected=expected)

    def _extract_and_report_alerts(self, data, *args, **kwargs):
        return self._report_alerts(self._extract_alerts(data), *args, **kwargs)

    def _extract_badges(self, renderer: dict):
        """Return the set of lowercased metadataBadgeRenderer labels of renderer."""
        badges = set()
        for badge in try_get(renderer, lambda x: x['badges'], list) or []:
            label = try_get(badge, lambda x: x['metadataBadgeRenderer']['label'], compat_str)
            if label:
                badges.add(label.lower())
        return badges

    @staticmethod
    def _get_text(data, *path_list, max_runs=None):
        """
        Return the first non-empty text found at any of the given paths in data.
        An item's text is its 'simpleText', or else the joined 'text' of its
        'runs' (limited to the first max_runs runs when given). With no
        paths, data itself is used as the item.
        """
        for path in path_list or [None]:
            if path is None:
                obj = [data]
            else:
                obj = traverse_obj(data, path, default=[])
                # Paths without branching yield a single object rather than a list of results
                if not any(key is ... or isinstance(key, (list, tuple)) for key in variadic(path)):
                    obj = [obj]
            for item in obj:
                text = try_get(item, lambda x: x['simpleText'], compat_str)
                if text:
                    return text
                runs = try_get(item, lambda x: x['runs'], list) or []
                if not runs and isinstance(item, list):
                    runs = item

                runs = runs[:min(len(runs), max_runs or len(runs))]
                text = ''.join(traverse_obj(runs, (..., 'text'), expected_type=str, default=[]))
                if text:
                    return text

    def _extract_response(self, item_id, query, note='Downloading API JSON', headers=None,
                          ytcfg=None, check_get_keys=None, ep='browse', fatal=True, api_hostname=None,
                          default_client='web'):
        """
        Call the API with retries and return the response.

        A request is retried (up to the 'extractor_retries' param, default 3)
        on network errors other than HTTP 403/429, on 'unknown error' alerts
        and when none of check_get_keys is present in the response.
        Once the retries are exhausted, or on any other error, the error is
        raised if fatal, otherwise a warning is reported and None is returned.
        """
        response = None
        last_error = None
        count = -1
        retries = self.get_param('extractor_retries', 3)
        if check_get_keys is None:
            check_get_keys = []
        while count < retries:
            count += 1
            if last_error:
                self.report_warning('%s. Retrying ...' % remove_end(last_error, '.'))
            try:
                response = self._call_api(
                    ep=ep, fatal=True, headers=headers,
                    video_id=item_id, query=query,
                    context=self._extract_context(ytcfg, default_client),
                    api_key=self._extract_api_key(ytcfg, default_client),
                    api_hostname=api_hostname, default_client=default_client,
                    note='%s%s' % (note, ' (retry #%d)' % count if count else ''))
            except ExtractorError as e:
                if isinstance(e.cause, network_exceptions):
                    if isinstance(e.cause, compat_HTTPError) and not is_html(e.cause.read(512)):
                        e.cause.seek(0)
                        yt_error = try_get(
                            self._parse_json(e.cause.read().decode(), item_id, fatal=False),
                            lambda x: x['error']['message'], compat_str)
                        if yt_error:
                            self._report_alerts([('ERROR', yt_error)], fatal=False)
                    # Downloading page may result in intermittent 5xx HTTP error
                    # Sometimes a 404 is also received. See: https://github.com/ytdl-org/youtube-dl/issues/28289
                    # We also want to catch all other network exceptions since errors in later pages can be troublesome
                    # See https://github.com/yt-dlp/yt-dlp/issues/507#issuecomment-880188210
                    if not isinstance(e.cause, compat_HTTPError) or e.cause.code not in (403, 429):
                        last_error = error_to_compat_str(e.cause or e.msg)
                        if count < retries:
                            continue
                if fatal:
                    raise
                else:
                    self.report_warning(error_to_compat_str(e))
                    return

            else:
                try:
                    self._extract_and_report_alerts(response, only_once=True)
                except ExtractorError as e:
                    # YouTube servers may return errors we want to retry on in a 200 OK response
                    # See: https://github.com/yt-dlp/yt-dlp/issues/839
                    # Only retry while attempts remain; otherwise the loop would
                    # end and the error response would be returned as a success
                    if 'unknown error' in e.msg.lower() and count < retries:
                        last_error = e.msg
                        continue
                    if fatal:
                        raise
                    self.report_warning(error_to_compat_str(e))
                    return
                if not check_get_keys or dict_get(response, check_get_keys):
                    break
                # Youtube sometimes sends incomplete data
                # See: https://github.com/ytdl-org/youtube-dl/issues/28194
                last_error = 'Incomplete data received'
                if count >= retries:
                    if fatal:
                        raise ExtractorError(last_error)
                    else:
                        self.report_warning(last_error)
                        return
        return response

    @staticmethod
    def is_music_url(url):
        """Return True if url is an http(s) URL on music.youtube.com."""
        return re.match(r'https?://music\.youtube\.com/', url) is not None

    def _extract_video(self, renderer):
        """Build a 'url' type result for YoutubeIE from a video renderer dict."""
        video_id = renderer.get('videoId')
        title = self._get_text(renderer, 'title')
        description = self._get_text(renderer, 'descriptionSnippet')
        duration = parse_duration(self._get_text(
            renderer, 'lengthText', ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'text')))
        view_count_text = self._get_text(renderer, 'viewCountText') or ''
        view_count = str_to_int(self._search_regex(
            r'^([\d,]+)', re.sub(r'\s', '', view_count_text),
            'view count', default=None))

        uploader = self._get_text(renderer, 'ownerText', 'shortBylineText')

        return {
            '_type': 'url',
            'ie_key': YoutubeIE.ie_key(),
            'id': video_id,
            'url': f'https://www.youtube.com/watch?v={video_id}',
            'title': title,
            'description': description,
            'duration': duration,
            'view_count': view_count,
            'uploader': uploader,
        }
760
761
762class YoutubeIE(YoutubeBaseInfoExtractor):
763 IE_DESC = 'YouTube'
764 _VALID_URL = r"""(?x)^
765 (
766 (?:https?://|//) # http(s):// or protocol-independent URL
767 (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com|
768 (?:www\.)?deturl\.com/www\.youtube\.com|
769 (?:www\.)?pwnyoutube\.com|
770 (?:www\.)?hooktube\.com|
771 (?:www\.)?yourepeat\.com|
772 tube\.majestyc\.net|
773 %(invidious)s|
774 youtube\.googleapis\.com)/ # the various hostnames, with wildcard subdomains
775 (?:.*?\#/)? # handle anchor (#/) redirect urls
776 (?: # the various things that can precede the ID:
777 (?:(?:v|embed|e|shorts)/(?!videoseries)) # v/ or embed/ or e/ or shorts/
778 |(?: # or the v= param in all its forms
779 (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
780 (?:\?|\#!?) # the params delimiter ? or # or #!
781 (?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
782 v=
783 )
784 ))
785 |(?:
786 youtu\.be| # just youtu.be/xxxx
787 vid\.plus| # or vid.plus/xxxx
788 zwearz\.com/watch| # or zwearz.com/watch/xxxx
789 %(invidious)s
790 )/
791 |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
792 )
793 )? # all until now is optional -> you can pass the naked ID
794 (?P<id>[0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
795 (?(1).+)? # if we found the ID, everything can follow
796 (?:\#|$)""" % {
797 'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
798 }
799 _PLAYER_INFO_RE = (
800 r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player',
801 r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',
802 r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$',
803 )
804 _formats = {
805 '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
806 '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
807 '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
808 '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
809 '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
810 '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
811 '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
812 '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
813 # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
814 '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
815 '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
816 '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
817 '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
818 '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
819 '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
820 '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
821 '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
822 '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
823
824
825 # 3D videos
826 '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
827 '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
828 '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
829 '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
830 '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
831 '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
832 '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
833
834 # Apple HTTP Live Streaming
835 '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
836 '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
837 '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
838 '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
839 '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
840 '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
841 '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
842 '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
843
844 # DASH mp4 video
845 '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
846 '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
847 '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
848 '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
849 '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
850 '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
851 '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
852 '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
853 '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
854 '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
855 '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
856 '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},
857
858 # Dash mp4 audio
859 '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
860 '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
861 '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
862 '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
863 '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
864 '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
865 '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},
866
867 # Dash webm
868 '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
869 '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
870 '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
871 '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
872 '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
873 '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
874 '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
875 '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
876 '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
877 '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
878 '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
879 '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
880 '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
881 '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
882 '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
883 # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
884 '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
885 '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
886 '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
887 '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
888 '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
889 '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
890
891 # Dash webm audio
892 '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
893 '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},
894
895 # Dash webm audio with opus inside
896 '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
897 '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
898 '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},
899
900 # RTMP (unnamed)
901 '_rtmp': {'protocol': 'rtmp'},
902
903 # av01 video only formats sometimes served with "unknown" codecs
904 '394': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},
905 '395': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},
906 '396': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'av01.0.01M.08'},
907 '397': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'av01.0.04M.08'},
908 '398': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'av01.0.05M.08'},
909 '399': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'av01.0.08M.08'},
910 '400': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},
911 '401': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},
912 }
# Subtitle format identifiers known to this extractor.
# NOTE(review): presumably the formats offered for each caption track -
# confirm against the subtitle extraction code (not visible here).
_SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')

# NOTE(review): presumably opts this extractor out of the base class's
# geo-bypass handling - confirm in InfoExtractor.
_GEO_BYPASS = False

# NOTE(review): presumably the name this extractor is listed under - confirm
# against InfoExtractor.
IE_NAME = 'youtube'
918 _TESTS = [
919 {
920 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
921 'info_dict': {
922 'id': 'BaW_jenozKc',
923 'ext': 'mp4',
924 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
925 'uploader': 'Philipp Hagemeister',
926 'uploader_id': 'phihag',
927 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
928 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
929 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
930 'upload_date': '20121002',
931 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
932 'categories': ['Science & Technology'],
933 'tags': ['youtube-dl'],
934 'duration': 10,
935 'view_count': int,
936 'like_count': int,
937 'dislike_count': int,
938 'start_time': 1,
939 'end_time': 9,
940 }
941 },
942 {
943 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
944 'note': 'Embed-only video (#1746)',
945 'info_dict': {
946 'id': 'yZIXLfi8CZQ',
947 'ext': 'mp4',
948 'upload_date': '20120608',
949 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
950 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
951 'uploader': 'SET India',
952 'uploader_id': 'setindia',
953 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
954 'age_limit': 18,
955 },
956 'skip': 'Private video',
957 },
958 {
959 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=yZIXLfi8CZQ',
960 'note': 'Use the first video ID in the URL',
961 'info_dict': {
962 'id': 'BaW_jenozKc',
963 'ext': 'mp4',
964 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
965 'uploader': 'Philipp Hagemeister',
966 'uploader_id': 'phihag',
967 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
968 'upload_date': '20121002',
969 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
970 'categories': ['Science & Technology'],
971 'tags': ['youtube-dl'],
972 'duration': 10,
973 'view_count': int,
974 'like_count': int,
975 'dislike_count': int,
976 },
977 'params': {
978 'skip_download': True,
979 },
980 },
981 {
982 'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
983 'note': '256k DASH audio (format 141) via DASH manifest',
984 'info_dict': {
985 'id': 'a9LDPn-MO4I',
986 'ext': 'm4a',
987 'upload_date': '20121002',
988 'uploader_id': '8KVIDEO',
989 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
990 'description': '',
991 'uploader': '8KVIDEO',
992 'title': 'UHDTV TEST 8K VIDEO.mp4'
993 },
994 'params': {
995 'youtube_include_dash_manifest': True,
996 'format': '141',
997 },
998 'skip': 'format 141 not served anymore',
999 },
1000 # DASH manifest with encrypted signature
1001 {
1002 'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
1003 'info_dict': {
1004 'id': 'IB3lcPjvWLA',
1005 'ext': 'm4a',
1006 'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
1007 'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
1008 'duration': 244,
1009 'uploader': 'AfrojackVEVO',
1010 'uploader_id': 'AfrojackVEVO',
1011 'upload_date': '20131011',
1012 'abr': 129.495,
1013 },
1014 'params': {
1015 'youtube_include_dash_manifest': True,
1016 'format': '141/bestaudio[ext=m4a]',
1017 },
1018 },
1019 # Age-gate videos. See https://github.com/yt-dlp/yt-dlp/pull/575#issuecomment-888837000
1020 {
1021 'note': 'Embed allowed age-gate video',
1022 'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
1023 'info_dict': {
1024 'id': 'HtVdAasjOgU',
1025 'ext': 'mp4',
1026 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
1027 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
1028 'duration': 142,
1029 'uploader': 'The Witcher',
1030 'uploader_id': 'WitcherGame',
1031 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
1032 'upload_date': '20140605',
1033 'age_limit': 18,
1034 },
1035 },
1036 {
1037 'note': 'Age-gate video with embed allowed in public site',
1038 'url': 'https://youtube.com/watch?v=HsUATh_Nc2U',
1039 'info_dict': {
1040 'id': 'HsUATh_Nc2U',
1041 'ext': 'mp4',
1042 'title': 'Godzilla 2 (Official Video)',
1043 'description': 'md5:bf77e03fcae5529475e500129b05668a',
1044 'upload_date': '20200408',
1045 'uploader_id': 'FlyingKitty900',
1046 'uploader': 'FlyingKitty',
1047 'age_limit': 18,
1048 },
1049 },
1050 {
1051 'note': 'Age-gate video embedable only with clientScreen=EMBED',
1052 'url': 'https://youtube.com/watch?v=Tq92D6wQ1mg',
1053 'info_dict': {
1054 'id': 'Tq92D6wQ1mg',
1055 'title': '[MMD] Adios - EVERGLOW [+Motion DL]',
1056 'ext': 'mp4',
1057 'upload_date': '20191227',
1058 'uploader_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',
1059 'uploader': 'Projekt Melody',
1060 'description': 'md5:17eccca93a786d51bc67646756894066',
1061 'age_limit': 18,
1062 },
1063 },
1064 {
1065 'note': 'Non-Agegated non-embeddable video',
1066 'url': 'https://youtube.com/watch?v=MeJVWBSsPAY',
1067 'info_dict': {
1068 'id': 'MeJVWBSsPAY',
1069 'ext': 'mp4',
1070 'title': 'OOMPH! - Such Mich Find Mich (Lyrics)',
1071 'uploader': 'Herr Lurik',
1072 'uploader_id': 'st3in234',
1073 'description': 'Fan Video. Music & Lyrics by OOMPH!.',
1074 'upload_date': '20130730',
1075 },
1076 },
1077 {
1078 'note': 'Non-bypassable age-gated video',
1079 'url': 'https://youtube.com/watch?v=Cr381pDsSsA',
1080 'only_matching': True,
1081 },
1082 # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
1083 # YouTube Red ad is not captured for creator
1084 {
1085 'url': '__2ABJjxzNo',
1086 'info_dict': {
1087 'id': '__2ABJjxzNo',
1088 'ext': 'mp4',
1089 'duration': 266,
1090 'upload_date': '20100430',
1091 'uploader_id': 'deadmau5',
1092 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
1093 'creator': 'deadmau5',
1094 'description': 'md5:6cbcd3a92ce1bc676fc4d6ab4ace2336',
1095 'uploader': 'deadmau5',
1096 'title': 'Deadmau5 - Some Chords (HD)',
1097 'alt_title': 'Some Chords',
1098 },
1099 'expected_warnings': [
1100 'DASH manifest missing',
1101 ]
1102 },
1103 # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
1104 {
1105 'url': 'lqQg6PlCWgI',
1106 'info_dict': {
1107 'id': 'lqQg6PlCWgI',
1108 'ext': 'mp4',
1109 'duration': 6085,
1110 'upload_date': '20150827',
1111 'uploader_id': 'olympic',
1112 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
1113 'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
1114 'uploader': 'Olympics',
1115 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
1116 },
1117 'params': {
1118 'skip_download': 'requires avconv',
1119 }
1120 },
1121 # Non-square pixels
1122 {
1123 'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
1124 'info_dict': {
1125 'id': '_b-2C3KPAM0',
1126 'ext': 'mp4',
1127 'stretched_ratio': 16 / 9.,
1128 'duration': 85,
1129 'upload_date': '20110310',
1130 'uploader_id': 'AllenMeow',
1131 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
1132 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
1133 'uploader': '孫ᄋᄅ',
1134 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
1135 },
1136 },
1137 # url_encoded_fmt_stream_map is empty string
1138 {
1139 'url': 'qEJwOuvDf7I',
1140 'info_dict': {
1141 'id': 'qEJwOuvDf7I',
1142 'ext': 'webm',
1143 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
1144 'description': '',
1145 'upload_date': '20150404',
1146 'uploader_id': 'spbelect',
1147 'uploader': 'Наблюдатели Петербурга',
1148 },
1149 'params': {
1150 'skip_download': 'requires avconv',
1151 },
1152 'skip': 'This live event has ended.',
1153 },
1154 # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
1155 {
1156 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
1157 'info_dict': {
1158 'id': 'FIl7x6_3R5Y',
1159 'ext': 'webm',
1160 'title': 'md5:7b81415841e02ecd4313668cde88737a',
1161 'description': 'md5:116377fd2963b81ec4ce64b542173306',
1162 'duration': 220,
1163 'upload_date': '20150625',
1164 'uploader_id': 'dorappi2000',
1165 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
1166 'uploader': 'dorappi2000',
1167 'formats': 'mincount:31',
1168 },
1169 'skip': 'not actual anymore',
1170 },
1171 # DASH manifest with segment_list
1172 {
1173 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
1174 'md5': '8ce563a1d667b599d21064e982ab9e31',
1175 'info_dict': {
1176 'id': 'CsmdDsKjzN8',
1177 'ext': 'mp4',
1178 'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510
1179 'uploader': 'Airtek',
1180 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
1181 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
1182 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
1183 },
1184 'params': {
1185 'youtube_include_dash_manifest': True,
1186 'format': '135', # bestvideo
1187 },
1188 'skip': 'This live event has ended.',
1189 },
1190 {
1191 # Multifeed videos (multiple cameras), URL is for Main Camera
1192 'url': 'https://www.youtube.com/watch?v=jvGDaLqkpTg',
1193 'info_dict': {
1194 'id': 'jvGDaLqkpTg',
1195 'title': 'Tom Clancy Free Weekend Rainbow Whatever',
1196 'description': 'md5:e03b909557865076822aa169218d6a5d',
1197 },
1198 'playlist': [{
1199 'info_dict': {
1200 'id': 'jvGDaLqkpTg',
1201 'ext': 'mp4',
1202 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Main Camera)',
1203 'description': 'md5:e03b909557865076822aa169218d6a5d',
1204 'duration': 10643,
1205 'upload_date': '20161111',
1206 'uploader': 'Team PGP',
1207 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1208 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
1209 },
1210 }, {
1211 'info_dict': {
1212 'id': '3AKt1R1aDnw',
1213 'ext': 'mp4',
1214 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 2)',
1215 'description': 'md5:e03b909557865076822aa169218d6a5d',
1216 'duration': 10991,
1217 'upload_date': '20161111',
1218 'uploader': 'Team PGP',
1219 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1220 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
1221 },
1222 }, {
1223 'info_dict': {
1224 'id': 'RtAMM00gpVc',
1225 'ext': 'mp4',
1226 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 3)',
1227 'description': 'md5:e03b909557865076822aa169218d6a5d',
1228 'duration': 10995,
1229 'upload_date': '20161111',
1230 'uploader': 'Team PGP',
1231 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1232 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
1233 },
1234 }, {
1235 'info_dict': {
1236 'id': '6N2fdlP3C5U',
1237 'ext': 'mp4',
1238 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 4)',
1239 'description': 'md5:e03b909557865076822aa169218d6a5d',
1240 'duration': 10990,
1241 'upload_date': '20161111',
1242 'uploader': 'Team PGP',
1243 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1244 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
1245 },
1246 }],
1247 'params': {
1248 'skip_download': True,
1249 },
1250 'skip': 'Not multifeed anymore',
1251 },
1252 {
1253 # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
1254 'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
1255 'info_dict': {
1256 'id': 'gVfLd0zydlo',
1257 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
1258 },
1259 'playlist_count': 2,
1260 'skip': 'Not multifeed anymore',
1261 },
1262 {
1263 'url': 'https://vid.plus/FlRa-iH7PGw',
1264 'only_matching': True,
1265 },
1266 {
1267 'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
1268 'only_matching': True,
1269 },
1270 {
1271 # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
1272 # Also tests cut-off URL expansion in video description (see
1273 # https://github.com/ytdl-org/youtube-dl/issues/1892,
1274 # https://github.com/ytdl-org/youtube-dl/issues/8164)
1275 'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
1276 'info_dict': {
1277 'id': 'lsguqyKfVQg',
1278 'ext': 'mp4',
1279 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
1280 'alt_title': 'Dark Walk',
1281 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
1282 'duration': 133,
1283 'upload_date': '20151119',
1284 'uploader_id': 'IronSoulElf',
1285 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
1286 'uploader': 'IronSoulElf',
1287 'creator': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',
1288 'track': 'Dark Walk',
1289 'artist': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',
1290 'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
1291 },
1292 'params': {
1293 'skip_download': True,
1294 },
1295 },
1296 {
1297 # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
1298 'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
1299 'only_matching': True,
1300 },
1301 {
1302 # Video with yt:stretch=17:0
1303 'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
1304 'info_dict': {
1305 'id': 'Q39EVAstoRM',
1306 'ext': 'mp4',
1307 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
1308 'description': 'md5:ee18a25c350637c8faff806845bddee9',
1309 'upload_date': '20151107',
1310 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
1311 'uploader': 'CH GAMER DROID',
1312 },
1313 'params': {
1314 'skip_download': True,
1315 },
1316 'skip': 'This video does not exist.',
1317 },
1318 {
1319 # Video with incomplete 'yt:stretch=16:'
1320 'url': 'https://www.youtube.com/watch?v=FRhJzUSJbGI',
1321 'only_matching': True,
1322 },
1323 {
1324 # Video licensed under Creative Commons
1325 'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
1326 'info_dict': {
1327 'id': 'M4gD1WSo5mA',
1328 'ext': 'mp4',
1329 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
1330 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
1331 'duration': 721,
1332 'upload_date': '20150127',
1333 'uploader_id': 'BerkmanCenter',
1334 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
1335 'uploader': 'The Berkman Klein Center for Internet & Society',
1336 'license': 'Creative Commons Attribution license (reuse allowed)',
1337 },
1338 'params': {
1339 'skip_download': True,
1340 },
1341 },
1342 {
1343 # Channel-like uploader_url
1344 'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
1345 'info_dict': {
1346 'id': 'eQcmzGIKrzg',
1347 'ext': 'mp4',
1348 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
1349 'description': 'md5:13a2503d7b5904ef4b223aa101628f39',
1350 'duration': 4060,
1351 'upload_date': '20151119',
1352 'uploader': 'Bernie Sanders',
1353 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
1354 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
1355 'license': 'Creative Commons Attribution license (reuse allowed)',
1356 },
1357 'params': {
1358 'skip_download': True,
1359 },
1360 },
1361 {
1362 'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
1363 'only_matching': True,
1364 },
1365 {
1366 # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
1367 'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
1368 'only_matching': True,
1369 },
1370 {
1371 # Rental video preview
1372 'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
1373 'info_dict': {
1374 'id': 'uGpuVWrhIzE',
1375 'ext': 'mp4',
1376 'title': 'Piku - Trailer',
1377 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
1378 'upload_date': '20150811',
1379 'uploader': 'FlixMatrix',
1380 'uploader_id': 'FlixMatrixKaravan',
1381 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
1382 'license': 'Standard YouTube License',
1383 },
1384 'params': {
1385 'skip_download': True,
1386 },
1387 'skip': 'This video is not available.',
1388 },
1389 {
1390 # YouTube Red video with episode data
1391 'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
1392 'info_dict': {
1393 'id': 'iqKdEhx-dD4',
1394 'ext': 'mp4',
1395 'title': 'Isolation - Mind Field (Ep 1)',
1396 'description': 'md5:f540112edec5d09fc8cc752d3d4ba3cd',
1397 'duration': 2085,
1398 'upload_date': '20170118',
1399 'uploader': 'Vsauce',
1400 'uploader_id': 'Vsauce',
1401 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
1402 'series': 'Mind Field',
1403 'season_number': 1,
1404 'episode_number': 1,
1405 },
1406 'params': {
1407 'skip_download': True,
1408 },
1409 'expected_warnings': [
1410 'Skipping DASH manifest',
1411 ],
1412 },
1413 {
1414 # The following content has been identified by the YouTube community
1415 # as inappropriate or offensive to some audiences.
1416 'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
1417 'info_dict': {
1418 'id': '6SJNVb0GnPI',
1419 'ext': 'mp4',
1420 'title': 'Race Differences in Intelligence',
1421 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
1422 'duration': 965,
1423 'upload_date': '20140124',
1424 'uploader': 'New Century Foundation',
1425 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
1426 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
1427 },
1428 'params': {
1429 'skip_download': True,
1430 },
1431 'skip': 'This video has been removed for violating YouTube\'s policy on hate speech.',
1432 },
1433 {
1434 # itag 212
1435 'url': '1t24XAntNCY',
1436 'only_matching': True,
1437 },
1438 {
1439 # geo restricted to JP
1440 'url': 'sJL6WA-aGkQ',
1441 'only_matching': True,
1442 },
1443 {
1444 'url': 'https://invidio.us/watch?v=BaW_jenozKc',
1445 'only_matching': True,
1446 },
1447 {
1448 'url': 'https://redirect.invidious.io/watch?v=BaW_jenozKc',
1449 'only_matching': True,
1450 },
1451 {
1452 # from https://nitter.pussthecat.org/YouTube/status/1360363141947944964#m
1453 'url': 'https://redirect.invidious.io/Yh0AhrY9GjA',
1454 'only_matching': True,
1455 },
1456 {
1457 # DRM protected
1458 'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
1459 'only_matching': True,
1460 },
1461 {
1462 # Video with unsupported adaptive stream type formats
1463 'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
1464 'info_dict': {
1465 'id': 'Z4Vy8R84T1U',
1466 'ext': 'mp4',
1467 'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
1468 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
1469 'duration': 433,
1470 'upload_date': '20130923',
1471 'uploader': 'Amelia Putri Harwita',
1472 'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',
1473 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',
1474 'formats': 'maxcount:10',
1475 },
1476 'params': {
1477 'skip_download': True,
1478 'youtube_include_dash_manifest': False,
1479 },
1480 'skip': 'not actual anymore',
1481 },
1482 {
1483 # Youtube Music Auto-generated description
1484 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1485 'info_dict': {
1486 'id': 'MgNrAu2pzNs',
1487 'ext': 'mp4',
1488 'title': 'Voyeur Girl',
1489 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
1490 'upload_date': '20190312',
1491 'uploader': 'Stephen - Topic',
1492 'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',
1493 'artist': 'Stephen',
1494 'track': 'Voyeur Girl',
1495 'album': 'it\'s too much love to know my dear',
1496 'release_date': '20190313',
1497 'release_year': 2019,
1498 },
1499 'params': {
1500 'skip_download': True,
1501 },
1502 },
1503 {
1504 'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',
1505 'only_matching': True,
1506 },
1507 {
1508 # invalid -> valid video id redirection
1509 'url': 'DJztXj2GPfl',
1510 'info_dict': {
1511 'id': 'DJztXj2GPfk',
1512 'ext': 'mp4',
1513 'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',
1514 'description': 'md5:bf577a41da97918e94fa9798d9228825',
1515 'upload_date': '20090125',
1516 'uploader': 'Prochorowka',
1517 'uploader_id': 'Prochorowka',
1518 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',
1519 'artist': 'Panjabi MC',
1520 'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',
1521 'album': 'Beware of the Boys (Mundian To Bach Ke)',
1522 },
1523 'params': {
1524 'skip_download': True,
1525 },
1526 'skip': 'Video unavailable',
1527 },
1528 {
1529 # empty description results in an empty string
1530 'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',
1531 'info_dict': {
1532 'id': 'x41yOUIvK2k',
1533 'ext': 'mp4',
1534 'title': 'IMG 3456',
1535 'description': '',
1536 'upload_date': '20170613',
1537 'uploader_id': 'ElevageOrVert',
1538 'uploader': 'ElevageOrVert',
1539 },
1540 'params': {
1541 'skip_download': True,
1542 },
1543 },
1544 {
1545 # with '};' inside yt initial data (see [1])
1546 # see [2] for an example with '};' inside ytInitialPlayerResponse
1547 # 1. https://github.com/ytdl-org/youtube-dl/issues/27093
1548 # 2. https://github.com/ytdl-org/youtube-dl/issues/27216
1549 'url': 'https://www.youtube.com/watch?v=CHqg6qOn4no',
1550 'info_dict': {
1551 'id': 'CHqg6qOn4no',
1552 'ext': 'mp4',
1553 'title': 'Part 77 Sort a list of simple types in c#',
1554 'description': 'md5:b8746fa52e10cdbf47997903f13b20dc',
1555 'upload_date': '20130831',
1556 'uploader_id': 'kudvenkat',
1557 'uploader': 'kudvenkat',
1558 },
1559 'params': {
1560 'skip_download': True,
1561 },
1562 },
1563 {
1564 # another example of '};' in ytInitialData
1565 'url': 'https://www.youtube.com/watch?v=gVfgbahppCY',
1566 'only_matching': True,
1567 },
1568 {
1569 'url': 'https://www.youtube.com/watch_popup?v=63RmMXCd_bQ',
1570 'only_matching': True,
1571 },
{
    # https://github.com/ytdl-org/youtube-dl/pull/28094
    'url': 'OtqTfy26tG0',
    'info_dict': {
        'id': 'OtqTfy26tG0',
        'ext': 'mp4',
        'title': 'Burn Out',
        'description': 'md5:8d07b84dcbcbfb34bc12a56d968b6131',
        'upload_date': '20141120',
        'uploader': 'The Cinematic Orchestra - Topic',
        'uploader_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',
        'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',
        'artist': 'The Cinematic Orchestra',
        'track': 'Burn Out',
        'album': 'Every Day',
        # Both release fields are expected to be None for this video.
        # NOTE(review): assumes the test helper compares an expected None
        # against the extracted field - confirm in the test harness.
        'release_date': None,
        'release_year': None,
    },
    'params': {
        'skip_download': True,
    },
},
1594 {
1595 # controversial video, only works with bpctr when authenticated with cookies
1596 'url': 'https://www.youtube.com/watch?v=nGC3D_FkCmg',
1597 'only_matching': True,
1598 },
1599 {
1600 # controversial video, requires bpctr/contentCheckOk
1601 'url': 'https://www.youtube.com/watch?v=SZJvDhaSDnc',
1602 'info_dict': {
1603 'id': 'SZJvDhaSDnc',
1604 'ext': 'mp4',
1605 'title': 'San Diego teen commits suicide after bullying over embarrassing video',
1606 'channel_id': 'UC-SJ6nODDmufqBzPBwCvYvQ',
1607 'uploader': 'CBS This Morning',
1608 'uploader_id': 'CBSThisMorning',
1609 'upload_date': '20140716',
1610 'description': 'md5:acde3a73d3f133fc97e837a9f76b53b7'
1611 }
1612 },
1613 {
1614 # restricted location, https://github.com/ytdl-org/youtube-dl/issues/28685
1615 'url': 'cBvYw8_A0vQ',
1616 'info_dict': {
1617 'id': 'cBvYw8_A0vQ',
1618 'ext': 'mp4',
1619 'title': '4K Ueno Okachimachi Street Scenes 上野御徒町歩き',
1620 'description': 'md5:ea770e474b7cd6722b4c95b833c03630',
1621 'upload_date': '20201120',
1622 'uploader': 'Walk around Japan',
1623 'uploader_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',
1624 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',
1625 },
1626 'params': {
1627 'skip_download': True,
1628 },
1629 }, {
1630 # Has multiple audio streams
1631 'url': 'WaOKSUlf4TM',
1632 'only_matching': True
1633 }, {
1634 # Requires Premium: has format 141 when requested using YTM url
1635 'url': 'https://music.youtube.com/watch?v=XclachpHxis',
1636 'only_matching': True
1637 }, {
1638 # multiple subtitles with same lang_code
1639 'url': 'https://www.youtube.com/watch?v=wsQiKKfKxug',
1640 'only_matching': True,
1641 }, {
1642 # Force use android client fallback
1643 'url': 'https://www.youtube.com/watch?v=YOelRv7fMxY',
1644 'info_dict': {
1645 'id': 'YOelRv7fMxY',
1646 'title': 'DIGGING A SECRET TUNNEL Part 1',
1647 'ext': '3gp',
1648 'upload_date': '20210624',
1649 'channel_id': 'UCp68_FLety0O-n9QU6phsgw',
1650 'uploader': 'colinfurze',
1651 'uploader_id': 'colinfurze',
1652 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCp68_FLety0O-n9QU6phsgw',
1653 'description': 'md5:b5096f56af7ccd7a555c84db81738b22'
1654 },
1655 'params': {
1656 'format': '17', # 3gp format available on android
1657 'extractor_args': {'youtube': {'player_client': ['android']}},
1658 },
1659 },
1660 {
1661 # Skip download of additional client configs (remix client config in this case)
1662 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1663 'only_matching': True,
1664 'params': {
1665 'extractor_args': {'youtube': {'player_skip': ['configs']}},
1666 },
1667 }, {
1668 # shorts
1669 'url': 'https://www.youtube.com/shorts/BGQWPY4IigY',
1670 'only_matching': True,
1671 },
1672 ]
1673
@classmethod
def suitable(cls, url):
    """Return whether this extractor should handle *url*.

    URLs whose query string carries a non-empty ``list`` parameter are
    rejected outright; every other URL is judged by the parent class's
    ``suitable()``.
    """
    from ..utils import parse_qs

    # Only the first value of the ``list`` parameter is examined.
    first_list_value = parse_qs(url).get('list', [None])[0]
    if first_list_value:
        return False
    return super(YoutubeIE, cls).suitable(url)
1682
def __init__(self, *args, **kwargs):
    """Initialise the parent extractor and create empty per-instance caches."""
    super(YoutubeIE, self).__init__(*args, **kwargs)
    # NOTE(review): what _player_cache holds is not visible in this part of
    # the file - confirm against its users.
    self._player_cache = {}
    # Maps a player id to the downloaded player code (filled by _load_player).
    self._code_cache = {}
1687
def _extract_player_url(self, *ytcfgs, webpage=None):
    """Return the absolute player JS URL found in the given ytcfg objects.

    The URL is looked up under ``PLAYER_JS_URL`` and under ``jsUrl`` inside
    ``WEB_PLAYER_CONTEXT_CONFIGS``. Returns None when no URL is found.
    *webpage* is accepted but not used by this method.
    """
    # NOTE(review): presumably traverse_obj tries the paths in the order
    # given and returns the first string hit - confirm against its docs.
    js_url = traverse_obj(
        ytcfgs,
        (..., 'PLAYER_JS_URL'),
        (..., 'WEB_PLAYER_CONTEXT_CONFIGS', ..., 'jsUrl'),
        get_all=False, expected_type=compat_str)
    if not js_url:
        return None
    # Protocol-relative URL: just add the scheme.
    if js_url.startswith('//'):
        return 'https:' + js_url
    # Already absolute: use as is.
    if re.match(r'https?://', js_url):
        return js_url
    # Anything else is resolved against the YouTube origin.
    return compat_urlparse.urljoin('https://www.youtube.com', js_url)
1700
def _download_player_url(self, video_id, fatal=False):
    """Build the player JS URL from the version named in the iframe API JS.

    Returns None when the iframe API page or the player version cannot be
    obtained (with `fatal=False`).
    """
    iframe_js = self._download_webpage(
        'https://www.youtube.com/iframe_api',
        note='Downloading iframe API JS', video_id=video_id, fatal=fatal)
    if not iframe_js:
        return None
    player_version = self._search_regex(
        r'player\\?/([0-9a-fA-F]{8})\\?/', iframe_js, 'player version', fatal=fatal)
    if not player_version:
        return None
    return f'https://www.youtube.com/s/player/{player_version}/player_ias.vflset/en_US/base.js'
1710
def _signature_cache_id(self, example_sig):
    """ Return a string representation of a signature """
    # The id is the dot-joined lengths of the signature's dot-separated parts
    part_lengths = [compat_str(len(part)) for part in example_sig.split('.')]
    return '.'.join(part_lengths)
1714
@classmethod
def _extract_player_info(cls, player_url):
    """Return the player id captured by the first matching _PLAYER_INFO_RE.

    Raises ExtractorError when none of the patterns match `player_url`.
    """
    # Lazy generator: patterns after the first hit are never evaluated
    candidates = (re.search(player_re, player_url) for player_re in cls._PLAYER_INFO_RE)
    id_m = next(filter(None, candidates), None)
    if id_m is None:
        raise ExtractorError('Cannot identify player %r' % player_url)
    return id_m.group('id')
1724
def _load_player(self, video_id, player_url, fatal=True):
    """Return the player JS source for `player_url`, downloading it once.

    Successful downloads are kept in `self._code_cache` keyed by player id.
    Returns None if the download yields nothing.
    """
    player_id = self._extract_player_info(player_url)
    if player_id in self._code_cache:
        return self._code_cache[player_id]

    code = self._download_webpage(
        player_url, video_id, fatal=fatal,
        note='Downloading player ' + player_id,
        errnote='Download of %s failed' % player_url)
    if not code:
        return None
    self._code_cache[player_id] = code
    return code
1735
def _extract_signature_function(self, video_id, player_url, example_sig):
    """Return a callable that decrypts signatures shaped like `example_sig`.

    A cached index permutation from the 'youtube-sigfuncs' filesystem cache
    is used when available; otherwise the function is parsed from the player
    JS and its permutation is stored in that cache. Returns None when no
    player code could be loaded.
    """
    player_id = self._extract_player_info(player_url)

    # Read from filesystem cache
    func_id = 'js_%s_%s' % (player_id, self._signature_cache_id(example_sig))
    assert os.path.basename(func_id) == func_id

    cached_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
    if cached_spec is not None:
        return lambda s: ''.join(s[i] for i in cached_spec)

    code = self._load_player(video_id, player_url)
    if not code:
        return None
    sig_func = self._parse_sig_js(code)

    # Run the function on a string whose characters are their own indices,
    # so the output spells out the index permutation it applies
    probe = ''.join(map(compat_chr, range(len(example_sig))))
    spec = [ord(c) for c in sig_func(probe)]

    self._downloader.cache.store('youtube-sigfuncs', func_id, spec)
    return sig_func
1758
def _print_sig_code(self, func, example_sig):
    """Print Python code equivalent to the signature function `func`.

    Does nothing unless the `youtube_print_sig_code` param is set.
    `func` is probed with a string whose characters are their own indices;
    the resulting index list is rendered as a sum of slices/indexes of `s`.
    """
    if not self.get_param('youtube_print_sig_code'):
        return

    def gen_sig_code(idxs):
        # Yields 's[...]' fragments, collapsing runs that step by +1 or -1
        # into a single slice expression
        def _genslice(start, end, step):
            # Render one run as a slice of `s`; `end` is the last index
            # included in the run
            starts = '' if start == 0 else str(start)
            ends = (':%d' % (end + step)) if end + step >= 0 else ':'
            steps = '' if step == 1 else (':%d' % step)
            return 's[%s%s%s]' % (starts, ends, steps)

        # step is None while no run is in progress
        step = None
        # Squelch pyflakes warnings - start will be set when step is set
        start = '(Never used)'
        for i, prev in zip(idxs[1:], idxs[:-1]):
            if step is not None:
                if i - prev == step:
                    # Run continues
                    continue
                # Run ended at prev: emit it
                yield _genslice(start, prev, step)
                step = None
                continue
            if i - prev in [-1, 1]:
                # A new run starts at prev
                step = i - prev
                start = prev
                continue
            else:
                yield 's[%d]' % prev
        # Flush whatever is pending after the last pair
        if step is None:
            yield 's[%d]' % i
        else:
            yield _genslice(start, i, step)

    test_string = ''.join(map(compat_chr, range(len(example_sig))))
    cache_res = func(test_string)
    cache_spec = [ord(c) for c in cache_res]
    expr_code = ' + '.join(gen_sig_code(cache_spec))
    signature_id_tuple = '(%s)' % (
        ', '.join(compat_str(len(p)) for p in example_sig.split('.')))
    code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
            ' return %s\n') % (signature_id_tuple, expr_code)
    self.to_screen('Extracted signature function:\n' + code)
1800
def _parse_sig_js(self, jscode):
    """Locate the signature function in the player JS and wrap it.

    The function name is found with the first matching pattern below; the
    returned callable takes the encrypted signature string and passes it to
    the interpreted JS function as its single argument.
    """
    sig_name_patterns = (
        r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bm=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(h\.s\)\)',
        r'\bc&&\(c=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(c\)\)',
        r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\);[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\)',
        r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
        r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
        # Obsolete patterns
        r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
        r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(')
    funcname = self._search_regex(
        sig_name_patterns, jscode,
        'Initial JS player signature function name', group='sig')

    js_entry_point = JSInterpreter(jscode).extract_function(funcname)

    def run_signature_function(s):
        # The interpreted function takes its arguments as a list
        return js_entry_point([s])

    return run_signature_function
1824
def _decrypt_signature(self, s, video_id, player_url):
    """Turn the encrypted s field into a working signature"""
    if player_url is None:
        raise ExtractorError('Cannot decrypt signature without player_url')

    try:
        # One decryption function per (player, signature shape)
        cache_key = (player_url, self._signature_cache_id(s))
        if cache_key not in self._player_cache:
            self._player_cache[cache_key] = self._extract_signature_function(
                video_id, player_url, s)
        decrypt = self._player_cache[cache_key]
        self._print_sig_code(decrypt, s)
        return decrypt(s)
    except Exception as e:
        # Any failure is reported with the full traceback in the message
        raise ExtractorError('Signature extraction failed: ' + traceback.format_exc(), cause=e)
1843
def _decrypt_nsig(self, s, video_id, player_url):
    """Turn the encrypted n field into a working signature"""
    if player_url is None:
        raise ExtractorError('Cannot decrypt nsig without player_url')
    # Make the player URL absolute before using it as a cache key
    if player_url.startswith('//'):
        player_url = 'https:' + player_url
    elif not re.match(r'https?://', player_url):
        player_url = compat_urlparse.urljoin('https://www.youtube.com', player_url)

    # Already-decrypted values are remembered per input value
    value_key = ('nsig_value', s)
    if value_key in self._player_cache:
        return self._player_cache[value_key]

    try:
        func_key = ('nsig', player_url)
        if func_key not in self._player_cache:
            self._player_cache[func_key] = self._extract_n_function(video_id, player_url)
        decrypted = self._player_cache[func_key](s)
        self._player_cache[value_key] = decrypted
        self.write_debug(f'Decrypted nsig {s} => {decrypted}')
        return decrypted
    except Exception as e:
        raise ExtractorError(traceback.format_exc(), cause=e, video_id=video_id)
1868
def _extract_n_function_name(self, jscode):
    """Return the name of the n-parameter function found in the player JS."""
    nfunc_patterns = (
        r'\.get\("n"\)\)&&\(b=(?P<nfunc>[a-zA-Z0-9$]{3})\([a-zA-Z0-9]\)',)
    return self._search_regex(
        nfunc_patterns, jscode,
        'Initial JS player n function name', group='nfunc')
1873
def _extract_n_function(self, video_id, player_url):
    """Return a callable that runs the player's n-parameter function.

    The extracted function code is kept in the 'youtube-nsig' filesystem
    cache keyed by player id; on a cache miss it is pulled out of the
    player JS and stored.
    """
    player_id = self._extract_player_info(player_url)
    func_code = self._downloader.cache.load('youtube-nsig', player_id)

    if not func_code:
        jscode = self._load_player(video_id, player_url)
        funcname = self._extract_n_function_name(jscode)
        jsi = JSInterpreter(jscode)
        func_code = jsi.extract_function_code(funcname)
        self._downloader.cache.store('youtube-nsig', player_id, func_code)
    else:
        jsi = JSInterpreter(func_code)

    if self.get_param('youtube_print_sig_code'):
        self.to_screen(f'Extracted nsig function from {player_id}:\n{func_code[1]}\n')

    def run_n_function(s):
        # The function is rebuilt from its code on every call, as before
        return jsi.extract_function_from_code(*func_code)([s])

    return run_n_function
1891
def _extract_signature_timestamp(self, video_id, player_url, ytcfg=None, fatal=False):
    """
    Extract signatureTimestamp (sts)
    Required to tell API what sig/player version is in use.

    The `STS` entry of `ytcfg` is preferred; otherwise the value is searched
    for in the player JS. Without a `player_url` this raises when `fatal`
    is set and otherwise warns and returns None.
    """
    sts = int_or_none(ytcfg.get('STS')) if isinstance(ytcfg, dict) else None
    if sts:
        return sts

    # Attempt to extract from player
    if player_url is None:
        error_msg = 'Cannot extract signature timestamp without player_url.'
        if fatal:
            raise ExtractorError(error_msg)
        self.report_warning(error_msg)
        return None

    code = self._load_player(video_id, player_url, fatal=fatal)
    if not code:
        # Nothing to search in: hand back whatever ytcfg gave us
        return sts
    return int_or_none(self._search_regex(
        r'(?:signatureTimestamp|sts)\s*:\s*(?P<sts>[0-9]{5})', code,
        'JS player signature timestamp', group='sts', fatal=fatal))
1915
def _mark_watched(self, video_id, player_responses):
    """Request the playback-tracking URL so the video is marked as watched.

    Uses the first `videostatsPlaybackUrl` found in `player_responses`;
    warns and returns when there is none. The request itself is non-fatal.
    """
    tracking_url = traverse_obj(
        player_responses, (..., 'playbackTracking', 'videostatsPlaybackUrl', 'baseUrl'),
        expected_type=url_or_none, get_all=False)
    if not tracking_url:
        self.report_warning('Unable to mark watched')
        return

    parsed_url = compat_urlparse.urlparse(tracking_url)
    query = compat_urlparse.parse_qs(parsed_url.query)

    # cpn generation algorithm is reverse engineered from base.js.
    # In fact it works even with dummy cpn.
    CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
    cpn_chars = [CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(0, 16)]

    query['ver'] = ['2']
    query['cpn'] = [''.join(cpn_chars)]
    new_query = compat_urllib_parse_urlencode(query, True)
    tracking_url = compat_urlparse.urlunparse(parsed_url._replace(query=new_query))

    self._download_webpage(
        tracking_url, video_id, 'Marking watched',
        'Unable to mark watched', fatal=False)
1941
@staticmethod
def _extract_urls(webpage):
    """Return every YouTube embed found in `webpage`.

    The list holds, in this order: URLs of embedded players (HTML-unescaped),
    ids from lazyYT embeds (HTML-unescaped) and ids from the Wordpress
    "YouTube Video Importer" plugin.
    """
    # Embedded YouTube player
    # The embedSWF alternative used to read `embedSWF\(?:\s*`, which demands
    # a literal ':' after an optional '(' and so could never match a real
    # `embedSWF("...")` call; it now matches the opening parenthesis.
    entries = [
        unescapeHTML(mobj.group('url'))
        for mobj in re.finditer(r'''(?x)
        (?:
            <iframe[^>]+?src=|
            data-video-url=|
            <embed[^>]+?src=|
            embedSWF\(\s*|
            <object[^>]+data=|
            new\s+SWFObject\(
        )
        (["\'])
        (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
            (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
        \1''', webpage)]

    # lazyYT YouTube embed
    entries.extend(map(
        unescapeHTML,
        re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage)))

    # Wordpress "YouTube Video Importer" plugin
    matches = re.findall(r'''(?x)<div[^>]+
        class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
        data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
    # findall returns one tuple per match; the video id is the last group
    entries.extend(m[-1] for m in matches)

    return entries
1973
@staticmethod
def _extract_url(webpage):
    """Return the first embed found by _extract_urls, or None if there is none."""
    return next(iter(YoutubeIE._extract_urls(webpage)), None)
1978
@classmethod
def extract_id(cls, url):
    """Return the `id` group of `url` matched against _VALID_URL.

    Raises ExtractorError when the URL does not match.
    """
    url_match = re.match(cls._VALID_URL, url, re.VERBOSE)
    if url_match is not None:
        return url_match.group('id')
    raise ExtractorError('Invalid URL: %s' % url)
1985
def _extract_chapters_from_json(self, data, duration):
    """Build the chapter list from the chaptered player bar in `data`."""
    chapters_path = (
        'playerOverlays', 'playerOverlayRenderer', 'decoratedPlayerBarRenderer',
        'decoratedPlayerBarRenderer', 'playerBar', 'chapteredPlayerBarRenderer', 'chapters')
    chapter_list = traverse_obj(data, chapters_path, expected_type=list)

    def start_seconds(chapter):
        # timeRangeStartMillis is scaled down by 1000
        return float_or_none(
            traverse_obj(chapter, ('chapterRenderer', 'timeRangeStartMillis')), scale=1000)

    def title_text(chapter):
        return traverse_obj(
            chapter, ('chapterRenderer', 'title', 'simpleText'), expected_type=str)

    return self._extract_chapters(
        chapter_list,
        chapter_time=start_seconds,
        chapter_title=title_text,
        duration=duration)
2000
def _extract_chapters_from_engagement_panel(self, data, duration):
    """Build the chapter list from the macro-markers engagement panels.

    Returns the chapters of the first panel that yields any, else [].
    """
    panel_contents = traverse_obj(
        data,
        ('engagementPanels', ..., 'engagementPanelSectionListRenderer', 'content', 'macroMarkersListRenderer', 'contents'),
        expected_type=list, default=[])

    def chapter_time(chapter):
        return parse_duration(self._get_text(chapter, 'timeDescription'))

    def chapter_title(chapter):
        return self._get_text(chapter, 'title')

    for contents in panel_contents:
        chapters = self._extract_chapters(
            traverse_obj(contents, (..., 'macroMarkersListItemRenderer')),
            chapter_time, chapter_title, duration)
        if chapters:
            return chapters
    return []
2016
2017 def _extract_chapters(self, chapter_list, chapter_time, chapter_title, duration):
2018 chapters = []
2019 last_chapter = {'start_time': 0}
2020 for idx, chapter in enumerate(chapter_list or []):
2021 title = chapter_title(chapter)
2022 start_time = chapter_time(chapter)
2023 if start_time is None:
2024 continue
2025 last_chapter['end_time'] = start_time
2026 if start_time < last_chapter['start_time']:
2027 if idx == 1:
2028 chapters.pop()
2029 self.report_warning('Invalid start time for chapter "%s"' % last_chapter['title'])
2030 else:
2031 self.report_warning(f'Invalid start time for chapter "{title}"')
2032 continue
2033 last_chapter = {'start_time': start_time, 'title': title}
2034 chapters.append(last_chapter)
2035 last_chapter['end_time'] = duration
2036 return chapters
2037
def _extract_yt_initial_variable(self, webpage, regex, video_id, name):
    """Find a JSON blob in `webpage` with `regex` and parse it (non-fatal).

    The regex followed by the initial-data boundary is tried first, then the
    bare regex; '{}' is parsed when neither matches.
    """
    patterns = (
        r'%s\s*%s' % (regex, self._YT_INITIAL_BOUNDARY_RE),
        regex)
    raw_json = self._search_regex(patterns, webpage, name, default='{}')
    return self._parse_json(raw_json, video_id, fatal=False)
2042
@staticmethod
def parse_time_text(time_text):
    """
    Parse the comment time text
    time_text is in the format 'X units ago (edited)'

    Returns None when the text has fewer than three space-separated words
    or when the first two words cannot be interpreted.
    """
    words = time_text.split(' ')
    if len(words) < 3:
        return None
    amount, unit = words[0], words[1]
    try:
        return datetime_from_str('now-%s%s' % (amount, unit), precision='auto')
    except ValueError:
        return None
2055
def _extract_comment(self, comment_renderer, parent=None):
    """Convert a comment renderer into a comment info dict.

    Returns None when the renderer has no `commentId`. `parent` is the id
    of the parent comment; top-level comments get 'root'. `timestamp` is
    None when the published time text cannot be parsed.
    """
    comment_id = comment_renderer.get('commentId')
    if not comment_id:
        return

    text = self._get_text(comment_renderer, 'contentText')

    # note: timestamp is an estimate calculated from the current time and time_text
    time_text = self._get_text(comment_renderer, 'publishedTimeText') or ''
    time_text_dt = self.parse_time_text(time_text)
    # Must be bound even when parsing fails, otherwise building the result
    # below raises UnboundLocalError
    timestamp = None
    if isinstance(time_text_dt, datetime.datetime):
        timestamp = calendar.timegm(time_text_dt.timetuple())
    author = self._get_text(comment_renderer, 'authorText')
    author_id = try_get(comment_renderer,
                        lambda x: x['authorEndpoint']['browseEndpoint']['browseId'], compat_str)

    votes = parse_count(try_get(comment_renderer, (lambda x: x['voteCount']['simpleText'],
                                                   lambda x: x['likeCount']), compat_str)) or 0
    author_thumbnail = try_get(comment_renderer,
                               lambda x: x['authorThumbnail']['thumbnails'][-1]['url'], compat_str)

    author_is_uploader = try_get(comment_renderer, lambda x: x['authorIsChannelOwner'], bool)
    is_favorited = 'creatorHeart' in (try_get(
        comment_renderer, lambda x: x['actionButtons']['commentActionButtonsRenderer'], dict) or {})
    return {
        'id': comment_id,
        'text': text,
        'timestamp': timestamp,
        'time_text': time_text,
        'like_count': votes,
        'is_favorited': is_favorited,
        'author': author,
        'author_id': author_id,
        'author_thumbnail': author_thumbnail,
        'author_is_uploader': author_is_uploader,
        'parent': parent or 'root'
    }
2093
def _comment_entries(self, root_continuation_data, ytcfg, video_id, parent=None, comment_counts=None):
    """Generator yielding comment dicts for a comment section or reply thread.

    Starting from the continuation found in `root_continuation_data`, pages
    are requested from the `next` endpoint until no continuation is left.
    Replies are produced by calling this method recursively with `parent`
    set to the id of the comment being replied to. `comment_counts` is a
    shared, mutable list: [comments so far, estimated total, current thread #].
    """

    def extract_header(contents):
        # Reads the comments header: records the expected comment count and
        # returns the continuation of the sort mode selected through the
        # `comment_sort` extractor argument (None if there is none)
        _continuation = None
        for content in contents:
            comments_header_renderer = try_get(content, lambda x: x['commentsHeaderRenderer'])
            expected_comment_count = parse_count(self._get_text(
                comments_header_renderer, 'countText', 'commentsCount', max_runs=1))

            if expected_comment_count:
                comment_counts[1] = expected_comment_count
                self.to_screen('Downloading ~%d comments' % expected_comment_count)
            sort_mode_str = self._configuration_arg('comment_sort', [''])[0]
            comment_sort_index = int(sort_mode_str != 'top')  # 1 = new, 0 = top

            sort_menu_item = try_get(
                comments_header_renderer,
                lambda x: x['sortMenu']['sortFilterSubMenuRenderer']['subMenuItems'][comment_sort_index], dict) or {}
            sort_continuation_ep = sort_menu_item.get('serviceEndpoint') or {}

            _continuation = self._extract_continuation_ep_data(sort_continuation_ep) or self._extract_continuation(sort_menu_item)
            if not _continuation:
                continue

            sort_text = sort_menu_item.get('title')
            if isinstance(sort_text, compat_str):
                sort_text = sort_text.lower()
            else:
                sort_text = 'top comments' if comment_sort_index == 0 else 'newest first'
            self.to_screen('Sorting comments by %s' % sort_text)
            break
        return _continuation

    def extract_thread(contents):
        # Yields each comment in `contents`, followed directly by its replies
        if not parent:
            comment_counts[2] = 0
        for content in contents:
            comment_thread_renderer = try_get(content, lambda x: x['commentThreadRenderer'])
            # A comment is either wrapped in a thread renderer or given directly
            comment_renderer = try_get(
                comment_thread_renderer, (lambda x: x['comment']['commentRenderer'], dict)) or try_get(
                content, (lambda x: x['commentRenderer'], dict))

            if not comment_renderer:
                continue
            comment = self._extract_comment(comment_renderer, parent)
            if not comment:
                continue
            comment_counts[0] += 1
            yield comment
            # Attempt to get the replies
            comment_replies_renderer = try_get(
                comment_thread_renderer, lambda x: x['replies']['commentRepliesRenderer'], dict)

            if comment_replies_renderer:
                comment_counts[2] += 1
                comment_entries_iter = self._comment_entries(
                    comment_replies_renderer, ytcfg, video_id,
                    parent=comment.get('id'), comment_counts=comment_counts)

                for reply_comment in comment_entries_iter:
                    yield reply_comment

    # YouTube comments have a max depth of 2
    max_depth = int_or_none(self._configuration_arg('max_comment_depth', [''])[0]) or float('inf')
    # With a depth limit of 1, reply threads are not downloaded at all
    if max_depth == 1 and parent:
        return
    if not comment_counts:
        # comment so far, est. total comments, current comment thread #
        comment_counts = [0, 0, 0]

    continuation = self._extract_continuation(root_continuation_data)
    if continuation and len(continuation['continuation']) < 27:
        self.write_debug('Detected old API continuation token. Generating new API compatible token.')
        continuation_token = self._generate_comment_continuation(video_id)
        continuation = self._build_api_continuation_query(continuation_token, None)

    # Surface any message attached to the section (top level only)
    message = self._get_text(root_continuation_data, ('contents', ..., 'messageRenderer', 'text'), max_runs=1)
    if message and not parent:
        self.report_warning(message, video_id=video_id)

    visitor_data = None
    # The first top-level response carries the header instead of comments
    is_first_continuation = parent is None

    for page_num in itertools.count(0):
        if not continuation:
            break
        headers = self.generate_api_headers(ytcfg=ytcfg, visitor_data=visitor_data)
        comment_prog_str = '(%d/%d)' % (comment_counts[0], comment_counts[1])
        if page_num == 0:
            if is_first_continuation:
                note_prefix = 'Downloading comment section API JSON'
            else:
                note_prefix = ' Downloading comment API JSON reply thread %d %s' % (
                    comment_counts[2], comment_prog_str)
        else:
            note_prefix = '%sDownloading comment%s API JSON page %d %s' % (
                ' ' if parent else '', ' replies' if parent else '',
                page_num, comment_prog_str)

        response = self._extract_response(
            item_id=None, query=continuation,
            ep='next', ytcfg=ytcfg, headers=headers, note=note_prefix,
            check_get_keys=('onResponseReceivedEndpoints', 'continuationContents'))
        if not response:
            break
        # Carry visitor data from the latest response into the next request
        visitor_data = try_get(
            response,
            lambda x: x['responseContext']['webResponseContextExtensionData']['ytConfigData']['visitorData'],
            compat_str) or visitor_data

        continuation_contents = dict_get(response, ('onResponseReceivedEndpoints', 'continuationContents'))

        # Pagination stops unless this page provides a new continuation
        continuation = None
        if isinstance(continuation_contents, list):
            for continuation_section in continuation_contents:
                if not isinstance(continuation_section, dict):
                    continue
                continuation_items = try_get(
                    continuation_section,
                    (lambda x: x['reloadContinuationItemsCommand']['continuationItems'],
                     lambda x: x['appendContinuationItemsAction']['continuationItems']),
                    list) or []
                if is_first_continuation:
                    continuation = extract_header(continuation_items)
                    is_first_continuation = False
                    if continuation:
                        break
                    continue
                # NOTE(review): `count` is the last index produced by
                # enumerate, so it is 0 both when nothing was yielded and
                # when exactly one entry was yielded - confirm that ending
                # pagination after a single-entry page is intended.
                count = 0
                for count, entry in enumerate(extract_thread(continuation_items)):
                    yield entry
                continuation = self._extract_continuation({'contents': continuation_items})
                if continuation:
                    # Sometimes YouTube provides a continuation without any comments
                    # In most cases we end up just downloading these with very little comments to come.
                    if count == 0:
                        if not parent:
                            self.report_warning('No comments received - assuming end of comments')
                        continuation = None
                    break

        # Deprecated response structure
        elif isinstance(continuation_contents, dict):
            known_continuation_renderers = ('itemSectionContinuation', 'commentRepliesContinuation')
            for key, continuation_renderer in continuation_contents.items():
                if key not in known_continuation_renderers:
                    continue
                if not isinstance(continuation_renderer, dict):
                    continue
                if is_first_continuation:
                    header_continuation_items = [continuation_renderer.get('header') or {}]
                    continuation = extract_header(header_continuation_items)
                    is_first_continuation = False
                    if continuation:
                        break

                # Sometimes YouTube provides a continuation without any comments
                # In most cases we end up just downloading these with very little comments to come.
                count = 0
                for count, entry in enumerate(extract_thread(continuation_renderer.get('contents') or {})):
                    yield entry
                continuation = self._extract_continuation(continuation_renderer)
                if count == 0:
                    if not parent:
                        self.report_warning('No comments received - assuming end of comments')
                    continuation = None
                break
2261
2262 @staticmethod
2263 def _generate_comment_continuation(video_id):
2264 """
2265 Generates initial comment section continuation token from given video id
2266 """
2267 b64_vid_id = base64.b64encode(bytes(video_id.encode('utf-8')))
2268 parts = ('Eg0SCw==', b64_vid_id, 'GAYyJyIRIgs=', b64_vid_id, 'MAB4AjAAQhBjb21tZW50cy1zZWN0aW9u')
2269 new_continuation_intlist = list(itertools.chain.from_iterable(
2270 [bytes_to_intlist(base64.b64decode(part)) for part in parts]))
2271 return base64.b64encode(intlist_to_bytes(new_continuation_intlist)).decode('utf-8')
2272
def _get_comments(self, ytcfg, video_id, contents, webpage):
    """Entry for comment extraction"""
    comment_limit = int_or_none(self._configuration_arg('max_comments', [''])[0])

    # Force English regardless of account setting to prevent parsing issues
    # See: https://github.com/yt-dlp/yt-dlp/issues/532
    # (done on a deep copy so the caller's ytcfg is left untouched)
    english_ytcfg = copy.deepcopy(ytcfg)
    client_context = traverse_obj(
        english_ytcfg, ('INNERTUBE_CONTEXT', 'client'), expected_type=dict, default={})
    client_context['hl'] = 'en'

    def _real_comment_extract(contents):
        # Pick the first item section identified as the comment section
        comment_section = None
        for item in traverse_obj(contents, (..., 'itemSectionRenderer'), default={}):
            if item.get('sectionIdentifier') == 'comment-item-section':
                comment_section = item
                break
        yield from self._comment_entries(comment_section, english_ytcfg, video_id)

    # Lazy: nothing is requested until the result is iterated
    return itertools.islice(_real_comment_extract(contents), 0, comment_limit)
2288
2289 @staticmethod
2290 def _get_checkok_params():
2291 return {'contentCheckOk': True, 'racyCheckOk': True}
2292
@classmethod
def _generate_player_context(cls, sts=None):
    """Build the playback-context part of a player query.

    `signatureTimestamp` is only included when `sts` is not None. The
    check-ok flags from _get_checkok_params are merged in at the top level.
    """
    playback_context = {'html5Preference': 'HTML5_PREF_WANTS'}
    if sts is not None:
        playback_context['signatureTimestamp'] = sts

    player_context = {
        'playbackContext': {
            'contentPlaybackContext': playback_context,
        },
    }
    player_context.update(cls._get_checkok_params())
    return player_context
2306
@staticmethod
def _is_agegated(player_response):
    """Return True if the playability status indicates an age gate.

    That is the case when `desktopLegacyAgeGateReason` is set, or when the
    status or reason contains one of the known age-gate markers.
    """
    if traverse_obj(player_response, ('playabilityStatus', 'desktopLegacyAgeGateReason')):
        return True

    reasons = traverse_obj(player_response, ('playabilityStatus', ('status', 'reason')), default=[])
    AGE_GATE_REASONS = (
        'confirm your age', 'age-restricted', 'inappropriate',  # reason
        'age_verification_required', 'age_check_required',  # status
    )
    for expected in AGE_GATE_REASONS:
        for reason in reasons:
            if expected in reason:
                return True
    return False
2318
@staticmethod
def _is_unplayable(player_response):
    """Return True if the playability status is exactly 'UNPLAYABLE'."""
    status = traverse_obj(player_response, ('playabilityStatus', 'status'))
    return status == 'UNPLAYABLE'
2322
def _extract_player_response(self, client, video_id, master_ytcfg, player_ytcfg, player_url, initial_pr):
    """Request the `player` endpoint as `client` and return its response.

    The signature timestamp is only looked up when a `player_url` is given.
    An empty response is returned as None.
    """
    session_index = self._extract_session_index(player_ytcfg, master_ytcfg)
    syncid = self._extract_account_syncid(player_ytcfg, master_ytcfg, initial_pr)
    if player_url:
        sts = self._extract_signature_timestamp(video_id, player_url, master_ytcfg, fatal=False)
    else:
        sts = None
    headers = self.generate_api_headers(
        ytcfg=player_ytcfg, account_syncid=syncid, session_index=session_index, default_client=client)

    yt_query = {'videoId': video_id, **self._generate_player_context(sts)}
    note = 'Downloading %s player API JSON' % client.replace('_', ' ').strip()
    player_response = self._extract_response(
        item_id=video_id, ep='player', query=yt_query,
        ytcfg=player_ytcfg, headers=headers, fatal=True,
        default_client=client,
        note=note)
    return player_response or None
2339
def _get_requested_clients(self, url, smuggled_data):
    """Return the ordered, de-duplicated list of player clients to try.

    Clients come from the `player_client` extractor argument, where
    'default' and 'all' expand to the default and to every allowed client;
    unsupported names are skipped with a warning. Without any valid request
    the defaults are used. For music URLs the `<client>_music` variant of
    each requested client is appended when such a client exists.
    """
    default = ['android', 'web']
    # Clients starting with "_" cannot be requested; highest priority first
    allowed_clients = sorted(
        [client for client in INNERTUBE_CLIENTS.keys() if client[:1] != '_'],
        key=lambda client: INNERTUBE_CLIENTS[client]['priority'], reverse=True)

    requested_clients = []
    for client in self._configuration_arg('player_client'):
        if client in allowed_clients:
            requested_clients.append(client)
        elif client == 'default':
            requested_clients.extend(default)
        elif client == 'all':
            requested_clients.extend(allowed_clients)
        else:
            self.report_warning(f'Skipping unsupported client {client}')
    if not requested_clients:
        requested_clients = list(default)

    if smuggled_data.get('is_music_url') or self.is_music_url(url):
        # Build the additions first: extending the list with a generator
        # that iterates the same list would mutate it during iteration
        music_clients = [
            f'{client}_music' for client in requested_clients
            if f'{client}_music' in INNERTUBE_CLIENTS]
        requested_clients.extend(music_clients)

    return orderedSet(requested_clients)
2363
def _extract_player_ytcfg(self, client, video_id):
    """Download and return the ytcfg of a client that has its own config page.

    Only 'web_music' and 'web_embedded' have one; for every other client,
    and when nothing can be extracted, an empty dict is returned.
    """
    if client == 'web_music':
        url = 'https://music.youtube.com'
    elif client == 'web_embedded':
        url = f'https://www.youtube.com/embed/{video_id}?html5=1'
    else:
        return {}
    webpage = self._download_webpage(url, video_id, fatal=False, note=f'Downloading {client} config')
    return self.extract_ytcfg(video_id, webpage) or {}
2373
def _extract_player_responses(self, clients, video_id, webpage, master_ytcfg):
    """Collect player responses for each requested client.

    Returns a tuple `(prs, player_url)`: the list of player responses and
    the player JS URL that was determined along the way (may be None).
    Age-gate / creator variants of a client are queued automatically when a
    response calls for them. If every request failed and nothing was
    collected, the last error is raised; otherwise errors are reported as
    warnings.
    """
    initial_pr = None
    if webpage:
        initial_pr = self._extract_yt_initial_variable(
            webpage, self._YT_INITIAL_PLAYER_RESPONSE_RE,
            video_id, 'initial player response')

    original_clients = clients
    # Reversed copy used as a stack: pop() takes the next client in the
    # requested order, append() makes a client the very next one tried
    clients = clients[::-1]
    prs = []

    def append_client(client_name):
        # Queue a follow-up client unless it is unknown or was requested anyway
        if client_name in INNERTUBE_CLIENTS and client_name not in original_clients:
            clients.append(client_name)

    # Android player_response does not have microFormats which are needed for
    # extraction of some data. So we return the initial_pr with formats
    # stripped out even if not requested by the user
    # See: https://github.com/yt-dlp/yt-dlp/issues/501
    if initial_pr:
        pr = dict(initial_pr)
        pr['streamingData'] = None
        prs.append(pr)

    last_error = None
    tried_iframe_fallback = False
    player_url = None
    while clients:
        client = clients.pop()
        player_ytcfg = master_ytcfg if client == 'web' else {}
        if 'configs' not in self._configuration_arg('player_skip'):
            player_ytcfg = self._extract_player_ytcfg(client, video_id) or player_ytcfg

        # Once found, the player URL is reused for the remaining clients
        player_url = player_url or self._extract_player_url(master_ytcfg, player_ytcfg, webpage=webpage)
        require_js_player = self._get_default_ytcfg(client).get('REQUIRE_JS_PLAYER')
        if 'js' in self._configuration_arg('player_skip'):
            require_js_player = False
            player_url = None

        # Fall back to the iframe API to find the player, at most once
        if not player_url and not tried_iframe_fallback and require_js_player:
            player_url = self._download_player_url(video_id)
            tried_iframe_fallback = True

        try:
            # The web client reuses the response embedded in the webpage
            pr = initial_pr if client == 'web' and initial_pr else self._extract_player_response(
                client, video_id, player_ytcfg or master_ytcfg, player_ytcfg, player_url if require_js_player else None, initial_pr)
        except ExtractorError as e:
            # Only the most recent error is kept; earlier ones become warnings
            if last_error:
                self.report_warning(last_error)
            last_error = e
            continue

        if pr:
            prs.append(pr)

        # creator clients can bypass AGE_VERIFICATION_REQUIRED if logged in
        if client.endswith('_agegate') and self._is_unplayable(pr) and self.is_authenticated:
            append_client(client.replace('_agegate', '_creator'))
        elif self._is_agegated(pr):
            append_client(f'{client}_agegate')

    if last_error:
        if not len(prs):
            raise last_error
        self.report_warning(last_error)
    return prs, player_url
2440
def _extract_formats(self, streaming_data, video_id, player_url, is_live):
    """Yield format dicts built from the streaming data of the player responses.

    Direct (https) formats listed under `formats`/`adaptiveFormats` are
    yielded first; formats from the HLS and DASH manifests referenced by the
    streaming data follow, unless they are skipped by configuration or
    duplicate an itag/protocol pair that was already seen.

    @param streaming_data  Iterable of `streamingData` dicts
    @param video_id        Video id, passed on to the download/decrypt helpers
    @param player_url      URL of the player JS; formats that need signature
                           decryption are dropped when it is missing
    @param is_live         Whether the video is live; DASH manifests are then
                           only used if `include_live_dash` is configured
    """
    # itag -> protocol, for de-duplicating direct and manifest formats
    itags = {}
    # Only used for membership tests, hence a set
    stream_ids = set()
    itag_qualities, res_qualities = {}, {}
    q = qualities([
        # Normally tiny is the smallest video-only formats. But
        # audio-only formats with unknown quality may get tagged as tiny
        'tiny',
        'audio_quality_ultralow', 'audio_quality_low', 'audio_quality_medium', 'audio_quality_high',  # Audio only formats
        'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres'
    ])
    streaming_formats = traverse_obj(streaming_data, (..., ('formats', 'adaptiveFormats'), ...), default=[])

    for fmt in streaming_formats:
        if fmt.get('targetDurationSec') or fmt.get('drmFamilies'):
            continue

        itag = str_or_none(fmt.get('itag'))
        audio_track = fmt.get('audioTrack') or {}
        # `or ''` (rather than a .get default) also covers an explicit null id
        audio_track_id = audio_track.get('id') or ''
        stream_id = '%s.%s' % (itag or '', audio_track_id)
        if stream_id in stream_ids:
            continue

        quality = fmt.get('quality')
        height = int_or_none(fmt.get('height'))
        if quality == 'tiny' or not quality:
            quality = (fmt.get('audioQuality') or '').lower() or quality
        # The 3gp format (17) in android client has a quality of "small",
        # but is actually worse than other formats
        if itag == '17':
            quality = 'tiny'
        if quality:
            if itag:
                itag_qualities[itag] = quality
            if height:
                res_qualities[height] = quality
        # FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment
        # (adding `&sq=0` to the URL) and parsing the emsg box to determine the
        # number of fragments that would subsequently be requested (with `&sq=N`)
        if fmt.get('type') == 'FORMAT_STREAM_TYPE_OTF':
            continue

        fmt_url = fmt.get('url')
        if not fmt_url:
            # No plain URL: it has to be rebuilt from the signature cipher
            sc = compat_parse_qs(fmt.get('signatureCipher'))
            fmt_url = url_or_none(try_get(sc, lambda x: x['url'][0]))
            encrypted_sig = try_get(sc, lambda x: x['s'][0])
            if not (sc and fmt_url and encrypted_sig):
                continue
            if not player_url:
                continue
            signature = self._decrypt_signature(encrypted_sig, video_id, player_url)
            sp = try_get(sc, lambda x: x['sp'][0]) or 'signature'
            fmt_url += '&' + sp + '=' + signature

        query = parse_qs(fmt_url)
        throttled = False
        if query.get('ratebypass') != ['yes'] and query.get('n'):
            try:
                fmt_url = update_url_query(fmt_url, {
                    'n': self._decrypt_nsig(query['n'][0], video_id, player_url)})
            except ExtractorError as e:
                self.report_warning(
                    f'nsig extraction failed: You may experience throttling for some formats\n'
                    f'n = {query["n"][0]} ; player = {player_url}\n{e}', only_once=True)
                throttled = True

        if itag:
            itags[itag] = 'https'
            stream_ids.add(stream_id)

        tbr = float_or_none(
            fmt.get('averageBitrate') or fmt.get('bitrate'), 1000)
        dct = {
            'asr': int_or_none(fmt.get('audioSampleRate')),
            'filesize': int_or_none(fmt.get('contentLength')),
            'format_id': itag,
            'format_note': join_nonempty(
                '%s%s' % (audio_track.get('displayName') or '',
                          ' (default)' if audio_track.get('audioIsDefault') else ''),
                # quality may still be None when the format carries neither
                # `quality` nor `audioQuality`
                fmt.get('qualityLabel') or (quality or '').replace('audio_quality_', ''),
                throttled and 'THROTTLED', delim=', '),
            'source_preference': -10 if throttled else -1,
            'fps': int_or_none(fmt.get('fps')) or None,
            'height': height,
            'quality': q(quality),
            'tbr': tbr,
            'url': fmt_url,
            'width': int_or_none(fmt.get('width')),
            'language': audio_track_id.split('.')[0],
            'language_preference': 1 if audio_track.get('audioIsDefault') else -1,
        }
        mime_mobj = re.match(
            r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', fmt.get('mimeType') or '')
        if mime_mobj:
            dct['ext'] = mimetype2ext(mime_mobj.group(1))
            dct.update(parse_codecs(mime_mobj.group(2)))
        no_audio = dct.get('acodec') == 'none'
        no_video = dct.get('vcodec') == 'none'
        if no_audio:
            dct['vbr'] = tbr
        if no_video:
            dct['abr'] = tbr
        if no_audio or no_video:
            dct['downloader_options'] = {
                # Youtube throttles chunks >~10M
                'http_chunk_size': 10485760,
            }
            if dct.get('ext'):
                dct['container'] = dct['ext'] + '_dash'
        yield dct

    skip_manifests = self._configuration_arg('skip')
    get_dash = (
        (not is_live or self._configuration_arg('include_live_dash'))
        and 'dash' not in skip_manifests and self.get_param('youtube_include_dash_manifest', True))
    get_hls = 'hls' not in skip_manifests and self.get_param('youtube_include_hls_manifest', True)

    def process_manifest_format(f, proto, itag):
        """Assign format_id/quality to a manifest format; return False if it is a duplicate"""
        if itag in itags:
            if itags[itag] == proto or f'{itag}-{proto}' in itags:
                return False
            # Same itag already seen via another protocol: disambiguate the id
            itag = f'{itag}-{proto}'
        if itag:
            f['format_id'] = itag
            itags[itag] = proto

        # Reuse the quality seen for the same itag, else for the same height
        f['quality'] = next((
            q(qdict[val])
            for val, qdict in ((f.get('format_id', '').split('-')[0], itag_qualities), (f.get('height'), res_qualities))
            if val in qdict), -1)
        return True

    for sd in streaming_data:
        hls_manifest_url = get_hls and sd.get('hlsManifestUrl')
        if hls_manifest_url:
            for f in self._extract_m3u8_formats(hls_manifest_url, video_id, 'mp4', fatal=False):
                if process_manifest_format(f, 'hls', self._search_regex(
                        r'/itag/(\d+)', f['url'], 'itag', default=None)):
                    yield f

        dash_manifest_url = get_dash and sd.get('dashManifestUrl')
        if dash_manifest_url:
            for f in self._extract_mpd_formats(dash_manifest_url, video_id, fatal=False):
                if process_manifest_format(f, 'dash', f['format_id']):
                    f['filesize'] = int_or_none(self._search_regex(
                        r'/clen/(\d+)', f.get('fragment_base_url') or f['url'], 'file size', default=None))
                    yield f
2588
def _real_extract(self, url):
    """Extract the info dict for a single video URL.

    Downloads the watch page (unless `webpage` is listed in the `player_skip`
    extractor argument), obtains the player responses and assembles formats,
    thumbnails, subtitles and metadata from them and from the initial data.

    Returns the info dict of the video. When the playability status carries a
    trailer video id, a `url_result` for that trailer is returned instead;
    for multifeed videos (unless `noplaylist` is set or a single feed was
    forced through the smuggled data) a playlist result is returned.
    """
    url, smuggled_data = unsmuggle_url(url, {})
    video_id = self._match_id(url)

    base_url = self.http_scheme() + '//www.youtube.com/'
    webpage_url = base_url + 'watch?v=' + video_id
    webpage = None
    if 'webpage' not in self._configuration_arg('player_skip'):
        webpage = self._download_webpage(
            webpage_url + '&bpctr=9999999999&has_verified=1', video_id, fatal=False)

    master_ytcfg = self.extract_ytcfg(video_id, webpage) or self._get_default_ytcfg()

    player_responses, player_url = self._extract_player_responses(
        self._get_requested_clients(url, smuggled_data),
        video_id, webpage, master_ytcfg)

    # First matching value for `keys` across a list of dicts
    get_first = lambda obj, keys, **kwargs: traverse_obj(obj, (..., *variadic(keys)), **kwargs, get_all=False)

    playability_statuses = traverse_obj(
        player_responses, (..., 'playabilityStatus'), expected_type=dict, default=[])

    trailer_video_id = get_first(
        playability_statuses,
        ('errorScreen', 'playerLegacyDesktopYpcTrailerRenderer', 'trailerVideoId'),
        expected_type=str)
    if trailer_video_id:
        return self.url_result(
            trailer_video_id, self.ie_key(), trailer_video_id)

    search_meta = ((lambda x: self._html_search_meta(x, webpage, default=None))
                   if webpage else (lambda x: None))

    video_details = traverse_obj(
        player_responses, (..., 'videoDetails'), expected_type=dict, default=[])
    microformats = traverse_obj(
        player_responses, (..., 'microformat', 'playerMicroformatRenderer'),
        expected_type=dict, default=[])
    video_title = (
        get_first(video_details, 'title')
        or self._get_text(microformats, (..., 'title'))
        or search_meta(['og:title', 'twitter:title', 'title']))
    video_description = get_first(video_details, 'shortDescription')

    multifeed_metadata_list = get_first(
        player_responses,
        ('multicamera', 'playerLegacyMulticameraRenderer', 'metadataList'),
        expected_type=str)
    if multifeed_metadata_list and not smuggled_data.get('force_singlefeed'):
        if self.get_param('noplaylist'):
            self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
        else:
            entries = []
            feed_ids = []
            for feed in multifeed_metadata_list.split(','):
                # Unquote should take place before split on comma (,) since textual
                # fields may contain comma as well (see
                # https://github.com/ytdl-org/youtube-dl/issues/8536)
                feed_data = compat_parse_qs(
                    compat_urllib_parse_unquote_plus(feed))

                def feed_entry(name):
                    return try_get(
                        feed_data, lambda x: x[name][0], compat_str)

                feed_id = feed_entry('id')
                if not feed_id:
                    continue
                feed_title = feed_entry('title')
                title = video_title
                if feed_title:
                    title += ' (%s)' % feed_title
                entries.append({
                    '_type': 'url_transparent',
                    'ie_key': 'Youtube',
                    'url': smuggle_url(
                        '%swatch?v=%s' % (base_url, feed_data['id'][0]),
                        {'force_singlefeed': True}),
                    'title': title,
                })
                feed_ids.append(feed_id)
            self.to_screen(
                'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
                % (', '.join(feed_ids), video_id))
            return self.playlist_result(
                entries, video_id, video_title, video_description)

    live_broadcast_details = traverse_obj(microformats, (..., 'liveBroadcastDetails'))
    is_live = get_first(video_details, 'isLive')
    if is_live is None:
        is_live = get_first(live_broadcast_details, 'isLiveNow')

    streaming_data = traverse_obj(player_responses, (..., 'streamingData'), default=[])
    formats = list(self._extract_formats(streaming_data, video_id, player_url, is_live))

    if not formats:
        if not self.get_param('allow_unplayable_formats') and traverse_obj(streaming_data, (..., 'licenseInfos')):
            self.report_drm(video_id)
        pemr = get_first(
            playability_statuses,
            ('errorScreen', 'playerErrorMessageRenderer'), expected_type=dict) or {}
        reason = self._get_text(pemr, 'reason') or get_first(playability_statuses, 'reason')
        subreason = clean_html(self._get_text(pemr, 'subreason') or '')
        if subreason:
            if subreason == 'The uploader has not made this video available in your country.':
                countries = get_first(microformats, 'availableCountries')
                if not countries:
                    regions_allowed = search_meta('regionsAllowed')
                    countries = regions_allowed.split(',') if regions_allowed else None
                self.raise_geo_restricted(subreason, countries, metadata_available=True)
            # reason may be missing even though a subreason was given
            reason = f'{reason}. {subreason}' if reason else subreason
        if reason:
            self.raise_no_formats(reason, expected=True)

    # Source is given priority since formats that throttle are given lower source_preference
    # When throttling issue is fully fixed, remove this
    self._sort_formats(formats, ('quality', 'res', 'fps', 'hdr:12', 'source', 'codec:vp9.2', 'lang', 'proto'))

    keywords = get_first(video_details, 'keywords', expected_type=list) or []
    if not keywords and webpage:
        keywords = [
            unescapeHTML(m.group('content'))
            for m in re.finditer(self._meta_regex('og:video:tag'), webpage)]
    for keyword in keywords:
        if keyword.startswith('yt:stretch='):
            mobj = re.search(r'(\d+)\s*:\s*(\d+)', keyword)
            if mobj:
                # NB: float is intentional for forcing float division
                w, h = (float(v) for v in mobj.groups())
                if w > 0 and h > 0:
                    ratio = w / h
                    for f in formats:
                        if f.get('vcodec') != 'none':
                            f['stretched_ratio'] = ratio
                    break

    thumbnails = []
    thumbnail_dicts = traverse_obj(
        (video_details, microformats), (..., ..., 'thumbnail', 'thumbnails', ...),
        expected_type=dict, default=[])
    for thumbnail in thumbnail_dicts:
        thumbnail_url = thumbnail.get('url')
        if not thumbnail_url:
            continue
        # Sometimes youtube gives a wrong thumbnail URL. See:
        # https://github.com/yt-dlp/yt-dlp/issues/233
        # https://github.com/ytdl-org/youtube-dl/issues/28023
        if 'maxresdefault' in thumbnail_url:
            thumbnail_url = thumbnail_url.split('?')[0]
        thumbnails.append({
            'url': thumbnail_url,
            'height': int_or_none(thumbnail.get('height')),
            'width': int_or_none(thumbnail.get('width')),
        })
    thumbnail_url = search_meta(['og:image', 'twitter:image'])
    if thumbnail_url:
        thumbnails.append({
            'url': thumbnail_url,
        })
    # Shallow copy: shares the dicts, so the preferences set below apply to it too
    original_thumbnails = thumbnails.copy()

    # The best resolution thumbnails sometimes does not appear in the webpage
    # See: https://github.com/ytdl-org/youtube-dl/issues/29049, https://github.com/yt-dlp/yt-dlp/issues/340
    # List of possible thumbnails - Ref: <https://stackoverflow.com/a/20542029>
    thumbnail_names = [
        'maxresdefault', 'hq720', 'sddefault', 'sd1', 'sd2', 'sd3',
        'hqdefault', 'hq1', 'hq2', 'hq3', '0',
        'mqdefault', 'mq1', 'mq2', 'mq3',
        'default', '1', '2', '3'
    ]
    n_thumbnail_names = len(thumbnail_names)
    thumbnails.extend({
        'url': 'https://i.ytimg.com/vi{webp}/{video_id}/{name}{live}.{ext}'.format(
            video_id=video_id, name=name, ext=ext,
            webp='_webp' if ext == 'webp' else '', live='_live' if is_live else ''),
    } for name in thumbnail_names for ext in ('webp', 'jpg'))
    for thumb in thumbnails:
        # Earlier names in thumbnail_names get a higher preference; webp beats jpg
        i = next((i for i, t in enumerate(thumbnail_names) if f'/{video_id}/{t}' in thumb['url']), n_thumbnail_names)
        thumb['preference'] = (0 if '.webp' in thumb['url'] else -1) - (2 * i)
    self._remove_duplicate_formats(thumbnails)
    self._downloader._sort_thumbnails(original_thumbnails)

    category = get_first(microformats, 'category') or search_meta('genre')
    channel_id = str_or_none(
        get_first(video_details, 'channelId')
        or get_first(microformats, 'externalChannelId')
        or search_meta('channelId'))
    duration = int_or_none(
        get_first(video_details, 'lengthSeconds')
        or get_first(microformats, 'lengthSeconds')
        or parse_duration(search_meta('duration'))) or None
    owner_profile_url = get_first(microformats, 'ownerProfileUrl')

    live_content = get_first(video_details, 'isLiveContent')
    is_upcoming = get_first(video_details, 'isUpcoming')
    if is_live is None:
        if is_upcoming or live_content is False:
            is_live = False
    if is_upcoming is None and (live_content or is_live):
        is_upcoming = False
    live_starttime = parse_iso8601(get_first(live_broadcast_details, 'startTimestamp'))
    live_endtime = parse_iso8601(get_first(live_broadcast_details, 'endTimestamp'))
    if not duration and live_endtime and live_starttime:
        duration = live_endtime - live_starttime

    info = {
        'id': video_id,
        'title': self._live_title(video_title) if is_live else video_title,
        'formats': formats,
        'thumbnails': thumbnails,
        # The best thumbnail that we are sure exists. Prevents unnecessary
        # URL checking if the user doesn't care about getting the best possible thumbnail
        'thumbnail': traverse_obj(original_thumbnails, (-1, 'url')),
        'description': video_description,
        'upload_date': unified_strdate(
            get_first(microformats, 'uploadDate')
            or search_meta('uploadDate')),
        'uploader': get_first(video_details, 'author'),
        'uploader_id': self._search_regex(r'/(?:channel|user)/([^/?&#]+)', owner_profile_url, 'uploader id') if owner_profile_url else None,
        'uploader_url': owner_profile_url,
        'channel_id': channel_id,
        'channel_url': f'https://www.youtube.com/channel/{channel_id}' if channel_id else None,
        'duration': duration,
        'view_count': int_or_none(
            get_first((video_details, microformats), (..., 'viewCount'))
            or search_meta('interactionCount')),
        'average_rating': float_or_none(get_first(video_details, 'averageRating')),
        'age_limit': 18 if (
            get_first(microformats, 'isFamilySafe') is False
            or search_meta('isFamilyFriendly') == 'false'
            or search_meta('og:restrictions:age') == '18+') else 0,
        'webpage_url': webpage_url,
        'categories': [category] if category else None,
        'tags': keywords,
        'playable_in_embed': get_first(playability_statuses, 'playableInEmbed'),
        'is_live': is_live,
        'was_live': (False if is_live or is_upcoming or live_content is False
                     else None if is_live is None or is_upcoming is None
                     else live_content),
        'live_status': 'is_upcoming' if is_upcoming else None,  # rest will be set by YoutubeDL
        'release_timestamp': live_starttime,
    }

    pctr = traverse_obj(player_responses, (..., 'captions', 'playerCaptionsTracklistRenderer'), expected_type=dict)
    if pctr:
        def get_lang_code(track):
            return (remove_start(track.get('vssId') or '', '.').replace('.', '-')
                    or track.get('languageCode'))

        # Converted into dicts to remove duplicates
        captions = {
            get_lang_code(sub): sub
            for sub in traverse_obj(pctr, (..., 'captionTracks', ...), default=[])}
        translation_languages = {
            lang.get('languageCode'): self._get_text(lang.get('languageName'), max_runs=1)
            for lang in traverse_obj(pctr, (..., 'translationLanguages', ...), default=[])}

        def process_language(container, base_url, lang_code, sub_name, query):
            """Add one entry per subtitle format for `lang_code` to `container`"""
            lang_subs = container.setdefault(lang_code, [])
            for fmt in self._SUBTITLE_FORMATS:
                query.update({
                    'fmt': fmt,
                })
                lang_subs.append({
                    'ext': fmt,
                    'url': update_url_query(base_url, query),
                    'name': sub_name,
                })

        subtitles, automatic_captions = {}, {}
        for lang_code, caption_track in captions.items():
            caption_base_url = caption_track.get('baseUrl')
            if not caption_base_url:
                continue
            lang_name = self._get_text(caption_track, 'name', max_runs=1)
            if caption_track.get('kind') != 'asr':
                if not lang_code:
                    continue
                process_language(
                    subtitles, caption_base_url, lang_code, lang_name, {})
                if not caption_track.get('isTranslatable'):
                    continue
            for trans_code, trans_name in translation_languages.items():
                if not trans_code:
                    continue
                if caption_track.get('kind') != 'asr':
                    trans_code += f'-{lang_code}'
                    trans_name += format_field(lang_name, template=' from %s')
                process_language(
                    automatic_captions, caption_base_url, trans_code, trans_name, {'tlang': trans_code})
        info['automatic_captions'] = automatic_captions
        info['subtitles'] = subtitles

    # start/end times given in the URL; the fragment takes precedence over the query
    parsed_url = compat_urllib_parse_urlparse(url)
    for component in [parsed_url.fragment, parsed_url.query]:
        query = compat_parse_qs(component)
        for k, v in query.items():
            for d_k, s_ks in [('start', ('start', 't')), ('end', ('end',))]:
                d_k += '_time'
                if d_k not in info and k in s_ks:
                    info[d_k] = parse_duration(v[0])

    # Youtube Music Auto-generated description
    if video_description:
        mobj = re.search(r'(?s)(?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?.+\nAuto-generated by YouTube\.\s*$', video_description)
        if mobj:
            release_year = mobj.group('release_year')
            release_date = mobj.group('release_date')
            if release_date:
                release_date = release_date.replace('-', '')
                if not release_year:
                    release_year = release_date[:4]
            info.update({
                # Strip the matched album text (not the group name)
                'album': mobj.group('album').strip(),
                'artist': mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·')),
                'track': mobj.group('track').strip(),
                'release_date': release_date,
                'release_year': int_or_none(release_year),
            })

    initial_data = None
    if webpage:
        initial_data = self._extract_yt_initial_variable(
            webpage, self._YT_INITIAL_DATA_RE, video_id,
            'yt initial data')
    if not initial_data:
        query = {'videoId': video_id}
        query.update(self._get_checkok_params())
        initial_data = self._extract_response(
            item_id=video_id, ep='next', fatal=False,
            ytcfg=master_ytcfg, query=query,
            headers=self.generate_api_headers(ytcfg=master_ytcfg),
            note='Downloading initial data API JSON')

    try:
        # This will error if there is no livechat
        initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
        info.setdefault('subtitles', {})['live_chat'] = [{
            'url': 'https://www.youtube.com/watch?v=%s' % video_id,  # url is needed to set cookies
            'video_id': video_id,
            'ext': 'json',
            'protocol': 'youtube_live_chat' if is_live or is_upcoming else 'youtube_live_chat_replay',
        }]
    except (KeyError, IndexError, TypeError):
        pass

    # Computed unconditionally so that `contents` is always bound: it is
    # needed for the comment extraction below even without initial data
    contents = try_get(
        initial_data,
        lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'],
        list) or []

    if initial_data:
        info['chapters'] = (
            self._extract_chapters_from_json(initial_data, duration)
            or self._extract_chapters_from_engagement_panel(initial_data, duration)
            or None)

        for content in contents:
            if not isinstance(content, dict):
                continue
            vpir = content.get('videoPrimaryInfoRenderer')
            if vpir:
                stl = vpir.get('superTitleLink')
                if stl:
                    stl = self._get_text(stl)
                    if try_get(
                            vpir,
                            lambda x: x['superTitleIcon']['iconType']) == 'LOCATION_PIN':
                        info['location'] = stl
                    else:
                        mobj = re.search(r'(.+?)\s*S(\d+)\s*•\s*E(\d+)', stl)
                        if mobj:
                            info.update({
                                'series': mobj.group(1),
                                'season_number': int(mobj.group(2)),
                                'episode_number': int(mobj.group(3)),
                            })
                for tlb in (try_get(
                        vpir,
                        lambda x: x['videoActions']['menuRenderer']['topLevelButtons'],
                        list) or []):
                    tbr = tlb.get('toggleButtonRenderer') or {}
                    # Two known label layouts for the like/dislike buttons
                    for getter, regex in [(
                            lambda x: x['defaultText']['accessibility']['accessibilityData'],
                            r'(?P<count>[\d,]+)\s*(?P<type>(?:dis)?like)'), ([
                                lambda x: x['accessibility'],
                                lambda x: x['accessibilityData']['accessibilityData'],
                            ], r'(?P<type>(?:dis)?like) this video along with (?P<count>[\d,]+) other people')]:
                        label = (try_get(tbr, getter, dict) or {}).get('label')
                        if label:
                            mobj = re.match(regex, label)
                            if mobj:
                                info[mobj.group('type') + '_count'] = str_to_int(mobj.group('count'))
                                break
                sbr_tooltip = try_get(
                    vpir, lambda x: x['sentimentBar']['sentimentBarRenderer']['tooltip'])
                if sbr_tooltip:
                    like_count, dislike_count = sbr_tooltip.split(' / ')
                    info.update({
                        'like_count': str_to_int(like_count),
                        'dislike_count': str_to_int(dislike_count),
                    })
            vsir = content.get('videoSecondaryInfoRenderer')
            if vsir:
                info['channel'] = self._get_text(vsir, ('owner', 'videoOwnerRenderer', 'title'))
                rows = try_get(
                    vsir,
                    lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows'],
                    list) or []
                # A divider line marks several songs; music fields are then skipped
                multiple_songs = False
                for row in rows:
                    if try_get(row, lambda x: x['metadataRowRenderer']['hasDividerLine']) is True:
                        multiple_songs = True
                        break
                for row in rows:
                    mrr = row.get('metadataRowRenderer') or {}
                    mrr_title = mrr.get('title')
                    if not mrr_title:
                        continue
                    mrr_title = self._get_text(mrr, 'title')
                    mrr_contents_text = self._get_text(mrr, ('contents', 0))
                    if mrr_title == 'License':
                        info['license'] = mrr_contents_text
                    elif not multiple_songs:
                        if mrr_title == 'Album':
                            info['album'] = mrr_contents_text
                        elif mrr_title == 'Artist':
                            info['artist'] = mrr_contents_text
                        elif mrr_title == 'Song':
                            info['track'] = mrr_contents_text

    fallbacks = {
        'channel': 'uploader',
        'channel_id': 'uploader_id',
        'channel_url': 'uploader_url',
    }
    for to, frm in fallbacks.items():
        if not info.get(to):
            info[to] = info.get(frm)

    for s_k, d_k in [('artist', 'creator'), ('track', 'alt_title')]:
        v = info.get(s_k)
        if v:
            info[d_k] = v

    is_private = get_first(video_details, 'isPrivate', expected_type=bool)
    is_unlisted = get_first(microformats, 'isUnlisted', expected_type=bool)
    is_membersonly = None
    is_premium = None
    if initial_data and is_private is not None:
        is_membersonly = False
        is_premium = False
        badge_labels = set()
        for content in contents:
            if not isinstance(content, dict):
                continue
            badge_labels.update(self._extract_badges(content.get('videoPrimaryInfoRenderer')))
        for badge_label in badge_labels:
            if badge_label.lower() == 'members only':
                is_membersonly = True
            elif badge_label.lower() == 'premium':
                is_premium = True
            elif badge_label.lower() == 'unlisted':
                is_unlisted = True

    info['availability'] = self._availability(
        is_private=is_private,
        needs_premium=is_premium,
        needs_subscription=is_membersonly,
        needs_auth=info['age_limit'] >= 18,
        is_unlisted=None if is_private is None else is_unlisted)

    info['__post_extractor'] = self.extract_comments(master_ytcfg, video_id, contents, webpage)

    self.mark_watched(video_id, player_responses)

    return info
3064
3065
3066class YoutubeTabIE(YoutubeBaseInfoExtractor):
3067 IE_DESC = 'YouTube Tabs'
3068 _VALID_URL = r'''(?x)
3069 https?://
3070 (?:\w+\.)?
3071 (?:
3072 youtube(?:kids)?\.com|
3073 %(invidious)s
3074 )/
3075 (?:
3076 (?P<channel_type>channel|c|user|browse)/|
3077 (?P<not_channel>
3078 feed/|hashtag/|
3079 (?:playlist|watch)\?.*?\blist=
3080 )|
3081 (?!(?:%(reserved_names)s)\b) # Direct URLs
3082 )
3083 (?P<id>[^/?\#&]+)
3084 ''' % {
3085 'reserved_names': YoutubeBaseInfoExtractor._RESERVED_NAMES,
3086 'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
3087 }
3088 IE_NAME = 'youtube:tab'
3089
3090 _TESTS = [{
3091 'note': 'playlists, multipage',
3092 'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid',
3093 'playlist_mincount': 94,
3094 'info_dict': {
3095 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
3096 'title': 'Игорь Клейнер - Playlists',
3097 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
3098 'uploader': 'Игорь Клейнер',
3099 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
3100 },
3101 }, {
3102 'note': 'playlists, multipage, different order',
3103 'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
3104 'playlist_mincount': 94,
3105 'info_dict': {
3106 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
3107 'title': 'Игорь Клейнер - Playlists',
3108 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
3109 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
3110 'uploader': 'Игорь Клейнер',
3111 },
3112 }, {
3113 'note': 'playlists, series',
3114 'url': 'https://www.youtube.com/c/3blue1brown/playlists?view=50&sort=dd&shelf_id=3',
3115 'playlist_mincount': 5,
3116 'info_dict': {
3117 'id': 'UCYO_jab_esuFRV4b17AJtAw',
3118 'title': '3Blue1Brown - Playlists',
3119 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
3120 'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
3121 'uploader': '3Blue1Brown',
3122 },
3123 }, {
3124 'note': 'playlists, singlepage',
3125 'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
3126 'playlist_mincount': 4,
3127 'info_dict': {
3128 'id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
3129 'title': 'ThirstForScience - Playlists',
3130 'description': 'md5:609399d937ea957b0f53cbffb747a14c',
3131 'uploader': 'ThirstForScience',
3132 'uploader_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
3133 }
3134 }, {
3135 'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',
3136 'only_matching': True,
3137 }, {
3138 'note': 'basic, single video playlist',
3139 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
3140 'info_dict': {
3141 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
3142 'uploader': 'Sergey M.',
3143 'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
3144 'title': 'youtube-dl public playlist',
3145 },
3146 'playlist_count': 1,
3147 }, {
3148 'note': 'empty playlist',
3149 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
3150 'info_dict': {
3151 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
3152 'uploader': 'Sergey M.',
3153 'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
3154 'title': 'youtube-dl empty playlist',
3155 },
3156 'playlist_count': 0,
3157 }, {
3158 'note': 'Home tab',
3159 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/featured',
3160 'info_dict': {
3161 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3162 'title': 'lex will - Home',
3163 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3164 'uploader': 'lex will',
3165 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3166 },
3167 'playlist_mincount': 2,
3168 }, {
3169 'note': 'Videos tab',
3170 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos',
3171 'info_dict': {
3172 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3173 'title': 'lex will - Videos',
3174 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3175 'uploader': 'lex will',
3176 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3177 },
3178 'playlist_mincount': 975,
3179 }, {
3180 'note': 'Videos tab, sorted by popular',
3181 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos?view=0&sort=p&flow=grid',
3182 'info_dict': {
3183 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3184 'title': 'lex will - Videos',
3185 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3186 'uploader': 'lex will',
3187 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3188 },
3189 'playlist_mincount': 199,
3190 }, {
3191 'note': 'Playlists tab',
3192 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/playlists',
3193 'info_dict': {
3194 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3195 'title': 'lex will - Playlists',
3196 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3197 'uploader': 'lex will',
3198 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3199 },
3200 'playlist_mincount': 17,
3201 }, {
3202 'note': 'Community tab',
3203 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community',
3204 'info_dict': {
3205 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3206 'title': 'lex will - Community',
3207 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3208 'uploader': 'lex will',
3209 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3210 },
3211 'playlist_mincount': 18,
3212 }, {
3213 'note': 'Channels tab',
3214 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels',
3215 'info_dict': {
3216 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3217 'title': 'lex will - Channels',
3218 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3219 'uploader': 'lex will',
3220 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3221 },
3222 'playlist_mincount': 12,
3223 }, {
3224 'note': 'Search tab',
3225 'url': 'https://www.youtube.com/c/3blue1brown/search?query=linear%20algebra',
3226 'playlist_mincount': 40,
3227 'info_dict': {
3228 'id': 'UCYO_jab_esuFRV4b17AJtAw',
3229 'title': '3Blue1Brown - Search - linear algebra',
3230 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
3231 'uploader': '3Blue1Brown',
3232 'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
3233 },
3234 }, {
3235 'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',
3236 'only_matching': True,
3237 }, {
3238 'url': 'https://www.youtubekids.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
3239 'only_matching': True,
3240 }, {
3241 'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
3242 'only_matching': True,
3243 }, {
3244 'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
3245 'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
3246 'info_dict': {
3247 'title': '29C3: Not my department',
3248 'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
3249 'uploader': 'Christiaan008',
3250 'uploader_id': 'UCEPzS1rYsrkqzSLNp76nrcg',
3251 'description': 'md5:a14dc1a8ef8307a9807fe136a0660268',
3252 },
3253 'playlist_count': 96,
3254 }, {
3255 'note': 'Large playlist',
3256 'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
3257 'info_dict': {
3258 'title': 'Uploads from Cauchemar',
3259 'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
3260 'uploader': 'Cauchemar',
3261 'uploader_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',
3262 },
3263 'playlist_mincount': 1123,
3264 }, {
3265 'note': 'even larger playlist, 8832 videos',
3266 'url': 'http://www.youtube.com/user/NASAgovVideo/videos',
3267 'only_matching': True,
3268 }, {
3269 'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
3270 'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
3271 'info_dict': {
3272 'title': 'Uploads from Interstellar Movie',
3273 'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
3274 'uploader': 'Interstellar Movie',
3275 'uploader_id': 'UCXw-G3eDE9trcvY2sBMM_aA',
3276 },
3277 'playlist_mincount': 21,
3278 }, {
3279 'note': 'Playlist with "show unavailable videos" button',
3280 'url': 'https://www.youtube.com/playlist?list=UUTYLiWFZy8xtPwxFwX9rV7Q',
3281 'info_dict': {
3282 'title': 'Uploads from Phim Siêu Nhân Nhật Bản',
3283 'id': 'UUTYLiWFZy8xtPwxFwX9rV7Q',
3284 'uploader': 'Phim Siêu Nhân Nhật Bản',
3285 'uploader_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',
3286 },
3287 'playlist_mincount': 200,
3288 }, {
3289 'note': 'Playlist with unavailable videos in page 7',
3290 'url': 'https://www.youtube.com/playlist?list=UU8l9frL61Yl5KFOl87nIm2w',
3291 'info_dict': {
3292 'title': 'Uploads from BlankTV',
3293 'id': 'UU8l9frL61Yl5KFOl87nIm2w',
3294 'uploader': 'BlankTV',
3295 'uploader_id': 'UC8l9frL61Yl5KFOl87nIm2w',
3296 },
3297 'playlist_mincount': 1000,
3298 }, {
3299 'note': 'https://github.com/ytdl-org/youtube-dl/issues/21844',
3300 'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
3301 'info_dict': {
3302 'title': 'Data Analysis with Dr Mike Pound',
3303 'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
3304 'uploader_id': 'UC9-y-6csu5WGm29I7JiwpnA',
3305 'uploader': 'Computerphile',
3306 'description': 'md5:7f567c574d13d3f8c0954d9ffee4e487',
3307 },
3308 'playlist_mincount': 11,
3309 }, {
3310 'url': 'https://invidio.us/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
3311 'only_matching': True,
3312 }, {
3313 'note': 'Playlist URL that does not actually serve a playlist',
3314 'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
3315 'info_dict': {
3316 'id': 'FqZTN594JQw',
3317 'ext': 'webm',
3318 'title': "Smiley's People 01 detective, Adventure Series, Action",
3319 'uploader': 'STREEM',
3320 'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
3321 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
3322 'upload_date': '20150526',
3323 'license': 'Standard YouTube License',
3324 'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
3325 'categories': ['People & Blogs'],
3326 'tags': list,
3327 'view_count': int,
3328 'like_count': int,
3329 'dislike_count': int,
3330 },
3331 'params': {
3332 'skip_download': True,
3333 },
3334 'skip': 'This video is not available.',
3335 'add_ie': [YoutubeIE.ie_key()],
3336 }, {
3337 'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',
3338 'only_matching': True,
3339 }, {
3340 'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
3341 'only_matching': True,
3342 }, {
3343 'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',
3344 'info_dict': {
3345 'id': '3yImotZU3tw', # This will keep changing
3346 'ext': 'mp4',
3347 'title': compat_str,
3348 'uploader': 'Sky News',
3349 'uploader_id': 'skynews',
3350 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/skynews',
3351 'upload_date': r're:\d{8}',
3352 'description': compat_str,
3353 'categories': ['News & Politics'],
3354 'tags': list,
3355 'like_count': int,
3356 'dislike_count': int,
3357 },
3358 'params': {
3359 'skip_download': True,
3360 },
3361 'expected_warnings': ['Downloading just video ', 'Ignoring subtitle tracks found in '],
3362 }, {
3363 'url': 'https://www.youtube.com/user/TheYoungTurks/live',
3364 'info_dict': {
3365 'id': 'a48o2S1cPoo',
3366 'ext': 'mp4',
3367 'title': 'The Young Turks - Live Main Show',
3368 'uploader': 'The Young Turks',
3369 'uploader_id': 'TheYoungTurks',
3370 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
3371 'upload_date': '20150715',
3372 'license': 'Standard YouTube License',
3373 'description': 'md5:438179573adcdff3c97ebb1ee632b891',
3374 'categories': ['News & Politics'],
3375 'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
3376 'like_count': int,
3377 'dislike_count': int,
3378 },
3379 'params': {
3380 'skip_download': True,
3381 },
3382 'only_matching': True,
3383 }, {
3384 'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
3385 'only_matching': True,
3386 }, {
3387 'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
3388 'only_matching': True,
3389 }, {
3390 'note': 'A channel that is not live. Should raise error',
3391 'url': 'https://www.youtube.com/user/numberphile/live',
3392 'only_matching': True,
3393 }, {
3394 'url': 'https://www.youtube.com/feed/trending',
3395 'only_matching': True,
3396 }, {
3397 'url': 'https://www.youtube.com/feed/library',
3398 'only_matching': True,
3399 }, {
3400 'url': 'https://www.youtube.com/feed/history',
3401 'only_matching': True,
3402 }, {
3403 'url': 'https://www.youtube.com/feed/subscriptions',
3404 'only_matching': True,
3405 }, {
3406 'url': 'https://www.youtube.com/feed/watch_later',
3407 'only_matching': True,
3408 }, {
3409 'note': 'Recommended - redirects to home page.',
3410 'url': 'https://www.youtube.com/feed/recommended',
3411 'only_matching': True,
3412 }, {
3413 'note': 'inline playlist with not always working continuations',
3414 'url': 'https://www.youtube.com/watch?v=UC6u0Tct-Fo&list=PL36D642111D65BE7C',
3415 'only_matching': True,
3416 }, {
3417 'url': 'https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8',
3418 'only_matching': True,
3419 }, {
3420 'url': 'https://www.youtube.com/course',
3421 'only_matching': True,
3422 }, {
3423 'url': 'https://www.youtube.com/zsecurity',
3424 'only_matching': True,
3425 }, {
3426 'url': 'http://www.youtube.com/NASAgovVideo/videos',
3427 'only_matching': True,
3428 }, {
3429 'url': 'https://www.youtube.com/TheYoungTurks/live',
3430 'only_matching': True,
3431 }, {
3432 'url': 'https://www.youtube.com/hashtag/cctv9',
3433 'info_dict': {
3434 'id': 'cctv9',
3435 'title': '#cctv9',
3436 },
3437 'playlist_mincount': 350,
3438 }, {
3439 'url': 'https://www.youtube.com/watch?list=PLW4dVinRY435CBE_JD3t-0SRXKfnZHS1P&feature=youtu.be&v=M9cJMXmQ_ZU',
3440 'only_matching': True,
3441 }, {
3442 'note': 'Requires Premium: should request additional YTM-info webpage (and have format 141) for videos in playlist',
3443 'url': 'https://music.youtube.com/playlist?list=PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
3444 'only_matching': True
3445 }, {
3446 'note': '/browse/ should redirect to /channel/',
3447 'url': 'https://music.youtube.com/browse/UC1a8OFewdjuLq6KlF8M_8Ng',
3448 'only_matching': True
3449 }, {
3450 'note': 'VLPL, should redirect to playlist?list=PL...',
3451 'url': 'https://music.youtube.com/browse/VLPLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
3452 'info_dict': {
3453 'id': 'PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
3454 'uploader': 'NoCopyrightSounds',
3455 'description': 'Providing you with copyright free / safe music for gaming, live streaming, studying and more!',
3456 'uploader_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',
3457 'title': 'NCS Releases',
3458 },
3459 'playlist_mincount': 166,
3460 }, {
3461 'note': 'Topic, should redirect to playlist?list=UU...',
3462 'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',
3463 'info_dict': {
3464 'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',
3465 'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
3466 'title': 'Uploads from Royalty Free Music - Topic',
3467 'uploader': 'Royalty Free Music - Topic',
3468 },
3469 'expected_warnings': [
3470 'A channel/user page was given',
3471 'The URL does not have a videos tab',
3472 ],
3473 'playlist_mincount': 101,
3474 }, {
3475 'note': 'Topic without a UU playlist',
3476 'url': 'https://www.youtube.com/channel/UCtFRv9O2AHqOZjjynzrv-xg',
3477 'info_dict': {
3478 'id': 'UCtFRv9O2AHqOZjjynzrv-xg',
3479 'title': 'UCtFRv9O2AHqOZjjynzrv-xg',
3480 },
3481 'expected_warnings': [
3482 'A channel/user page was given',
3483 'The URL does not have a videos tab',
3484 'Falling back to channel URL',
3485 ],
3486 'playlist_mincount': 9,
3487 }, {
3488 'note': 'Youtube music Album',
3489 'url': 'https://music.youtube.com/browse/MPREb_gTAcphH99wE',
3490 'info_dict': {
3491 'id': 'OLAK5uy_l1m0thk3g31NmIIz_vMIbWtyv7eZixlH0',
3492 'title': 'Album - Royalty Free Music Library V2 (50 Songs)',
3493 },
3494 'playlist_count': 50,
3495 }, {
3496 'note': 'unlisted single video playlist',
3497 'url': 'https://www.youtube.com/playlist?list=PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
3498 'info_dict': {
3499 'uploader_id': 'UC9zHu_mHU96r19o-wV5Qs1Q',
3500 'uploader': 'colethedj',
3501 'id': 'PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
3502 'title': 'yt-dlp unlisted playlist test',
3503 'availability': 'unlisted'
3504 },
3505 'playlist_count': 1,
3506 }, {
3507 'note': 'API Fallback: Recommended - redirects to home page. Requires visitorData',
3508 'url': 'https://www.youtube.com/feed/recommended',
3509 'info_dict': {
3510 'id': 'recommended',
3511 'title': 'recommended',
3512 },
3513 'playlist_mincount': 50,
3514 'params': {
3515 'skip_download': True,
3516 'extractor_args': {'youtubetab': {'skip': ['webpage']}}
3517 },
3518 }, {
3519 'note': 'API Fallback: /videos tab, sorted by oldest first',
3520 'url': 'https://www.youtube.com/user/theCodyReeder/videos?view=0&sort=da&flow=grid',
3521 'info_dict': {
3522 'id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',
3523 'title': 'Cody\'sLab - Videos',
3524 'description': 'md5:d083b7c2f0c67ee7a6c74c3e9b4243fa',
3525 'uploader': 'Cody\'sLab',
3526 'uploader_id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',
3527 },
3528 'playlist_mincount': 650,
3529 'params': {
3530 'skip_download': True,
3531 'extractor_args': {'youtubetab': {'skip': ['webpage']}}
3532 },
3533 }, {
3534 'note': 'API Fallback: Topic, should redirect to playlist?list=UU...',
3535 'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',
3536 'info_dict': {
3537 'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',
3538 'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
3539 'title': 'Uploads from Royalty Free Music - Topic',
3540 'uploader': 'Royalty Free Music - Topic',
3541 },
3542 'expected_warnings': [
3543 'A channel/user page was given',
3544 'The URL does not have a videos tab',
3545 ],
3546 'playlist_mincount': 101,
3547 'params': {
3548 'skip_download': True,
3549 'extractor_args': {'youtubetab': {'skip': ['webpage']}}
3550 },
3551 }]
3552
@classmethod
def suitable(cls, url):
    """Tell whether this extractor should handle *url*.

    Any URL that YoutubeIE accepts is left to it; for everything else the
    decision is delegated to the parent class.
    """
    if YoutubeIE.suitable(url):
        return False
    return super(YoutubeTabIE, cls).suitable(url)
3557
3558 def _extract_channel_id(self, webpage):
3559 channel_id = self._html_search_meta(
3560 'channelId', webpage, 'channel id', default=None)
3561 if channel_id:
3562 return channel_id
3563 channel_url = self._html_search_meta(
3564 ('og:url', 'al:ios:url', 'al:android:url', 'al:web:url',
3565 'twitter:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad',
3566 'twitter:app:url:googleplay'), webpage, 'channel url')
3567 return self._search_regex(
3568 r'https?://(?:www\.)?youtube\.com/channel/([^/?#&])+',
3569 channel_url, 'channel id')
3570
3571 @staticmethod
3572 def _extract_basic_item_renderer(item):
3573 # Modified from _extract_grid_item_renderer
3574 known_basic_renderers = (
3575 'playlistRenderer', 'videoRenderer', 'channelRenderer', 'showRenderer'
3576 )
3577 for key, renderer in item.items():
3578 if not isinstance(renderer, dict):
3579 continue
3580 elif key in known_basic_renderers:
3581 return renderer
3582 elif key.startswith('grid') and key.endswith('Renderer'):
3583 return renderer
3584
def _grid_entries(self, grid_renderer):
    """Yield one result per usable item of *grid_renderer*['items'].

    Each item is resolved, in this order, as a playlist, a video, a channel
    or, failing those, a generic link taken from its navigation endpoint.
    """
    for grid_item in grid_renderer['items']:
        if not isinstance(grid_item, dict):
            continue
        item_renderer = self._extract_basic_item_renderer(grid_item)
        if not isinstance(item_renderer, dict):
            continue
        item_title = self._get_text(item_renderer, 'title')

        # playlist
        list_id = item_renderer.get('playlistId')
        if list_id:
            yield self.url_result(
                'https://www.youtube.com/playlist?list=%s' % list_id,
                ie=YoutubeTabIE.ie_key(), video_id=list_id,
                video_title=item_title)
            continue

        # video
        if item_renderer.get('videoId'):
            yield self._extract_video(item_renderer)
            continue

        # channel
        owner_id = item_renderer.get('channelId')
        if owner_id:
            yield self.url_result(
                'https://www.youtube.com/channel/%s' % owner_id,
                ie=YoutubeTabIE.ie_key(), video_title=item_title)
            continue

        # generic endpoint URL support
        endpoint_path = try_get(
            item_renderer, lambda x: x['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'],
            compat_str)
        endpoint_url = urljoin('https://www.youtube.com/', endpoint_path)
        if not endpoint_url:
            continue
        # Only the first extractor that accepts the URL is used
        matching_ie = next(
            (ie for ie in (YoutubeTabIE, YoutubePlaylistIE, YoutubeIE) if ie.suitable(endpoint_url)),
            None)
        if matching_ie is not None:
            yield self.url_result(
                endpoint_url, ie=matching_ie.ie_key(),
                video_id=matching_ie._match_id(endpoint_url), video_title=item_title)
3624
3625 def _shelf_entries_from_content(self, shelf_renderer):
3626 content = shelf_renderer.get('content')
3627 if not isinstance(content, dict):
3628 return
3629 renderer = content.get('gridRenderer') or content.get('expandedShelfContentsRenderer')
3630 if renderer:
3631 # TODO: add support for nested playlists so each shelf is processed
3632 # as separate playlist
3633 # TODO: this includes only first N items
3634 for entry in self._grid_entries(renderer):
3635 yield entry
3636 renderer = content.get('horizontalListRenderer')
3637 if renderer:
3638 # TODO
3639 pass
3640
def _shelf_entries(self, shelf_renderer, skip_channels=False):
    """Yield a link to the shelf's own page (when it has one), followed by
    the entries found in the shelf content.

    With *skip_channels* set, a shelf whose URL contains '/channels?'
    yields nothing at all.
    """
    endpoint_path = try_get(
        shelf_renderer, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        compat_str)
    shelf_url = urljoin('https://www.youtube.com', endpoint_path)
    if shelf_url:
        # Skip links to other channels. Note that checking for
        # endpoint.commandMetadata.webCommandMetadata.webPageType == WEB_PAGE_TYPE_CHANNEL
        # will not work
        if skip_channels and '/channels?' in shelf_url:
            return
        shelf_title = self._get_text(shelf_renderer, 'title')
        yield self.url_result(shelf_url, video_title=shelf_title)
    # The shelf may not carry a URL, so its inline content is extracted as well
    for content_entry in self._shelf_entries_from_content(shelf_renderer):
        yield content_entry
3657
3658 def _playlist_entries(self, video_list_renderer):
3659 for content in video_list_renderer['contents']:
3660 if not isinstance(content, dict):
3661 continue
3662 renderer = content.get('playlistVideoRenderer') or content.get('playlistPanelVideoRenderer')
3663 if not isinstance(renderer, dict):
3664 continue
3665 video_id = renderer.get('videoId')
3666 if not video_id:
3667 continue
3668 yield self._extract_video(renderer)
3669
def _rich_entries(self, rich_grid_renderer):
    """Yield the video held in content.videoRenderer, if it has a video ID."""
    video_renderer = try_get(
        rich_grid_renderer, lambda x: x['content']['videoRenderer'], dict)
    if video_renderer and video_renderer.get('videoId'):
        yield self._extract_video(video_renderer)
3677
3678 def _video_entry(self, video_renderer):
3679 video_id = video_renderer.get('videoId')
3680 if video_id:
3681 return self._extract_video(video_renderer)
3682
def _post_thread_entries(self, post_thread_renderer):
    """Yield the media referenced by a community post.

    In order: the attached video, the attached playlist, and every video
    linked from the post text (except a link to the attached video itself).
    """
    post = try_get(
        post_thread_renderer, lambda x: x['post']['backstagePostRenderer'], dict)
    if not post:
        return

    # video attachment
    attached_video = try_get(
        post, lambda x: x['backstageAttachment']['videoRenderer'], dict) or {}
    attached_video_id = attached_video.get('videoId')
    if attached_video_id:
        video_entry = self._extract_video(attached_video)
        if video_entry:
            yield video_entry

    # playlist attachment
    attached_list_id = try_get(
        post, lambda x: x['backstageAttachment']['playlistRenderer']['playlistId'], compat_str)
    if attached_list_id:
        yield self.url_result(
            'https://www.youtube.com/playlist?list=%s' % attached_list_id,
            ie=YoutubeTabIE.ie_key(), video_id=attached_list_id)

    # inline video links
    for text_run in try_get(post, lambda x: x['contentText']['runs'], list) or []:
        if not isinstance(text_run, dict):
            continue
        link_url = try_get(
            text_run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'], compat_str)
        if not link_url or not YoutubeIE.suitable(link_url):
            continue
        linked_id = YoutubeIE._match_id(link_url)
        # The attached video has already been handled above
        if attached_video_id == linked_id:
            continue
        yield self.url_result(link_url, ie=YoutubeIE.ie_key(), video_id=linked_id)
3718
3719 def _post_thread_continuation_entries(self, post_thread_continuation):
3720 contents = post_thread_continuation.get('contents')
3721 if not isinstance(contents, list):
3722 return
3723 for content in contents:
3724 renderer = content.get('backstagePostThreadRenderer')
3725 if not isinstance(renderer, dict):
3726 continue
3727 for entry in self._post_thread_entries(renderer):
3728 yield entry
3729
3730 r''' # unused
3731 def _rich_grid_entries(self, contents):
3732 for content in contents:
3733 video_renderer = try_get(content, lambda x: x['richItemRenderer']['content']['videoRenderer'], dict)
3734 if video_renderer:
3735 entry = self._video_entry(video_renderer)
3736 if entry:
3737 yield entry
3738 '''
def _entries(self, tab, item_id, ytcfg, account_syncid, visitor_data):
    """Yield the entries of *tab*, following continuations page by page.

    The entries embedded in the tab content are yielded first. After that,
    continuation pages are requested until no continuation is found, a
    request returns nothing, or a response contains no known renderer.

    *item_id* is only used to label the page requests; *ytcfg*,
    *account_syncid* and *visitor_data* are passed on to build the API
    headers.
    """

    def extract_entries(parent_renderer):  # this needs to be called again for continuation to work with feeds
        # Side effect: stores the continuation found along the way in
        # continuation_list[0], which the enclosing function reads afterwards
        contents = try_get(parent_renderer, lambda x: x['contents'], list) or []
        for content in contents:
            if not isinstance(content, dict):
                continue
            is_renderer = try_get(content, lambda x: x['itemSectionRenderer'], dict)
            if not is_renderer:
                # Not an item section: only rich items are handled here
                renderer = content.get('richItemRenderer')
                if renderer:
                    for entry in self._rich_entries(renderer):
                        yield entry
                    continuation_list[0] = self._extract_continuation(parent_renderer)
                continue
            isr_contents = try_get(is_renderer, lambda x: x['contents'], list) or []
            for isr_content in isr_contents:
                if not isinstance(isr_content, dict):
                    continue

                # Maps a renderer key to the callable producing its entries
                known_renderers = {
                    'playlistVideoListRenderer': self._playlist_entries,
                    'gridRenderer': self._grid_entries,
                    'shelfRenderer': lambda x: self._shelf_entries(x, tab.get('title') != 'Channels'),
                    'backstagePostThreadRenderer': self._post_thread_entries,
                    'videoRenderer': lambda x: [self._video_entry(x)],
                }
                # Only the first known renderer of each item is processed
                for key, renderer in isr_content.items():
                    if key not in known_renderers:
                        continue
                    for entry in known_renderers[key](renderer):
                        if entry:
                            yield entry
                    continuation_list[0] = self._extract_continuation(renderer)
                    break

            # Fall back to a continuation attached to the item section itself
            if not continuation_list[0]:
                continuation_list[0] = self._extract_continuation(is_renderer)

        # Last resort: a continuation attached to the parent renderer
        if not continuation_list[0]:
            continuation_list[0] = self._extract_continuation(parent_renderer)

    continuation_list = [None]  # Python 2 does not support nonlocal
    tab_content = try_get(tab, lambda x: x['content'], dict)
    if not tab_content:
        return
    parent_renderer = (
        try_get(tab_content, lambda x: x['sectionListRenderer'], dict)
        or try_get(tab_content, lambda x: x['richGridRenderer'], dict) or {})
    for entry in extract_entries(parent_renderer):
        yield entry
    continuation = continuation_list[0]

    for page_num in itertools.count(1):
        if not continuation:
            break
        headers = self.generate_api_headers(
            ytcfg=ytcfg, account_syncid=account_syncid, visitor_data=visitor_data)
        response = self._extract_response(
            item_id='%s page %s' % (item_id, page_num),
            query=continuation, headers=headers, ytcfg=ytcfg,
            check_get_keys=('continuationContents', 'onResponseReceivedActions', 'onResponseReceivedEndpoints'))

        if not response:
            break
        # Extracting updated visitor data is required to prevent an infinite extraction loop in some cases
        # See: https://github.com/ytdl-org/youtube-dl/issues/28702
        visitor_data = self._extract_visitor_data(response) or visitor_data

        # First response shape: entries under 'continuationContents'
        known_continuation_renderers = {
            'playlistVideoListContinuation': self._playlist_entries,
            'gridContinuation': self._grid_entries,
            'itemSectionContinuation': self._post_thread_continuation_entries,
            'sectionListContinuation': extract_entries,  # for feeds
        }
        continuation_contents = try_get(
            response, lambda x: x['continuationContents'], dict) or {}
        continuation_renderer = None
        for key, value in continuation_contents.items():
            if key not in known_continuation_renderers:
                continue
            continuation_renderer = value
            # Reset the shared slot so extract_entries (if used) can fill it anew
            continuation_list = [None]
            for entry in known_continuation_renderers[key](continuation_renderer):
                yield entry
            continuation = continuation_list[0] or self._extract_continuation(continuation_renderer)
            break
        if continuation_renderer:
            continue

        # Second response shape: items appended through 'onResponseReceived*'.
        # Each value is (entries callable, key under which the items are wrapped)
        known_renderers = {
            'gridPlaylistRenderer': (self._grid_entries, 'items'),
            'gridVideoRenderer': (self._grid_entries, 'items'),
            'gridChannelRenderer': (self._grid_entries, 'items'),
            'playlistVideoRenderer': (self._playlist_entries, 'contents'),
            'itemSectionRenderer': (extract_entries, 'contents'),  # for feeds
            'richItemRenderer': (extract_entries, 'contents'),  # for hashtag
            'backstagePostThreadRenderer': (self._post_thread_continuation_entries, 'contents')
        }
        on_response_received = dict_get(response, ('onResponseReceivedActions', 'onResponseReceivedEndpoints'))
        continuation_items = try_get(
            on_response_received, lambda x: x[0]['appendContinuationItemsAction']['continuationItems'], list)
        # The type of the first item decides how the whole list is processed
        continuation_item = try_get(continuation_items, lambda x: x[0], dict) or {}
        video_items_renderer = None
        for key, value in continuation_item.items():
            if key not in known_renderers:
                continue
            # Wrap the items so they look like the renderer the callable expects
            video_items_renderer = {known_renderers[key][1]: continuation_items}
            continuation_list = [None]
            for entry in known_renderers[key][0](video_items_renderer):
                yield entry
            continuation = continuation_list[0] or self._extract_continuation(video_items_renderer)
            break
        if video_items_renderer:
            continue
        # Nothing recognisable in the response: stop paging
        break
3855
@staticmethod
def _extract_selected_tab(tabs):
    """Return the renderer of the first tab whose 'selected' flag is True.

    Raises ExtractorError when no tab in *tabs* is selected.
    """
    for tab in tabs:
        tab_renderer = dict_get(tab, ('tabRenderer', 'expandableTabRenderer')) or {}
        if tab_renderer.get('selected') is True:
            return tab_renderer
    raise ExtractorError('Unable to find selected tab')
3864
@classmethod
def _extract_uploader(cls, data):
    """Return the uploader fields found in the playlist sidebar of *data*.

    The result may contain 'uploader', 'uploader_id' and 'uploader_url';
    fields that could not be determined are left out.
    """
    sidebar = cls._extract_sidebar_info_renderer(data, 'playlistSidebarSecondaryInfoRenderer') or {}
    owner_run = try_get(
        sidebar, lambda x: x['videoOwner']['videoOwnerRenderer']['title']['runs'][0], dict)
    if not owner_run:
        return {}
    owner_id = try_get(
        owner_run, lambda x: x['navigationEndpoint']['browseEndpoint']['browseId'], compat_str)
    owner_path = try_get(
        owner_run, lambda x: x['navigationEndpoint']['browseEndpoint']['canonicalBaseUrl'], compat_str)
    fields = (
        ('uploader', owner_run.get('text')),
        ('uploader_id', owner_id),
        ('uploader_url', urljoin('https://www.youtube.com/', owner_path)),
    )
    return {name: value for name, value in fields if value is not None}
3879
def _extract_from_tabs(self, item_id, ytcfg, data, tabs):
    """Build the playlist result for a page made of tabs.

    Metadata is read from the channel metadata renderer of *data* or, when
    there is none, from its playlist metadata renderer. The entries come
    from the currently selected tab of *tabs*.
    """
    selected_tab = self._extract_selected_tab(tabs)

    playlist_id = title = description = None
    channel_name = channel_url = channel_id = None
    raw_thumbnails = []
    tags = []

    metadata_renderer = try_get(
        data, lambda x: x['metadata']['channelMetadataRenderer'], dict)
    if metadata_renderer:
        channel_name = metadata_renderer.get('title')
        channel_url = metadata_renderer.get('channelUrl')
        channel_id = metadata_renderer.get('externalId')
    else:
        metadata_renderer = try_get(
            data, lambda x: x['metadata']['playlistMetadataRenderer'], dict)

    if metadata_renderer:
        title = metadata_renderer.get('title')
        description = metadata_renderer.get('description', '')
        playlist_id = channel_id
        tags = metadata_renderer.get('keywords', '').split()
        raw_thumbnails = try_get(
            metadata_renderer, lambda x: x['avatar']['thumbnails'], list)
        if not raw_thumbnails:
            # No avatar: use the playlist thumbnail from the sidebar instead
            raw_thumbnails = try_get(
                self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer'),
                lambda x: x['thumbnailRenderer']['playlistVideoThumbnailRenderer']['thumbnail']['thumbnails'],
                list) or []

    thumbnails = []
    for thumb in raw_thumbnails:
        thumb_url = url_or_none(thumb.get('url')) if isinstance(thumb, dict) else None
        if thumb_url:
            thumbnails.append({
                'url': thumb_url,
                'width': int_or_none(thumb.get('width')),
                'height': int_or_none(thumb.get('height')),
            })

    if playlist_id is None:
        playlist_id = item_id
    if title is None:
        hashtag = try_get(data, lambda x: x['header']['hashtagHeaderRenderer']['hashtag']['simpleText'])
        title = hashtag or playlist_id
    title = (
        title
        + format_field(selected_tab, 'title', ' - %s')
        + format_field(selected_tab, 'expandedText', ' - %s'))

    metadata = {
        'playlist_id': playlist_id,
        'playlist_title': title,
        'playlist_description': description,
        'uploader': channel_name,
        'uploader_id': channel_id,
        'uploader_url': channel_url,
        'thumbnails': thumbnails,
        'tags': tags,
    }
    availability = self._extract_availability(data)
    if availability:
        metadata['availability'] = availability
    if not channel_id:
        metadata.update(self._extract_uploader(data))
    # The channel_* fields mirror whatever uploader info ended up in metadata
    metadata['channel'] = metadata['uploader']
    metadata['channel_id'] = metadata['uploader_id']
    metadata['channel_url'] = metadata['uploader_url']

    entries = self._entries(
        selected_tab, playlist_id, ytcfg,
        self._extract_account_syncid(ytcfg, data),
        self._extract_visitor_data(data, ytcfg))
    return self.playlist_result(entries, **metadata)
3954
def _extract_mix_playlist(self, playlist, playlist_id, data, ytcfg):
    """Yield the videos of a Mix, requesting further pages as needed.

    Every page is a playlist object whose videos overlap with the previous
    page: videos up to and including the last one already yielded are
    skipped. Extraction ends when a page is missing, has no videos, brings
    nothing new, or when the very first video shows up again.
    """
    first_id = last_id = response = None
    for page_num in itertools.count(1):
        # A response without the expected playlist object ends the Mix
        # instead of crashing inside _playlist_entries
        if not playlist:
            return
        videos = list(self._playlist_entries(playlist))
        if not videos:
            return
        # Resume right after the last video yielded from the previous page
        start = next((i for i, v in enumerate(videos) if v['id'] == last_id), -1) + 1
        if start >= len(videos):
            return
        for video in videos[start:]:
            if video['id'] == first_id:
                self.to_screen('First video %s found again; Assuming end of Mix' % first_id)
                return
            yield video
        first_id = first_id or videos[0]['id']
        last_id = videos[-1]['id']
        # The last item may not be a playlistPanelVideoRenderer (or may lack a
        # watch endpoint); fall back to the defaults below in that case
        watch_endpoint = try_get(
            playlist, lambda x: x['contents'][-1]['playlistPanelVideoRenderer']['navigationEndpoint']['watchEndpoint'],
            dict) or {}
        headers = self.generate_api_headers(
            ytcfg=ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
            visitor_data=self._extract_visitor_data(response, data, ytcfg))
        query = {
            'playlistId': playlist_id,
            'videoId': watch_endpoint.get('videoId') or last_id,
            'index': watch_endpoint.get('index') or len(videos),
            'params': watch_endpoint.get('params') or 'OAE%3D'
        }
        response = self._extract_response(
            item_id='%s page %d' % (playlist_id, page_num),
            query=query, ep='next', headers=headers, ytcfg=ytcfg,
            check_get_keys='contents'
        )
        playlist = try_get(
            response, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
3989
def _extract_from_playlist(self, item_id, url, data, playlist, ytcfg):
    """Return the result for an inline playlist object.

    When the playlist points at a URL other than *url*, extraction is
    delegated to that URL; otherwise it is treated as a Mix.
    """
    title = playlist.get('title') or try_get(
        data, lambda x: x['titleText']['simpleText'], compat_str)
    playlist_id = playlist.get('playlistId') or item_id

    endpoint_path = try_get(
        playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        compat_str)
    playlist_url = urljoin(url, endpoint_path)

    if not playlist_url or playlist_url == url:
        # Mix playlist: there is no separate playlist page to delegate to
        return self.playlist_result(
            self._extract_mix_playlist(playlist, playlist_id, data, ytcfg),
            playlist_id=playlist_id, playlist_title=title)

    # Delegating everything except mix playlists to regular tab-based playlist URL
    return self.url_result(
        playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
        video_title=title)
4007
def _extract_availability(self, data):
    """
    Gets the availability of a given playlist/tab.
    Note: Unless YouTube tells us explicitly, we do not assume it is public
    @param data: response
    """
    sidebar = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer') or {}
    labels = self._extract_badges(sidebar)

    # Personal playlists, when authenticated, have a dropdown visibility selector instead of a badge
    dropdown_entries = try_get(
        sidebar, lambda x: x['privacyForm']['dropdownFormFieldRenderer']['dropdown']['dropdownRenderer']['entries'], list) or []
    for dropdown_entry in dropdown_entries:
        if not try_get(dropdown_entry, lambda x: x['privacyDropdownItemRenderer']['isSelected'], bool):
            continue
        # Only the first selected entry is considered
        selected_label = self._get_text(dropdown_entry, ('privacyDropdownItemRenderer', 'label'))
        if selected_label:
            labels.add(selected_label.lower())
        break

    is_private = is_unlisted = None
    for label in labels:
        if label == 'unlisted':
            is_unlisted = True
        elif label == 'private':
            is_private = True
        elif label == 'public':
            is_unlisted = is_private = False
    return self._availability(is_private, False, False, False, is_unlisted)
4039
@staticmethod
def _extract_sidebar_info_renderer(data, info_renderer, expected_type=dict):
    """Return the first truthy *info_renderer* value of type *expected_type*
    among the playlist sidebar items of *data*, or None if there is none."""
    sidebar_items = try_get(
        data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list) or []
    for sidebar_item in sidebar_items:
        found = try_get(sidebar_item, lambda x: x[info_renderer], expected_type)
        if found:
            return found
    return None
4048
def _reload_with_unavailable_videos(self, item_id, data, ytcfg):
    """
    Get playlist with unavailable videos if the 'show unavailable videos' button exists.
    """
    sidebar = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer')
    if not sidebar:
        return

    # Stays empty when the menu has no 'show unavailable videos' entry,
    # in which case the default query values below are used
    browse_endpoint = {}
    for menu_item in try_get(sidebar, lambda x: x['menu']['menuRenderer']['items'], list) or []:
        if not isinstance(menu_item, dict):
            continue
        nav_item = menu_item.get('menuNavigationItemRenderer')
        label = try_get(nav_item, lambda x: x['text']['simpleText'], compat_str)
        if label and label.lower() == 'show unavailable videos':
            browse_endpoint = try_get(
                nav_item, lambda x: x['navigationEndpoint']['browseEndpoint'], dict) or {}
            break

    headers = self.generate_api_headers(
        ytcfg=ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
        visitor_data=self._extract_visitor_data(data, ytcfg))
    query = {
        'params': browse_endpoint.get('params') or 'wgYCCAA=',
        'browseId': browse_endpoint.get('browseId') or 'VL%s' % item_id
    }
    return self._extract_response(
        item_id=item_id, headers=headers, query=query,
        check_get_keys='contents', fatal=False, ytcfg=ytcfg,
        note='Downloading API JSON with unavailable videos')
4084
def _extract_webpage(self, url, item_id, fatal=True):
    """Download the webpage of *url* and extract its initial data, with retries.

    An attempt is repeated (up to the 'extractor_retries' parameter, 3 by
    default) after a network error other than HTTP 403/429, or when the
    initial data has neither 'contents' nor 'currentVideoEndpoint'.

    With *fatal* set, failures are raised; otherwise they are reported as
    warnings and whatever was obtained so far is returned.

    Returns a (webpage, data) tuple.
    """
    retries = self.get_param('extractor_retries', 3)
    count = -1  # becomes 0 on the first attempt; attempts > 0 are retries
    webpage = data = last_error = None
    while count < retries:
        count += 1
        # Sometimes youtube returns a webpage with incomplete ytInitialData
        # See: https://github.com/yt-dlp/yt-dlp/issues/116
        if last_error:
            self.report_warning('%s. Retrying ...' % last_error)
        try:
            webpage = self._download_webpage(
                url, item_id,
                note='Downloading webpage%s' % (' (retry #%d)' % count if count else '',))
            data = self.extract_yt_initial_data(item_id, webpage or '', fatal=fatal) or {}
        except ExtractorError as e:
            if isinstance(e.cause, network_exceptions):
                # HTTP 403 and 429 are not retried
                if not isinstance(e.cause, compat_HTTPError) or e.cause.code not in (403, 429):
                    last_error = error_to_compat_str(e.cause or e.msg)
                    if count < retries:
                        continue
            # Not retryable, or out of retries
            if fatal:
                raise
            self.report_warning(error_to_compat_str(e))
            break
        else:
            # Download and parsing succeeded: check the page for alerts
            try:
                self._extract_and_report_alerts(data)
            except ExtractorError as e:
                if fatal:
                    raise
                self.report_warning(error_to_compat_str(e))
                break

            # The data is considered complete once one of these keys is present
            if dict_get(data, ('contents', 'currentVideoEndpoint')):
                break

            last_error = 'Incomplete yt initial data received'
            if count >= retries:
                if fatal:
                    raise ExtractorError(last_error)
                self.report_warning(last_error)
                break

    return webpage, data
4130
def _extract_data(self, url, item_id, ytcfg=None, fatal=True, webpage_fatal=False, default_client='web'):
    """Return (data, ytcfg) for *url*, from the webpage or else from the API.

    The webpage is used unless 'webpage' is listed in the 'skip' extractor
    argument. When it yields no data, the tab endpoint API is queried.
    Before doing so for an authenticated session without ytcfg, an error is
    raised (if *fatal* and 'authcheck' is not skipped) or a warning issued.
    """
    data = None
    if 'webpage' not in self._configuration_arg('skip'):
        webpage, data = self._extract_webpage(url, item_id, fatal=webpage_fatal)
        if not ytcfg:
            ytcfg = self.extract_ytcfg(item_id, webpage)
    if data:
        return data, ytcfg

    if not ytcfg and self.is_authenticated:
        msg = 'Playlists that require authentication may not extract correctly without a successful webpage download.'
        if 'authcheck' not in self._configuration_arg('skip') and fatal:
            raise ExtractorError(
                msg + ' If you are not downloading private content, or your cookies are only for the first account and channel,'
                      ' pass "--extractor-args youtubetab:skip=authcheck" to skip this check',
                expected=True)
        self.report_warning(msg, only_once=True)
    api_data = self._extract_tab_endpoint(
        url, item_id, ytcfg, fatal=fatal, default_client=default_client)
    return api_data, ytcfg
4147
def _extract_tab_endpoint(self, url, item_id, ytcfg=None, fatal=True, default_client='web'):
    """
    Resolve `url` through the 'navigation/resolve_url' API and fetch the endpoint it maps to.

    A resolved 'browseEndpoint' is fetched from the 'browse' API and a
    'watchEndpoint' from the 'next' API, checked in that order.

    Returns the API response of the first endpoint found. If none is found,
    raises ExtractorError (expected) when `fatal` is true; otherwise reports a
    warning and returns None.
    """
    api_headers = self.generate_api_headers(ytcfg=ytcfg, default_client=default_client)
    resolved = self._extract_response(
        item_id=item_id, query={'url': url}, check_get_keys='endpoint', headers=api_headers, ytcfg=ytcfg, fatal=fatal,
        ep='navigation/resolve_url', note='Downloading API parameters API JSON', default_client=default_client)

    # (key in the resolved endpoint, API endpoint to query), in priority order
    candidates = (('browseEndpoint', 'browse'), ('watchEndpoint', 'next'))
    for endpoint_key, api_ep in candidates:
        endpoint_params = try_get(resolved, lambda x: x['endpoint'][endpoint_key], dict)
        if not endpoint_params:
            continue
        return self._extract_response(
            item_id=item_id, query=endpoint_params, ep=api_ep, headers=api_headers,
            ytcfg=ytcfg, fatal=fatal, default_client=default_client,
            check_get_keys=('contents', 'currentVideoEndpoint'))

    err_note = 'Failed to resolve url (does the playlist exist?)'
    if not fatal:
        self.report_warning(err_note, item_id)
        return None
    raise ExtractorError(err_note, expected=True)
4165
4166 @staticmethod
4167 def _smuggle_data(entries, data):
4168 for entry in entries:
4169 if data:
4170 entry['url'] = smuggle_url(entry['url'], data)
4171 yield entry
4172
def _real_extract(self, url):
    """
    Entry point: unsmuggle `url`, flag music URLs, and delegate to `__real_extract`.

    Any 'entries' in the result are wrapped so that the smuggled data is
    carried over to each entry's URL.
    """
    plain_url, smuggled = unsmuggle_url(url, {})
    if self.is_music_url(plain_url):
        smuggled['is_music_url'] = True

    result = self.__real_extract(plain_url, smuggled)
    entries = result.get('entries')
    if entries:
        result['entries'] = self._smuggle_data(entries, smuggled)
    return result
4181
# Splits a URL into three named parts:
#   'pre'  - the portion matched by _VALID_URL itself
#   'tab'  - an optional '/name' segment, only attempted when the 'channel_type' group matched
#   'post' - everything that follows
# NOTE(review): 'channel_type' is presumably a named group of _VALID_URL, which is defined outside this view
_url_re = re.compile(r'(?P<pre>%s)(?(channel_type)(?P<tab>/\w+))?(?P<post>.*)$' % _VALID_URL)
4183
def __real_extract(self, url, smuggled_data):
    """
    Resolve `url` to a tab/playlist result, or hand it off to another extractor.

    `smuggled_data` is the dict passed in by `_real_extract`; only its
    'is_music_url' flag is read here.

    The URL is first normalised (host forced to www.youtube.com, tab name
    lower-cased, music and channel-home rewrites applied). The page data is
    then fetched and dispatched on its shape: browse tabs, a watch-page
    playlist, or a single video.

    Raises ExtractorError when the page cannot be recognised, when a music
    album cannot be resolved to a playlist, or when a /live tab did not
    redirect to a video.
    """
    item_id = self._match_id(url)
    # Force the canonical host, whatever host the URL was given with
    url = compat_urlparse.urlunparse(
        compat_urlparse.urlparse(url)._replace(netloc='www.youtube.com'))
    compat_opts = self.get_param('compat_opts', [])

    def get_mobj(url):
        # Named groups of _url_re as a dict, with unmatched groups as '' instead of None
        mobj = self._url_re.match(url).groupdict()
        mobj.update((k, '') for k, v in mobj.items() if v is None)
        return mobj

    mobj = get_mobj(url)
    # Youtube returns incomplete data if tabname is not lower case
    pre, tab, post, is_channel = mobj['pre'], mobj['tab'].lower(), mobj['post'], not mobj['not_channel']
    if is_channel:
        if smuggled_data.get('is_music_url'):
            if item_id[:2] == 'VL':
                # Youtube music VL channels have an equivalent playlist
                item_id = item_id[2:]
                pre, tab, post, is_channel = 'https://www.youtube.com/playlist?list=%s' % item_id, '', '', False
            elif item_id[:2] == 'MP':
                # Resolve albums (/[channel/browse]/MP...) to their equivalent playlist
                mdata = self._extract_tab_endpoint(
                    'https://music.youtube.com/channel/%s' % item_id, item_id, default_client='web_music')
                murl = traverse_obj(
                    mdata, ('microformat', 'microformatDataRenderer', 'urlCanonical'), get_all=False, expected_type=compat_str)
                if not murl:
                    raise ExtractorError('Failed to resolve album to playlist.')
                return self.url_result(murl, ie=YoutubeTabIE.ie_key())
            elif mobj['channel_type'] == 'browse':
                # Youtube music /browse/ should be changed to /channel/
                pre = 'https://www.youtube.com/channel/%s' % item_id
    # is_channel is re-checked because the 'VL' branch above may have cleared it
    if is_channel and not tab and 'no-youtube-channel-redirect' not in compat_opts:
        # Home URLs should redirect to /videos/
        self.report_warning(
            'A channel/user page was given. All the channel\'s videos will be downloaded. '
            'To download only the videos in the home page, add a "/featured" to the URL')
        tab = '/videos'

    # Rebuild the URL from the (possibly rewritten) parts and re-parse it
    url = ''.join((pre, tab, post))
    mobj = get_mobj(url)

    # Handle both video/playlist URLs
    qs = parse_qs(url)
    video_id = qs.get('v', [None])[0]
    playlist_id = qs.get('list', [None])[0]

    if not video_id and mobj['not_channel'].startswith('watch'):
        if not playlist_id:
            # If there is neither video or playlist ids, youtube redirects to home page, which is undesirable
            raise ExtractorError('Unable to recognize tab page')
        # Common mistake: https://www.youtube.com/watch?list=playlist_id
        self.report_warning('A video URL was given without video ID. Trying to download playlist %s' % playlist_id)
        url = 'https://www.youtube.com/playlist?list=%s' % playlist_id
        mobj = get_mobj(url)

    if video_id and playlist_id:
        if self.get_param('noplaylist'):
            self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
            return self.url_result(f'https://www.youtube.com/watch?v={video_id}', ie=YoutubeIE.ie_key(), video_id=video_id)
        self.to_screen('Downloading playlist %s; add --no-playlist to just download video %s' % (playlist_id, video_id))

    data, ytcfg = self._extract_data(url, item_id)

    tabs = try_get(
        data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
    if tabs:
        selected_tab = self._extract_selected_tab(tabs)
        tab_name = selected_tab.get('title', '')
        if 'no-youtube-channel-redirect' not in compat_opts:
            if mobj['tab'] == '/live':
                # Live tab should have redirected to the video
                raise ExtractorError('The channel is not currently live', expected=True)
            # The /videos tab was requested but a different tab came back selected
            if mobj['tab'] == '/videos' and tab_name.lower() != mobj['tab'][1:]:
                if not mobj['not_channel'] and item_id[:2] == 'UC':
                    # Topic channels don't have /videos. Use the equivalent playlist instead
                    self.report_warning('The URL does not have a %s tab. Trying to redirect to playlist UU%s instead' % (mobj['tab'][1:], item_id[2:]))
                    pl_id = 'UU%s' % item_id[2:]
                    pl_url = 'https://www.youtube.com/playlist?list=%s%s' % (pl_id, mobj['post'])
                    try:
                        # All four names are rebound together, and only if the playlist extraction succeeds
                        data, ytcfg, item_id, url = *self._extract_data(pl_url, pl_id, ytcfg=ytcfg, fatal=True), pl_id, pl_url
                    except ExtractorError:
                        self.report_warning('The playlist gave error. Falling back to channel URL')
                else:
                    self.report_warning('The URL does not have a %s tab. %s is being downloaded instead' % (mobj['tab'][1:], tab_name))

    self.write_debug('Final URL: %s' % url)

    # YouTube sometimes provides a button to reload playlist with unavailable videos.
    if 'no-youtube-unavailable-videos' not in compat_opts:
        data = self._reload_with_unavailable_videos(item_id, data, ytcfg) or data
    self._extract_and_report_alerts(data, only_once=True)
    # Re-read the tabs: data may have been replaced since they were first read
    tabs = try_get(
        data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
    if tabs:
        return self._extract_from_tabs(item_id, ytcfg, data, tabs)

    playlist = try_get(
        data, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
    if playlist:
        return self._extract_from_playlist(item_id, url, data, playlist, ytcfg)

    # Last resort: a single video, preferring the ID from the data over the one from the URL
    video_id = try_get(
        data, lambda x: x['currentVideoEndpoint']['watchEndpoint']['videoId'],
        compat_str) or video_id
    if video_id:
        if mobj['tab'] != '/live':  # live tab is expected to redirect to video
            self.report_warning('Unable to recognize playlist. Downloading just video %s' % video_id)
        return self.url_result(f'https://www.youtube.com/watch?v={video_id}', ie=YoutubeIE.ie_key(), video_id=video_id)

    raise ExtractorError('Unable to recognize tab page')
4295
4296
class YoutubePlaylistIE(InfoExtractor):
    # Accepts bare playlist IDs and any youtube/invidious URL carrying a `list=` parameter,
    # then hands the equivalent /playlist URL to YoutubeTabIE.
    IE_NAME = 'youtube:playlist'
    IE_DESC = 'YouTube playlists'
    _VALID_URL = r'''(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        (?:
                            (?:
                                youtube(?:kids)?\.com|
                                %(invidious)s
                            )
                            /.*?\?.*?\blist=
                        )?
                        (?P<id>%(playlist_id)s)
                     )''' % {
        'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE,
        'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
    }
    _TESTS = [{
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
            'uploader': 'Wickydoo',
            'uploader_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
            'description': 'md5:8fa6f52abb47a9552002fa3ddfc57fc2',
        },
        'playlist_mincount': 29,
    }, {
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        },
        'playlist_count': 2,
        'skip': 'This playlist is private',
    }, {
        'note': 'embedded',
        'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA15',
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
            'uploader': 'milan',
            'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
        }
    }, {
        'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
        'playlist_mincount': 654,
        'info_dict': {
            'title': '2018 Chinese New Singles (11/6 updated)',
            'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
            'uploader': 'LBK',
            'uploader_id': 'UC21nz3_MesPLqtDqwdvnoxA',
            'description': 'md5:da521864744d60a198e3a88af4db0d9d',
        }
    }, {
        'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
        'only_matching': True,
    }, {
        # music album playlist
        'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Reject URLs that YoutubeTabIE accepts or that carry a `v` parameter; otherwise defer to the base check."""
        if YoutubeTabIE.suitable(url) or parse_qs(url).get('v', [None])[0]:
            return False
        return super().suitable(url)

    def _real_extract(self, url):
        """Rewrite the input as a www.youtube.com/playlist URL and delegate it to YoutubeTabIE."""
        playlist_id = self._match_id(url)
        from_music = YoutubeBaseInfoExtractor.is_music_url(url)
        # Keep the original query string when there is one; a bare ID has none
        query = parse_qs(url) or {'list': playlist_id}
        playlist_url = update_url_query('https://www.youtube.com/playlist', query)
        if from_music:
            playlist_url = smuggle_url(playlist_url, {'is_music_url': True})
        return self.url_result(playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
4382
4383
class YoutubeYtBeIE(InfoExtractor):
    # youtu.be short links that also carry a playlist (`list=`) parameter
    IE_DESC = 'youtu.be'
    _VALID_URL = r'https?://youtu\.be/(?P<id>[0-9A-Za-z_-]{11})/*?.*?\blist=(?P<playlist_id>%(playlist_id)s)' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TESTS = [{
        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
        'info_dict': {
            'id': 'yeWKywCrFtk',
            'ext': 'mp4',
            'title': 'Small Scale Baler and Braiding Rugs',
            'uploader': 'Backus-Page House Museum',
            'uploader_id': 'backuspagemuseum',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
            'upload_date': '20161008',
            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
            'categories': ['Nonprofits & Activism'],
            'tags': list,
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
    }, {
        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Expand the short link into the equivalent watch URL (video + playlist) and delegate to YoutubeTabIE."""
        video_id, playlist_id = self._match_valid_url(url).group('id', 'playlist_id')
        watch_url = update_url_query('https://www.youtube.com/watch', {
            'v': video_id,
            'list': playlist_id,
            'feature': 'youtu.be',
        })
        return self.url_result(watch_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
4422
4423
class YoutubeYtUserIE(InfoExtractor):
    # "ytuser:<name>" shorthand for a user's /videos page
    IE_DESC = 'YouTube user videos; "ytuser:" prefix'
    _VALID_URL = r'ytuser:(?P<id>.+)'
    _TESTS = [{
        'url': 'ytuser:phihag',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Map the shorthand onto the user's /videos URL and delegate to YoutubeTabIE."""
        user_id = self._match_id(url)
        videos_url = 'https://www.youtube.com/user/%s/videos' % user_id
        return self.url_result(videos_url, ie=YoutubeTabIE.ie_key(), video_id=user_id)
4437
4438
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    # ":ytfav" keyword (and its spelling variants) -> the 'LL' playlist
    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube liked videos; ":ytfav" keyword (requires cookies)'
    _LOGIN_REQUIRED = True
    _VALID_URL = r':ytfav(?:ou?rite)?s?'
    _TESTS = [{
        'url': ':ytfav',
        'only_matching': True,
    }, {
        'url': ':ytfavorites',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the fixed 'LL' playlist URL; `url` itself is not inspected."""
        liked_url = 'https://www.youtube.com/playlist?list=LL'
        return self.url_result(liked_url, ie=YoutubeTabIE.ie_key())
4456
4457
class YoutubeSearchIE(SearchInfoExtractor, YoutubeTabIE):
    # Search extractor for the 'ytsearch' key; results are paged through the 'search' API endpoint
    IE_NAME = 'youtube:search'
    IE_DESC = 'YouTube searches'
    _SEARCH_KEY = 'ytsearch'
    _SEARCH_PARAMS = None
    _TESTS = []

    def _search_results(self, query):
        """
        Lazily yield `_extract_video(...)` for every video renderer found for `query`.

        Pages are requested one after another until a response is empty, has no
        result sections, or provides no continuation for the next page.
        """
        payload = {'query': query}
        if self._SEARCH_PARAMS:
            payload['params'] = self._SEARCH_PARAMS

        continuation = {}
        for page_num in itertools.count(1):
            # Merge the continuation of the previous page (if any) into the request
            payload.update(continuation)
            response = self._extract_response(
                item_id='query "%s" page %s' % (query, page_num), ep='search', query=payload,
                check_get_keys=('contents', 'onResponseReceivedCommands'))
            if not response:
                return

            sections = try_get(
                response,
                (lambda x: x['contents']['twoColumnSearchResultsRenderer']['primaryContents']['sectionListRenderer']['contents'],
                 lambda x: x['onResponseReceivedCommands'][0]['appendContinuationItemsAction']['continuationItems']),
                list)
            if not sections:
                return

            # Youtube sometimes adds promoted content to searches,
            # changing the index location of videos and token.
            # So we search through all entries till we find them.
            continuation = None
            for section in sections:
                continuation = continuation or self._extract_continuation({'contents': [section]})

                items = try_get(section, lambda x: x['itemSectionRenderer']['contents'], list)
                for item in items or ():
                    renderer = item.get('videoRenderer') if isinstance(item, dict) else None
                    # Only dict renderers carrying a video ID are real results
                    if isinstance(renderer, dict) and renderer.get('videoId'):
                        yield self._extract_video(renderer)

            if not continuation:
                return
4514
4515
class YoutubeSearchDateIE(YoutubeSearchIE):
    # Same as YoutubeSearchIE, but under the 'ytsearchdate' key and with fixed search params
    IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
    IE_DESC = 'YouTube searches, newest videos first'
    _SEARCH_KEY = 'ytsearchdate'
    _SEARCH_PARAMS = 'CAI%3D'
4521
4522
class YoutubeSearchURLIE(YoutubeSearchIE):
    # Handles /results?search_query=... (or q=...) URLs rather than a search-key prefix
    IE_NAME = YoutubeSearchIE.IE_NAME + '_url'
    IE_DESC = 'YouTube search URLs with sorting and filter support'
    _SEARCH_KEY = None
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?:[^&]+)(?:[&]|$)'
    # _MAX_RESULTS = 100
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
        }
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    @classmethod
    def _make_valid_url(cls):
        """Return this class's own `_VALID_URL` unchanged."""
        return cls._VALID_URL

    def _real_extract(self, url):
        """Read the query ('search_query' or 'q') and optional 'sp' params from `url`, then run the search."""
        url_params = parse_qs(url)
        search_terms = url_params.get('search_query') or url_params.get('q')
        query = search_terms[0]
        # Stored on the instance; `_search_results` reads `self._SEARCH_PARAMS`
        self._SEARCH_PARAMS = url_params.get('sp', ('',))[0]
        return self._get_n_results(query, self._MAX_RESULTS)
4550
4551
class YoutubeFeedsInfoExtractor(YoutubeTabIE):
    """
    Base class for feed extractors
    Subclasses must define the _FEED_NAME property.
    """
    _TESTS = []
    _LOGIN_REQUIRED = True

    @property
    def IE_NAME(self):
        # Derived from the feed name, so subclasses do not set it themselves
        feed = self._FEED_NAME
        return 'youtube:%s' % feed

    def _real_extract(self, url):
        # `url` is not inspected; the target depends only on the subclass's feed name
        feed_url = 'https://www.youtube.com/feed/%s' % self._FEED_NAME
        return self.url_result(feed_url, ie=YoutubeTabIE.ie_key())
4568
4569
class YoutubeWatchLaterIE(InfoExtractor):
    # ":ytwatchlater" keyword -> the 'WL' playlist
    IE_NAME = 'youtube:watchlater'
    IE_DESC = 'Youtube watch later list; ":ytwatchlater" keyword (requires cookies)'
    _VALID_URL = r':ytwatchlater'
    _TESTS = [{
        'url': ':ytwatchlater',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the fixed 'WL' playlist URL; `url` itself is not inspected."""
        watch_later_url = 'https://www.youtube.com/playlist?list=WL'
        return self.url_result(watch_later_url, ie=YoutubeTabIE.ie_key())
4582
4583
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    # 'recommended' feed; also matches the bare youtube.com home URL. Overrides the base class's login requirement.
    _FEED_NAME = 'recommended'
    _LOGIN_REQUIRED = False
    IE_DESC = 'YouTube recommended videos; ":ytrec" keyword'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/?(?:[?#]|$)|:ytrec(?:ommended)?'
    _TESTS = [{
        'url': ':ytrec',
        'only_matching': True,
    }, {
        'url': ':ytrecommended',
        'only_matching': True,
    }, {
        'url': 'https://youtube.com',
        'only_matching': True,
    }]
4599
4600
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    # 'subscriptions' feed, reached through the ":ytsubs" keyword and its variants
    _FEED_NAME = 'subscriptions'
    IE_DESC = 'YouTube subscriptions feed; ":ytsubs" keyword (requires cookies)'
    _VALID_URL = r':ytsub(?:scription)?s?'
    _TESTS = [{
        'url': ':ytsubs',
        'only_matching': True,
    }, {
        'url': ':ytsubscriptions',
        'only_matching': True,
    }]
4612
4613
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    # 'history' feed, reached through the ":ythis" / ":ythistory" keyword
    _FEED_NAME = 'history'
    IE_DESC = 'Youtube watch history; ":ythis" keyword (requires cookies)'
    _VALID_URL = r':ythis(?:tory)?'
    _TESTS = [{
        'url': ':ythistory',
        'only_matching': True,
    }]
4622
4623
class YoutubeTruncatedURLIE(InfoExtractor):
    # Catches watch/attribution_link URLs that end right after a single non-video parameter
    # (or after "watch?" itself) and explains the likely cause instead of extracting anything.
    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError telling the user to quote the URL."""
        # The example commands name yt-dlp: this module is part of the yt_dlp package,
        # so pointing the user at a different executable would be misleading.
        raise ExtractorError(
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like yt-dlp '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply yt-dlp BaW_jenozKc .',
            expected=True)
4671
4672
class YoutubeClipIE(InfoExtractor):
    # /clip/ URLs: warn that clips are unsupported, then pass the URL to the 'Generic' extractor
    IE_NAME = 'youtube:clip'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/clip/'

    def _real_extract(self, url):
        """Warn that clips are unsupported and return a url_result for the 'Generic' extractor."""
        notice = 'YouTube clips are not currently supported. The entire video will be downloaded instead'
        self.report_warning(notice)
        return self.url_result(url, 'Generic')
4681
4682
class YoutubeTruncatedIDIE(InfoExtractor):
    # Catches watch URLs whose `v` value is only 1-10 characters long and ends the URL
    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError reporting the incomplete video ID."""
        short_id = self._match_id(url)
        message = 'Incomplete YouTube ID %s. URL %s looks truncated.' % (short_id, url)
        raise ExtractorError(message, expected=True)