1 # coding: utf-8
2
3 from __future__ import unicode_literals
4
5 import calendar
6 import copy
7 import datetime
8 import hashlib
9 import itertools
10 import json
11 import math
12 import os.path
13 import random
14 import re
15 import sys
16 import time
17 import traceback
18
19 from .common import InfoExtractor, SearchInfoExtractor
20 from ..compat import (
21 compat_chr,
22 compat_HTTPError,
23 compat_parse_qs,
24 compat_str,
25 compat_urllib_parse_unquote_plus,
26 compat_urllib_parse_urlencode,
27 compat_urllib_parse_urlparse,
28 compat_urlparse,
29 )
30 from ..jsinterp import JSInterpreter
31 from ..utils import (
32 bug_reports_message,
33 clean_html,
34 datetime_from_str,
35 dict_get,
36 error_to_compat_str,
37 ExtractorError,
38 float_or_none,
39 format_field,
40 int_or_none,
41 is_html,
42 join_nonempty,
43 mimetype2ext,
44 network_exceptions,
45 NO_DEFAULT,
46 orderedSet,
47 parse_codecs,
48 parse_count,
49 parse_duration,
50 parse_iso8601,
51 parse_qs,
52 qualities,
53 remove_end,
54 remove_start,
55 smuggle_url,
56 str_or_none,
57 str_to_int,
58 traverse_obj,
59 try_get,
60 unescapeHTML,
61 unified_strdate,
62 unsmuggle_url,
63 update_url_query,
64 url_or_none,
65 urljoin,
66 variadic,
67 )
68
69
70 def get_first(obj, keys, **kwargs):
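# Return the first matching value for `keys` across a sequence of dicts (traverse_obj with get_all=False)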
71 return traverse_obj(obj, (..., *variadic(keys)), **kwargs, get_all=False)
72
73
74 # any clients starting with _ cannot be explicitly requested by the user
75 INNERTUBE_CLIENTS = {
76 'web': {
77 'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
78 'INNERTUBE_CONTEXT': {
79 'client': {
80 'clientName': 'WEB',
81 'clientVersion': '2.20210622.10.00',
82 }
83 },
84 'INNERTUBE_CONTEXT_CLIENT_NAME': 1
85 },
86 'web_embedded': {
87 'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
88 'INNERTUBE_CONTEXT': {
89 'client': {
90 'clientName': 'WEB_EMBEDDED_PLAYER',
91 'clientVersion': '1.20210620.0.1',
92 },
93 },
94 'INNERTUBE_CONTEXT_CLIENT_NAME': 56
95 },
96 'web_music': {
97 'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
98 'INNERTUBE_HOST': 'music.youtube.com',
99 'INNERTUBE_CONTEXT': {
100 'client': {
101 'clientName': 'WEB_REMIX',
102 'clientVersion': '1.20210621.00.00',
103 }
104 },
105 'INNERTUBE_CONTEXT_CLIENT_NAME': 67,
106 },
107 'web_creator': {
108 'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
109 'INNERTUBE_CONTEXT': {
110 'client': {
111 'clientName': 'WEB_CREATOR',
112 'clientVersion': '1.20210621.00.00',
113 }
114 },
115 'INNERTUBE_CONTEXT_CLIENT_NAME': 62,
116 },
117 'android': {
118 'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
119 'INNERTUBE_CONTEXT': {
120 'client': {
121 'clientName': 'ANDROID',
122 'clientVersion': '16.20',
123 }
124 },
125 'INNERTUBE_CONTEXT_CLIENT_NAME': 3,
126 'REQUIRE_JS_PLAYER': False
127 },
128 'android_embedded': {
129 'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
130 'INNERTUBE_CONTEXT': {
131 'client': {
132 'clientName': 'ANDROID_EMBEDDED_PLAYER',
133 'clientVersion': '16.20',
134 },
135 },
136 'INNERTUBE_CONTEXT_CLIENT_NAME': 55,
137 'REQUIRE_JS_PLAYER': False
138 },
139 'android_music': {
140 'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
141 'INNERTUBE_HOST': 'music.youtube.com',
142 'INNERTUBE_CONTEXT': {
143 'client': {
144 'clientName': 'ANDROID_MUSIC',
145 'clientVersion': '4.32',
146 }
147 },
148 'INNERTUBE_CONTEXT_CLIENT_NAME': 21,
149 'REQUIRE_JS_PLAYER': False
150 },
151 'android_creator': {
152 'INNERTUBE_CONTEXT': {
153 'client': {
154 'clientName': 'ANDROID_CREATOR',
155 'clientVersion': '21.24.100',
156 },
157 },
158 'INNERTUBE_CONTEXT_CLIENT_NAME': 14,
159 'REQUIRE_JS_PLAYER': False
160 },
161 # ios has HLS live streams
162 # See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/680
163 'ios': {
164 'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
165 'INNERTUBE_CONTEXT': {
166 'client': {
167 'clientName': 'IOS',
168 'clientVersion': '16.20',
169 }
170 },
171 'INNERTUBE_CONTEXT_CLIENT_NAME': 5,
172 'REQUIRE_JS_PLAYER': False
173 },
174 'ios_embedded': {
175 'INNERTUBE_API_KEY': 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8',
176 'INNERTUBE_CONTEXT': {
177 'client': {
178 'clientName': 'IOS_MESSAGES_EXTENSION',
179 'clientVersion': '16.20',
180 },
181 },
182 'INNERTUBE_CONTEXT_CLIENT_NAME': 66,
183 'REQUIRE_JS_PLAYER': False
184 },
185 'ios_music': {
186 'INNERTUBE_API_KEY': 'AIzaSyDK3iBpDP9nHVTk2qL73FLJICfOC3c51Og',
187 'INNERTUBE_HOST': 'music.youtube.com',
188 'INNERTUBE_CONTEXT': {
189 'client': {
190 'clientName': 'IOS_MUSIC',
191 'clientVersion': '4.32',
192 },
193 },
194 'INNERTUBE_CONTEXT_CLIENT_NAME': 26,
195 'REQUIRE_JS_PLAYER': False
196 },
197 'ios_creator': {
198 'INNERTUBE_CONTEXT': {
199 'client': {
200 'clientName': 'IOS_CREATOR',
201 'clientVersion': '21.24.100',
202 },
203 },
204 'INNERTUBE_CONTEXT_CLIENT_NAME': 15,
205 'REQUIRE_JS_PLAYER': False
206 },
207 # mweb has 'ultralow' formats
208 # See: https://github.com/yt-dlp/yt-dlp/pull/557
209 'mweb': {
210 'INNERTUBE_API_KEY': 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8',
211 'INNERTUBE_CONTEXT': {
212 'client': {
213 'clientName': 'MWEB',
214 'clientVersion': '2.20210721.07.00',
215 }
216 },
217 'INNERTUBE_CONTEXT_CLIENT_NAME': 2
218 },
219 }
220
221
222 def build_innertube_clients():
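# Fill in per-client defaults (API key, host, hl, JS player requirement), assign client priorities,
# and derive the '<client>_agegate' variants (clientScreen=EMBED + thirdParty) for the base clients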
223 third_party = {
224 'embedUrl': 'https://google.com', # Can be any valid URL
225 }
226 base_clients = ('android', 'web', 'ios', 'mweb')
227 priority = qualities(base_clients[::-1])
228
229 for client, ytcfg in tuple(INNERTUBE_CLIENTS.items()):
230 ytcfg.setdefault('INNERTUBE_API_KEY', 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8')
231 ytcfg.setdefault('INNERTUBE_HOST', 'www.youtube.com')
232 ytcfg.setdefault('REQUIRE_JS_PLAYER', True)
233 ytcfg['INNERTUBE_CONTEXT']['client'].setdefault('hl', 'en')
234 ytcfg['priority'] = 10 * priority(client.split('_', 1)[0])
235
236 if client in base_clients:
237 INNERTUBE_CLIENTS[f'{client}_agegate'] = agegate_ytcfg = copy.deepcopy(ytcfg)
238 agegate_ytcfg['INNERTUBE_CONTEXT']['client']['clientScreen'] = 'EMBED'
239 agegate_ytcfg['INNERTUBE_CONTEXT']['thirdParty'] = third_party
240 agegate_ytcfg['priority'] -= 1
241 elif client.endswith('_embedded'):
242 ytcfg['INNERTUBE_CONTEXT']['thirdParty'] = third_party
243 ytcfg['priority'] -= 2
244 else:
245 ytcfg['priority'] -= 3
246
247
248 build_innertube_clients()
249
250
251 class YoutubeBaseInfoExtractor(InfoExtractor):
252 """Provide base functions for Youtube extractors"""
253
254 _RESERVED_NAMES = (
255 r'channel|c|user|playlist|watch|w|v|embed|e|watch_popup|clip|'
256 r'shorts|movies|results|shared|hashtag|trending|feed|feeds|'
257 r'browse|oembed|get_video_info|iframe_api|s/player|'
258 r'storefront|oops|index|account|reporthistory|t/terms|about|upload|signin|logout')
259
260 _PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)'
261
262 _NETRC_MACHINE = 'youtube'
263
264 # If True it will raise an error if no login info is provided
265 _LOGIN_REQUIRED = False
266
267 _INVIDIOUS_SITES = (
268 # invidious-redirect websites
269 r'(?:www\.)?redirect\.invidious\.io',
270 r'(?:(?:www|dev)\.)?invidio\.us',
271 # Invidious instances taken from https://github.com/iv-org/documentation/blob/master/Invidious-Instances.md
272 r'(?:www\.)?invidious\.pussthecat\.org',
273 r'(?:www\.)?invidious\.zee\.li',
274 r'(?:www\.)?invidious\.ethibox\.fr',
275 r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',
276 # youtube-dl invidious instances list
277 r'(?:(?:www|no)\.)?invidiou\.sh',
278 r'(?:(?:www|fi)\.)?invidious\.snopyta\.org',
279 r'(?:www\.)?invidious\.kabi\.tk',
280 r'(?:www\.)?invidious\.mastodon\.host',
281 r'(?:www\.)?invidious\.zapashcanon\.fr',
282 r'(?:www\.)?(?:invidious(?:-us)?|piped)\.kavin\.rocks',
283 r'(?:www\.)?invidious\.tinfoil-hat\.net',
284 r'(?:www\.)?invidious\.himiko\.cloud',
285 r'(?:www\.)?invidious\.reallyancient\.tech',
286 r'(?:www\.)?invidious\.tube',
287 r'(?:www\.)?invidiou\.site',
288 r'(?:www\.)?invidious\.site',
289 r'(?:www\.)?invidious\.xyz',
290 r'(?:www\.)?invidious\.nixnet\.xyz',
291 r'(?:www\.)?invidious\.048596\.xyz',
292 r'(?:www\.)?invidious\.drycat\.fr',
293 r'(?:www\.)?inv\.skyn3t\.in',
294 r'(?:www\.)?tube\.poal\.co',
295 r'(?:www\.)?tube\.connect\.cafe',
296 r'(?:www\.)?vid\.wxzm\.sx',
297 r'(?:www\.)?vid\.mint\.lgbt',
298 r'(?:www\.)?vid\.puffyan\.us',
299 r'(?:www\.)?yewtu\.be',
300 r'(?:www\.)?yt\.elukerio\.org',
301 r'(?:www\.)?yt\.lelux\.fi',
302 r'(?:www\.)?invidious\.ggc-project\.de',
303 r'(?:www\.)?yt\.maisputain\.ovh',
304 r'(?:www\.)?ytprivate\.com',
305 r'(?:www\.)?invidious\.13ad\.de',
306 r'(?:www\.)?invidious\.toot\.koeln',
307 r'(?:www\.)?invidious\.fdn\.fr',
308 r'(?:www\.)?watch\.nettohikari\.com',
309 r'(?:www\.)?invidious\.namazso\.eu',
310 r'(?:www\.)?invidious\.silkky\.cloud',
311 r'(?:www\.)?invidious\.exonip\.de',
312 r'(?:www\.)?invidious\.riverside\.rocks',
313 r'(?:www\.)?invidious\.blamefran\.net',
314 r'(?:www\.)?invidious\.moomoo\.de',
315 r'(?:www\.)?ytb\.trom\.tf',
316 r'(?:www\.)?yt\.cyberhost\.uk',
317 r'(?:www\.)?kgg2m7yk5aybusll\.onion',
318 r'(?:www\.)?qklhadlycap4cnod\.onion',
319 r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',
320 r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',
321 r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',
322 r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',
323 r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',
324 r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',
325 r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',
326 r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',
327 r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',
328 r'(?:www\.)?hpniueoejy4opn7bc4ftgazyqjoeqwlvh2uiku2xqku6zpoa4bf5ruid\.onion',
329 )
330
331 def _login(self):
332 """
333 Attempt to log in to YouTube.
334 If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.
335 """
336
337 if (self._LOGIN_REQUIRED
338 and self.get_param('cookiefile') is None
339 and self.get_param('cookiesfrombrowser') is None):
340 self.raise_login_required(
341 'Login details are needed to download this content', method='cookies')
342 username, password = self._get_login_info()
343 if username:
344 self.report_warning(f'Cannot login to YouTube using username and password. {self._LOGIN_HINTS["cookies"]}')
345
346 def _initialize_consent(self):
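# Handle the EU consent interstitial: unless already logged in or consent was already given,
# set a CONSENT=YES cookie so that subsequent requests return the actual page data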
347 cookies = self._get_cookies('https://www.youtube.com/')
348 if cookies.get('__Secure-3PSID'):
349 return
350 consent_id = None
351 consent = cookies.get('CONSENT')
352 if consent:
353 if 'YES' in consent.value:
354 return
355 consent_id = self._search_regex(
356 r'PENDING\+(\d+)', consent.value, 'consent', default=None)
357 if not consent_id:
358 consent_id = random.randint(100, 999)
359 self._set_cookie('.youtube.com', 'CONSENT', 'YES+cb.20210328-17-p0.en+FX+%s' % consent_id)
360
361 def _real_initialize(self):
362 self._initialize_consent()
363 self._login()
364
365 _YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+?})\s*;'
366 _YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*=\s*({.+?})\s*;'
367 _YT_INITIAL_BOUNDARY_RE = r'(?:var\s+meta|</script|\n)'
368
369 def _get_default_ytcfg(self, client='web'):
370 return copy.deepcopy(INNERTUBE_CLIENTS[client])
371
372 def _get_innertube_host(self, client='web'):
373 return INNERTUBE_CLIENTS[client]['INNERTUBE_HOST']
374
375 def _ytcfg_get_safe(self, ytcfg, getter, expected_type=None, default_client='web'):
376 # try_get but with fallback to default ytcfg client values when present
377 _func = lambda y: try_get(y, getter, expected_type)
378 return _func(ytcfg) or _func(self._get_default_ytcfg(default_client))
379
380 def _extract_client_name(self, ytcfg, default_client='web'):
381 return self._ytcfg_get_safe(
382 ytcfg, (lambda x: x['INNERTUBE_CLIENT_NAME'],
383 lambda x: x['INNERTUBE_CONTEXT']['client']['clientName']), compat_str, default_client)
384
385 def _extract_client_version(self, ytcfg, default_client='web'):
386 return self._ytcfg_get_safe(
387 ytcfg, (lambda x: x['INNERTUBE_CLIENT_VERSION'],
388 lambda x: x['INNERTUBE_CONTEXT']['client']['clientVersion']), compat_str, default_client)
389
390 def _extract_api_key(self, ytcfg=None, default_client='web'):
391 return self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_API_KEY'], compat_str, default_client)
392
393 def _extract_context(self, ytcfg=None, default_client='web'):
394 _get_context = lambda y: try_get(y, lambda x: x['INNERTUBE_CONTEXT'], dict)
395 context = _get_context(ytcfg)
396 if context:
397 return context
398
399 context = _get_context(self._get_default_ytcfg(default_client))
400 if not ytcfg:
401 return context
402
403 # Recreate the client context (required)
404 context['client'].update({
405 'clientVersion': self._extract_client_version(ytcfg, default_client),
406 'clientName': self._extract_client_name(ytcfg, default_client),
407 })
408 visitor_data = try_get(ytcfg, lambda x: x['VISITOR_DATA'], compat_str)
409 if visitor_data:
410 context['client']['visitorData'] = visitor_data
411 return context
412
413 _SAPISID = None
414
415 def _generate_sapisidhash_header(self, origin='https://www.youtube.com'):
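# Build the 'SAPISIDHASH <time>_<sha1(time SAPISID origin)>' Authorization header value
# used to authenticate requests with account cookies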
416 time_now = round(time.time())
417 if self._SAPISID is None:
418 yt_cookies = self._get_cookies('https://www.youtube.com')
419 # Sometimes SAPISID cookie isn't present but __Secure-3PAPISID is.
420 # See: https://github.com/yt-dlp/yt-dlp/issues/393
421 sapisid_cookie = dict_get(
422 yt_cookies, ('__Secure-3PAPISID', 'SAPISID'))
423 if sapisid_cookie and sapisid_cookie.value:
424 self._SAPISID = sapisid_cookie.value
425 self.write_debug('Extracted SAPISID cookie')
426 # The SAPISID cookie is required; copy it from __Secure-3PAPISID if not already present
427 if not yt_cookies.get('SAPISID'):
428 self.write_debug('Copying __Secure-3PAPISID cookie to SAPISID cookie')
429 self._set_cookie(
430 '.youtube.com', 'SAPISID', self._SAPISID, secure=True, expire_time=time_now + 3600)
431 else:
432 self._SAPISID = False
433 if not self._SAPISID:
434 return None
435 # SAPISIDHASH algorithm from https://stackoverflow.com/a/32065323
436 sapisidhash = hashlib.sha1(
437 f'{time_now} {self._SAPISID} {origin}'.encode('utf-8')).hexdigest()
438 return f'SAPISIDHASH {time_now}_{sapisidhash}'
439
440 def _call_api(self, ep, query, video_id, fatal=True, headers=None,
441 note='Downloading API JSON', errnote='Unable to download API page',
442 context=None, api_key=None, api_hostname=None, default_client='web'):
443
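# POST a JSON request to the innertube endpoint https://<host>/youtubei/v1/<ep>
# using the given (or default) client context and API key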
444 data = {'context': context} if context else {'context': self._extract_context(default_client=default_client)}
445 data.update(query)
446 real_headers = self.generate_api_headers(default_client=default_client)
447 real_headers.update({'content-type': 'application/json'})
448 if headers:
449 real_headers.update(headers)
450 return self._download_json(
451 'https://%s/youtubei/v1/%s' % (api_hostname or self._get_innertube_host(default_client), ep),
452 video_id=video_id, fatal=fatal, note=note, errnote=errnote,
453 data=json.dumps(data).encode('utf8'), headers=real_headers,
454 query={'key': api_key or self._extract_api_key()})
455
456 def extract_yt_initial_data(self, item_id, webpage, fatal=True):
457 data = self._search_regex(
458 (r'%s\s*%s' % (self._YT_INITIAL_DATA_RE, self._YT_INITIAL_BOUNDARY_RE),
459 self._YT_INITIAL_DATA_RE), webpage, 'yt initial data', fatal=fatal)
460 if data:
461 return self._parse_json(data, item_id, fatal=fatal)
462
463 @staticmethod
464 def _extract_session_index(*data):
465 """
466 Index of current account in account list.
467 See: https://github.com/yt-dlp/yt-dlp/pull/519
468 """
469 for ytcfg in data:
470 session_index = int_or_none(try_get(ytcfg, lambda x: x['SESSION_INDEX']))
471 if session_index is not None:
472 return session_index
473
474 # Deprecated?
475 def _extract_identity_token(self, ytcfg=None, webpage=None):
476 if ytcfg:
477 token = try_get(ytcfg, lambda x: x['ID_TOKEN'], compat_str)
478 if token:
479 return token
480 if webpage:
481 return self._search_regex(
482 r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
483 'identity token', default=None, fatal=False)
484
485 @staticmethod
486 def _extract_account_syncid(*args):
487 """
488 Extract syncId required to download private playlists of secondary channels
489 @params response and/or ytcfg
490 """
491 for data in args:
492 # ytcfg includes channel_syncid if on secondary channel
493 delegated_sid = try_get(data, lambda x: x['DELEGATED_SESSION_ID'], compat_str)
494 if delegated_sid:
495 return delegated_sid
496 sync_ids = (try_get(
497 data, (lambda x: x['responseContext']['mainAppWebResponseContext']['datasyncId'],
498 lambda x: x['DATASYNC_ID']), compat_str) or '').split('||')
499 if len(sync_ids) >= 2 and sync_ids[1]:
500 # datasyncid is of the form "channel_syncid||user_syncid" for secondary channel
501 # and just "user_syncid||" for primary channel. We only want the channel_syncid
502 return sync_ids[0]
503
504 @staticmethod
505 def _extract_visitor_data(*args):
506 """
507 Extracts visitorData from an API response or ytcfg
508 Appears to be used to track session state
509 """
510 return get_first(
511 args, (('VISITOR_DATA', ('INNERTUBE_CONTEXT', 'client', 'visitorData'), ('responseContext', 'visitorData'))),
512 expected_type=str)
513
514 @property
515 def is_authenticated(self):
516 return bool(self._generate_sapisidhash_header())
517
518 def extract_ytcfg(self, video_id, webpage):
519 if not webpage:
520 return {}
521 return self._parse_json(
522 self._search_regex(
523 r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg',
524 default='{}'), video_id, fatal=False) or {}
525
526 def generate_api_headers(
527 self, *, ytcfg=None, account_syncid=None, session_index=None,
528 visitor_data=None, identity_token=None, api_hostname=None, default_client='web'):
529
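# Assemble the X-YouTube-Client-*, X-Goog-* and, when cookies allow it, Authorization headers for innertube requests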
530 origin = 'https://' + (api_hostname if api_hostname else self._get_innertube_host(default_client))
531 headers = {
532 'X-YouTube-Client-Name': compat_str(
533 self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_CONTEXT_CLIENT_NAME'], default_client=default_client)),
534 'X-YouTube-Client-Version': self._extract_client_version(ytcfg, default_client),
535 'Origin': origin,
536 'X-Youtube-Identity-Token': identity_token or self._extract_identity_token(ytcfg),
537 'X-Goog-PageId': account_syncid or self._extract_account_syncid(ytcfg),
538 'X-Goog-Visitor-Id': visitor_data or self._extract_visitor_data(ytcfg)
539 }
540 if session_index is None:
541 session_index = self._extract_session_index(ytcfg)
542 if account_syncid or session_index is not None:
543 headers['X-Goog-AuthUser'] = session_index if session_index is not None else 0
544
545 auth = self._generate_sapisidhash_header(origin)
546 if auth is not None:
547 headers['Authorization'] = auth
548 headers['X-Origin'] = origin
549 return {h: v for h, v in headers.items() if v is not None}
550
551 @staticmethod
552 def _build_api_continuation_query(continuation, ctp=None):
553 query = {
554 'continuation': continuation
555 }
556 # TODO: Inconsistency with clickTrackingParams.
557 # Currently we have a fixed ctp contained within context (from ytcfg)
558 # and a ctp in root query for continuation.
559 if ctp:
560 query['clickTracking'] = {'clickTrackingParams': ctp}
561 return query
562
563 @classmethod
564 def _extract_next_continuation_data(cls, renderer):
565 next_continuation = try_get(
566 renderer, (lambda x: x['continuations'][0]['nextContinuationData'],
567 lambda x: x['continuation']['reloadContinuationData']), dict)
568 if not next_continuation:
569 return
570 continuation = next_continuation.get('continuation')
571 if not continuation:
572 return
573 ctp = next_continuation.get('clickTrackingParams')
574 return cls._build_api_continuation_query(continuation, ctp)
575
576 @classmethod
577 def _extract_continuation_ep_data(cls, continuation_ep: dict):
578 if isinstance(continuation_ep, dict):
579 continuation = try_get(
580 continuation_ep, lambda x: x['continuationCommand']['token'], compat_str)
581 if not continuation:
582 return
583 ctp = continuation_ep.get('clickTrackingParams')
584 return cls._build_api_continuation_query(continuation, ctp)
585
586 @classmethod
587 def _extract_continuation(cls, renderer):
588 next_continuation = cls._extract_next_continuation_data(renderer)
589 if next_continuation:
590 return next_continuation
591
592 contents = []
593 for key in ('contents', 'items'):
594 contents.extend(try_get(renderer, lambda x: x[key], list) or [])
595
596 for content in contents:
597 if not isinstance(content, dict):
598 continue
599 continuation_ep = try_get(
600 content, (lambda x: x['continuationItemRenderer']['continuationEndpoint'],
601 lambda x: x['continuationItemRenderer']['button']['buttonRenderer']['command']),
602 dict)
603 continuation = cls._extract_continuation_ep_data(continuation_ep)
604 if continuation:
605 return continuation
606
607 @classmethod
608 def _extract_alerts(cls, data):
609 for alert_dict in try_get(data, lambda x: x['alerts'], list) or []:
610 if not isinstance(alert_dict, dict):
611 continue
612 for alert in alert_dict.values():
613 alert_type = alert.get('type')
614 if not alert_type:
615 continue
616 message = cls._get_text(alert, 'text')
617 if message:
618 yield alert_type, message
619
620 def _report_alerts(self, alerts, expected=True, fatal=True, only_once=False):
621 errors = []
622 warnings = []
623 for alert_type, alert_message in alerts:
624 if alert_type.lower() == 'error' and fatal:
625 errors.append([alert_type, alert_message])
626 else:
627 warnings.append([alert_type, alert_message])
628
629 for alert_type, alert_message in (warnings + errors[:-1]):
630 self.report_warning('YouTube said: %s - %s' % (alert_type, alert_message), only_once=only_once)
631 if errors:
632 raise ExtractorError('YouTube said: %s' % errors[-1][1], expected=expected)
633
634 def _extract_and_report_alerts(self, data, *args, **kwargs):
635 return self._report_alerts(self._extract_alerts(data), *args, **kwargs)
636
637 def _extract_badges(self, renderer: dict):
638 badges = set()
639 for badge in try_get(renderer, lambda x: x['badges'], list) or []:
640 label = try_get(badge, lambda x: x['metadataBadgeRenderer']['label'], compat_str)
641 if label:
642 badges.add(label.lower())
643 return badges
644
645 @staticmethod
646 def _get_text(data, *path_list, max_runs=None):
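# Extract plain text from YouTube's text objects ({'simpleText': ...} or {'runs': [{'text': ...}, ...]});
# each entry of path_list is a traverse_obj path pointing at such an object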
647 for path in path_list or [None]:
648 if path is None:
649 obj = [data]
650 else:
651 obj = traverse_obj(data, path, default=[])
652 if not any(key is ... or isinstance(key, (list, tuple)) for key in variadic(path)):
653 obj = [obj]
654 for item in obj:
655 text = try_get(item, lambda x: x['simpleText'], compat_str)
656 if text:
657 return text
658 runs = try_get(item, lambda x: x['runs'], list) or []
659 if not runs and isinstance(item, list):
660 runs = item
661
662 runs = runs[:min(len(runs), max_runs or len(runs))]
663 text = ''.join(traverse_obj(runs, (..., 'text'), expected_type=str, default=[]))
664 if text:
665 return text
666
667 def _extract_response(self, item_id, query, note='Downloading API JSON', headers=None,
668 ytcfg=None, check_get_keys=None, ep='browse', fatal=True, api_hostname=None,
669 default_client='web'):
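# Wrapper around _call_api that retries (up to --extractor-retries times) on intermittent
# network errors, 'unknown error' alerts and responses missing all of check_get_keys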
670 response = None
671 last_error = None
672 count = -1
673 retries = self.get_param('extractor_retries', 3)
674 if check_get_keys is None:
675 check_get_keys = []
676 while count < retries:
677 count += 1
678 if last_error:
679 self.report_warning('%s. Retrying ...' % remove_end(last_error, '.'))
680 try:
681 response = self._call_api(
682 ep=ep, fatal=True, headers=headers,
683 video_id=item_id, query=query,
684 context=self._extract_context(ytcfg, default_client),
685 api_key=self._extract_api_key(ytcfg, default_client),
686 api_hostname=api_hostname, default_client=default_client,
687 note='%s%s' % (note, ' (retry #%d)' % count if count else ''))
688 except ExtractorError as e:
689 if isinstance(e.cause, network_exceptions):
690 if isinstance(e.cause, compat_HTTPError) and not is_html(e.cause.read(512)):
691 e.cause.seek(0)
692 yt_error = try_get(
693 self._parse_json(e.cause.read().decode(), item_id, fatal=False),
694 lambda x: x['error']['message'], compat_str)
695 if yt_error:
696 self._report_alerts([('ERROR', yt_error)], fatal=False)
697 # Downloading page may result in intermittent 5xx HTTP error
698 # Sometimes a 404 is also received. See: https://github.com/ytdl-org/youtube-dl/issues/28289
699 # We also want to catch all other network exceptions since errors in later pages can be troublesome
700 # See https://github.com/yt-dlp/yt-dlp/issues/507#issuecomment-880188210
701 if not isinstance(e.cause, compat_HTTPError) or e.cause.code not in (403, 429):
702 last_error = error_to_compat_str(e.cause or e.msg)
703 if count < retries:
704 continue
705 if fatal:
706 raise
707 else:
708 self.report_warning(error_to_compat_str(e))
709 return
710
711 else:
712 try:
713 self._extract_and_report_alerts(response, only_once=True)
714 except ExtractorError as e:
715 # YouTube servers may return errors we want to retry on in a 200 OK response
716 # See: https://github.com/yt-dlp/yt-dlp/issues/839
717 if 'unknown error' in e.msg.lower():
718 last_error = e.msg
719 continue
720 if fatal:
721 raise
722 self.report_warning(error_to_compat_str(e))
723 return
724 if not check_get_keys or dict_get(response, check_get_keys):
725 break
726 # Youtube sometimes sends incomplete data
727 # See: https://github.com/ytdl-org/youtube-dl/issues/28194
728 last_error = 'Incomplete data received'
729 if count >= retries:
730 if fatal:
731 raise ExtractorError(last_error)
732 else:
733 self.report_warning(last_error)
734 return
735 return response
736
737 @staticmethod
738 def is_music_url(url):
739 return re.match(r'https?://music\.youtube\.com/', url) is not None
740
741 def _extract_video(self, renderer):
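# Flatten a video renderer (from search/channel/playlist listings) into a url-type result for YoutubeIE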
742 video_id = renderer.get('videoId')
743 title = self._get_text(renderer, 'title')
744 description = self._get_text(renderer, 'descriptionSnippet')
745 duration = parse_duration(self._get_text(
746 renderer, 'lengthText', ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'text')))
747 view_count_text = self._get_text(renderer, 'viewCountText') or ''
748 view_count = str_to_int(self._search_regex(
749 r'^([\d,]+)', re.sub(r'\s', '', view_count_text),
750 'view count', default=None))
751
752 uploader = self._get_text(renderer, 'ownerText', 'shortBylineText')
753
754 return {
755 '_type': 'url',
756 'ie_key': YoutubeIE.ie_key(),
757 'id': video_id,
758 'url': f'https://www.youtube.com/watch?v={video_id}',
759 'title': title,
760 'description': description,
761 'duration': duration,
762 'view_count': view_count,
763 'uploader': uploader,
764 }
765
766
767 class YoutubeIE(YoutubeBaseInfoExtractor):
768 IE_DESC = 'YouTube'
769 _VALID_URL = r"""(?x)^
770 (
771 (?:https?://|//) # http(s):// or protocol-independent URL
772 (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com|
773 (?:www\.)?deturl\.com/www\.youtube\.com|
774 (?:www\.)?pwnyoutube\.com|
775 (?:www\.)?hooktube\.com|
776 (?:www\.)?yourepeat\.com|
777 tube\.majestyc\.net|
778 %(invidious)s|
779 youtube\.googleapis\.com)/ # the various hostnames, with wildcard subdomains
780 (?:.*?\#/)? # handle anchor (#/) redirect urls
781 (?: # the various things that can precede the ID:
782 (?:(?:v|embed|e|shorts)/(?!videoseries)) # v/ or embed/ or e/ or shorts/
783 |(?: # or the v= param in all its forms
784 (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
785 (?:\?|\#!?) # the params delimiter ? or # or #!
786 (?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
787 v=
788 )
789 ))
790 |(?:
791 youtu\.be| # just youtu.be/xxxx
792 vid\.plus| # or vid.plus/xxxx
793 zwearz\.com/watch| # or zwearz.com/watch/xxxx
794 %(invidious)s
795 )/
796 |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
797 )
798 )? # all until now is optional -> you can pass the naked ID
799 (?P<id>[0-9A-Za-z_-]{11}) # here it is! the YouTube video ID
800 (?(1).+)? # if we found the ID, everything can follow
801 (?:\#|$)""" % {
802 'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
803 }
804 _PLAYER_INFO_RE = (
805 r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player',
806 r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',
807 r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$',
808 )
809 _formats = {
810 '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
811 '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
812 '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
813 '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
814 '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
815 '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
816 '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
817 '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
818 # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
819 '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
820 '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
821 '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
822 '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
823 '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
824 '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
825 '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
826 '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
827 '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
828
829
830 # 3D videos
831 '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
832 '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
833 '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
834 '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
835 '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
836 '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
837 '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
838
839 # Apple HTTP Live Streaming
840 '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
841 '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
842 '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
843 '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
844 '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
845 '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
846 '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
847 '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
848
849 # DASH mp4 video
850 '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
851 '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
852 '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
853 '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
854 '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
855 '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
856 '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
857 '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
858 '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
859 '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
860 '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
861 '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},
862
863 # Dash mp4 audio
864 '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
865 '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
866 '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
867 '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
868 '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
869 '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
870 '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},
871
872 # Dash webm
873 '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
874 '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
875 '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
876 '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
877 '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
878 '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
879 '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
880 '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
881 '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
882 '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
883 '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
884 '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
885 '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
886 '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
887 '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
888 # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
889 '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
890 '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
891 '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
892 '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
893 '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
894 '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
895
896 # Dash webm audio
897 '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
898 '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},
899
900 # Dash webm audio with opus inside
901 '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
902 '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
903 '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},
904
905 # RTMP (unnamed)
906 '_rtmp': {'protocol': 'rtmp'},
907
908 # av01 video only formats sometimes served with "unknown" codecs
909 '394': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},
910 '395': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},
911 '396': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'av01.0.01M.08'},
912 '397': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'av01.0.04M.08'},
913 '398': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'av01.0.05M.08'},
914 '399': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'av01.0.08M.08'},
915 '400': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},
916 '401': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},
917 }
918 _SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')
919
920 _GEO_BYPASS = False
921
922 IE_NAME = 'youtube'
923 _TESTS = [
924 {
925 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
926 'info_dict': {
927 'id': 'BaW_jenozKc',
928 'ext': 'mp4',
929 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
930 'uploader': 'Philipp Hagemeister',
931 'uploader_id': 'phihag',
932 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
933 'channel': 'Philipp Hagemeister',
934 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
935 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
936 'upload_date': '20121002',
937 'description': 'md5:8fb536f4877b8a7455c2ec23794dbc22',
938 'categories': ['Science & Technology'],
939 'tags': ['youtube-dl'],
940 'duration': 10,
941 'view_count': int,
942 'like_count': int,
943 # 'dislike_count': int,
944 'availability': 'public',
945 'playable_in_embed': True,
946 'thumbnail': 'https://i.ytimg.com/vi/BaW_jenozKc/maxresdefault.jpg',
947 'live_status': 'not_live',
948 'age_limit': 0,
949 'start_time': 1,
950 'end_time': 9,
951 }
952 },
953 {
954 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
955 'note': 'Embed-only video (#1746)',
956 'info_dict': {
957 'id': 'yZIXLfi8CZQ',
958 'ext': 'mp4',
959 'upload_date': '20120608',
960 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
961 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
962 'uploader': 'SET India',
963 'uploader_id': 'setindia',
964 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
965 'age_limit': 18,
966 },
967 'skip': 'Private video',
968 },
969 {
970 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=yZIXLfi8CZQ',
971 'note': 'Use the first video ID in the URL',
972 'info_dict': {
973 'id': 'BaW_jenozKc',
974 'ext': 'mp4',
975 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
976 'uploader': 'Philipp Hagemeister',
977 'uploader_id': 'phihag',
978 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
979 'upload_date': '20121002',
980 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
981 'categories': ['Science & Technology'],
982 'tags': ['youtube-dl'],
983 'duration': 10,
984 'view_count': int,
985 'like_count': int,
986 'dislike_count': int,
987 },
988 'params': {
989 'skip_download': True,
990 },
991 },
992 {
993 'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
994 'note': '256k DASH audio (format 141) via DASH manifest',
995 'info_dict': {
996 'id': 'a9LDPn-MO4I',
997 'ext': 'm4a',
998 'upload_date': '20121002',
999 'uploader_id': '8KVIDEO',
1000 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
1001 'description': '',
1002 'uploader': '8KVIDEO',
1003 'title': 'UHDTV TEST 8K VIDEO.mp4'
1004 },
1005 'params': {
1006 'youtube_include_dash_manifest': True,
1007 'format': '141',
1008 },
1009 'skip': 'format 141 not served anymore',
1010 },
1011 # DASH manifest with encrypted signature
1012 {
1013 'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
1014 'info_dict': {
1015 'id': 'IB3lcPjvWLA',
1016 'ext': 'm4a',
1017 'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
1018 'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
1019 'duration': 244,
1020 'uploader': 'AfrojackVEVO',
1021 'uploader_id': 'AfrojackVEVO',
1022 'upload_date': '20131011',
1023 'abr': 129.495,
1024 },
1025 'params': {
1026 'youtube_include_dash_manifest': True,
1027 'format': '141/bestaudio[ext=m4a]',
1028 },
1029 },
1030 # Age-gate videos. See https://github.com/yt-dlp/yt-dlp/pull/575#issuecomment-888837000
1031 {
1032 'note': 'Embed allowed age-gate video',
1033 'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
1034 'info_dict': {
1035 'id': 'HtVdAasjOgU',
1036 'ext': 'mp4',
1037 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
1038 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
1039 'duration': 142,
1040 'uploader': 'The Witcher',
1041 'uploader_id': 'WitcherGame',
1042 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
1043 'upload_date': '20140605',
1044 'age_limit': 18,
1045 },
1046 },
1047 {
1048 'note': 'Age-gate video with embed allowed in public site',
1049 'url': 'https://youtube.com/watch?v=HsUATh_Nc2U',
1050 'info_dict': {
1051 'id': 'HsUATh_Nc2U',
1052 'ext': 'mp4',
1053 'title': 'Godzilla 2 (Official Video)',
1054 'description': 'md5:bf77e03fcae5529475e500129b05668a',
1055 'upload_date': '20200408',
1056 'uploader_id': 'FlyingKitty900',
1057 'uploader': 'FlyingKitty',
1058 'age_limit': 18,
1059 },
1060 },
1061 {
1062 'note': 'Age-gate video embeddable only with clientScreen=EMBED',
1063 'url': 'https://youtube.com/watch?v=Tq92D6wQ1mg',
1064 'info_dict': {
1065 'id': 'Tq92D6wQ1mg',
1066 'title': '[MMD] Adios - EVERGLOW [+Motion DL]',
1067 'ext': 'mp4',
1068 'upload_date': '20191227',
1069 'uploader_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',
1070 'uploader': 'Projekt Melody',
1071 'description': 'md5:17eccca93a786d51bc67646756894066',
1072 'age_limit': 18,
1073 },
1074 },
1075 {
1076 'note': 'Non-age-gated non-embeddable video',
1077 'url': 'https://youtube.com/watch?v=MeJVWBSsPAY',
1078 'info_dict': {
1079 'id': 'MeJVWBSsPAY',
1080 'ext': 'mp4',
1081 'title': 'OOMPH! - Such Mich Find Mich (Lyrics)',
1082 'uploader': 'Herr Lurik',
1083 'uploader_id': 'st3in234',
1084 'description': 'Fan Video. Music & Lyrics by OOMPH!.',
1085 'upload_date': '20130730',
1086 },
1087 },
1088 {
1089 'note': 'Non-bypassable age-gated video',
1090 'url': 'https://youtube.com/watch?v=Cr381pDsSsA',
1091 'only_matching': True,
1092 },
1093 # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
1094 # YouTube Red ad is not captured for creator
1095 {
1096 'url': '__2ABJjxzNo',
1097 'info_dict': {
1098 'id': '__2ABJjxzNo',
1099 'ext': 'mp4',
1100 'duration': 266,
1101 'upload_date': '20100430',
1102 'uploader_id': 'deadmau5',
1103 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
1104 'creator': 'deadmau5',
1105 'description': 'md5:6cbcd3a92ce1bc676fc4d6ab4ace2336',
1106 'uploader': 'deadmau5',
1107 'title': 'Deadmau5 - Some Chords (HD)',
1108 'alt_title': 'Some Chords',
1109 },
1110 'expected_warnings': [
1111 'DASH manifest missing',
1112 ]
1113 },
1114 # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
1115 {
1116 'url': 'lqQg6PlCWgI',
1117 'info_dict': {
1118 'id': 'lqQg6PlCWgI',
1119 'ext': 'mp4',
1120 'duration': 6085,
1121 'upload_date': '20150827',
1122 'uploader_id': 'olympic',
1123 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
1124 'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
1125 'uploader': 'Olympics',
1126 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
1127 },
1128 'params': {
1129 'skip_download': 'requires avconv',
1130 }
1131 },
1132 # Non-square pixels
1133 {
1134 'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
1135 'info_dict': {
1136 'id': '_b-2C3KPAM0',
1137 'ext': 'mp4',
1138 'stretched_ratio': 16 / 9.,
1139 'duration': 85,
1140 'upload_date': '20110310',
1141 'uploader_id': 'AllenMeow',
1142 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
1143 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
1144 'uploader': '孫ᄋᄅ',
1145 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
1146 },
1147 },
1148 # url_encoded_fmt_stream_map is empty string
1149 {
1150 'url': 'qEJwOuvDf7I',
1151 'info_dict': {
1152 'id': 'qEJwOuvDf7I',
1153 'ext': 'webm',
1154 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
1155 'description': '',
1156 'upload_date': '20150404',
1157 'uploader_id': 'spbelect',
1158 'uploader': 'Наблюдатели Петербурга',
1159 },
1160 'params': {
1161 'skip_download': 'requires avconv',
1162 },
1163 'skip': 'This live event has ended.',
1164 },
1165 # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
1166 {
1167 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
1168 'info_dict': {
1169 'id': 'FIl7x6_3R5Y',
1170 'ext': 'webm',
1171 'title': 'md5:7b81415841e02ecd4313668cde88737a',
1172 'description': 'md5:116377fd2963b81ec4ce64b542173306',
1173 'duration': 220,
1174 'upload_date': '20150625',
1175 'uploader_id': 'dorappi2000',
1176 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
1177 'uploader': 'dorappi2000',
1178 'formats': 'mincount:31',
1179 },
1180 'skip': 'not actual anymore',
1181 },
1182 # DASH manifest with segment_list
1183 {
1184 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
1185 'md5': '8ce563a1d667b599d21064e982ab9e31',
1186 'info_dict': {
1187 'id': 'CsmdDsKjzN8',
1188 'ext': 'mp4',
1189 'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510
1190 'uploader': 'Airtek',
1191 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
1192 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
1193 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
1194 },
1195 'params': {
1196 'youtube_include_dash_manifest': True,
1197 'format': '135', # bestvideo
1198 },
1199 'skip': 'This live event has ended.',
1200 },
1201 {
1202 # Multifeed videos (multiple cameras), URL is for Main Camera
1203 'url': 'https://www.youtube.com/watch?v=jvGDaLqkpTg',
1204 'info_dict': {
1205 'id': 'jvGDaLqkpTg',
1206 'title': 'Tom Clancy Free Weekend Rainbow Whatever',
1207 'description': 'md5:e03b909557865076822aa169218d6a5d',
1208 },
1209 'playlist': [{
1210 'info_dict': {
1211 'id': 'jvGDaLqkpTg',
1212 'ext': 'mp4',
1213 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Main Camera)',
1214 'description': 'md5:e03b909557865076822aa169218d6a5d',
1215 'duration': 10643,
1216 'upload_date': '20161111',
1217 'uploader': 'Team PGP',
1218 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1219 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
1220 },
1221 }, {
1222 'info_dict': {
1223 'id': '3AKt1R1aDnw',
1224 'ext': 'mp4',
1225 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 2)',
1226 'description': 'md5:e03b909557865076822aa169218d6a5d',
1227 'duration': 10991,
1228 'upload_date': '20161111',
1229 'uploader': 'Team PGP',
1230 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1231 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
1232 },
1233 }, {
1234 'info_dict': {
1235 'id': 'RtAMM00gpVc',
1236 'ext': 'mp4',
1237 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 3)',
1238 'description': 'md5:e03b909557865076822aa169218d6a5d',
1239 'duration': 10995,
1240 'upload_date': '20161111',
1241 'uploader': 'Team PGP',
1242 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1243 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
1244 },
1245 }, {
1246 'info_dict': {
1247 'id': '6N2fdlP3C5U',
1248 'ext': 'mp4',
1249 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 4)',
1250 'description': 'md5:e03b909557865076822aa169218d6a5d',
1251 'duration': 10990,
1252 'upload_date': '20161111',
1253 'uploader': 'Team PGP',
1254 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1255 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
1256 },
1257 }],
1258 'params': {
1259 'skip_download': True,
1260 },
1261 'skip': 'Not multifeed anymore',
1262 },
1263 {
1264 # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
1265 'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
1266 'info_dict': {
1267 'id': 'gVfLd0zydlo',
1268 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
1269 },
1270 'playlist_count': 2,
1271 'skip': 'Not multifeed anymore',
1272 },
1273 {
1274 'url': 'https://vid.plus/FlRa-iH7PGw',
1275 'only_matching': True,
1276 },
1277 {
1278 'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
1279 'only_matching': True,
1280 },
1281 {
1282 # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
1283 # Also tests cut-off URL expansion in video description (see
1284 # https://github.com/ytdl-org/youtube-dl/issues/1892,
1285 # https://github.com/ytdl-org/youtube-dl/issues/8164)
1286 'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
1287 'info_dict': {
1288 'id': 'lsguqyKfVQg',
1289 'ext': 'mp4',
1290 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
1291 'alt_title': 'Dark Walk',
1292 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
1293 'duration': 133,
1294 'upload_date': '20151119',
1295 'uploader_id': 'IronSoulElf',
1296 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
1297 'uploader': 'IronSoulElf',
1298 'creator': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',
1299 'track': 'Dark Walk',
1300 'artist': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',
1301 'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
1302 },
1303 'params': {
1304 'skip_download': True,
1305 },
1306 },
1307 {
1308 # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
1309 'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
1310 'only_matching': True,
1311 },
1312 {
1313 # Video with yt:stretch=17:0
1314 'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
1315 'info_dict': {
1316 'id': 'Q39EVAstoRM',
1317 'ext': 'mp4',
1318 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
1319 'description': 'md5:ee18a25c350637c8faff806845bddee9',
1320 'upload_date': '20151107',
1321 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
1322 'uploader': 'CH GAMER DROID',
1323 },
1324 'params': {
1325 'skip_download': True,
1326 },
1327 'skip': 'This video does not exist.',
1328 },
1329 {
1330 # Video with incomplete 'yt:stretch=16:'
1331 'url': 'https://www.youtube.com/watch?v=FRhJzUSJbGI',
1332 'only_matching': True,
1333 },
1334 {
1335 # Video licensed under Creative Commons
1336 'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
1337 'info_dict': {
1338 'id': 'M4gD1WSo5mA',
1339 'ext': 'mp4',
1340 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
1341 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
1342 'duration': 721,
1343 'upload_date': '20150127',
1344 'uploader_id': 'BerkmanCenter',
1345 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
1346 'uploader': 'The Berkman Klein Center for Internet & Society',
1347 'license': 'Creative Commons Attribution license (reuse allowed)',
1348 },
1349 'params': {
1350 'skip_download': True,
1351 },
1352 },
1353 {
1354 # Channel-like uploader_url
1355 'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
1356 'info_dict': {
1357 'id': 'eQcmzGIKrzg',
1358 'ext': 'mp4',
1359 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
1360 'description': 'md5:13a2503d7b5904ef4b223aa101628f39',
1361 'duration': 4060,
1362 'upload_date': '20151119',
1363 'uploader': 'Bernie Sanders',
1364 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
1365 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
1366 'license': 'Creative Commons Attribution license (reuse allowed)',
1367 },
1368 'params': {
1369 'skip_download': True,
1370 },
1371 },
1372 {
1373 'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
1374 'only_matching': True,
1375 },
1376 {
1377 # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
1378 'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
1379 'only_matching': True,
1380 },
1381 {
1382 # Rental video preview
1383 'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
1384 'info_dict': {
1385 'id': 'uGpuVWrhIzE',
1386 'ext': 'mp4',
1387 'title': 'Piku - Trailer',
1388 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
1389 'upload_date': '20150811',
1390 'uploader': 'FlixMatrix',
1391 'uploader_id': 'FlixMatrixKaravan',
1392 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
1393 'license': 'Standard YouTube License',
1394 },
1395 'params': {
1396 'skip_download': True,
1397 },
1398 'skip': 'This video is not available.',
1399 },
1400 {
1401 # YouTube Red video with episode data
1402 'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
1403 'info_dict': {
1404 'id': 'iqKdEhx-dD4',
1405 'ext': 'mp4',
1406 'title': 'Isolation - Mind Field (Ep 1)',
1407 'description': 'md5:f540112edec5d09fc8cc752d3d4ba3cd',
1408 'duration': 2085,
1409 'upload_date': '20170118',
1410 'uploader': 'Vsauce',
1411 'uploader_id': 'Vsauce',
1412 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
1413 'series': 'Mind Field',
1414 'season_number': 1,
1415 'episode_number': 1,
1416 },
1417 'params': {
1418 'skip_download': True,
1419 },
1420 'expected_warnings': [
1421 'Skipping DASH manifest',
1422 ],
1423 },
1424 {
1425 # The following content has been identified by the YouTube community
1426 # as inappropriate or offensive to some audiences.
1427 'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
1428 'info_dict': {
1429 'id': '6SJNVb0GnPI',
1430 'ext': 'mp4',
1431 'title': 'Race Differences in Intelligence',
1432 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
1433 'duration': 965,
1434 'upload_date': '20140124',
1435 'uploader': 'New Century Foundation',
1436 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
1437 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
1438 },
1439 'params': {
1440 'skip_download': True,
1441 },
1442 'skip': 'This video has been removed for violating YouTube\'s policy on hate speech.',
1443 },
1444 {
1445 # itag 212
1446 'url': '1t24XAntNCY',
1447 'only_matching': True,
1448 },
1449 {
1450 # geo restricted to JP
1451 'url': 'sJL6WA-aGkQ',
1452 'only_matching': True,
1453 },
1454 {
1455 'url': 'https://invidio.us/watch?v=BaW_jenozKc',
1456 'only_matching': True,
1457 },
1458 {
1459 'url': 'https://redirect.invidious.io/watch?v=BaW_jenozKc',
1460 'only_matching': True,
1461 },
1462 {
1463 # from https://nitter.pussthecat.org/YouTube/status/1360363141947944964#m
1464 'url': 'https://redirect.invidious.io/Yh0AhrY9GjA',
1465 'only_matching': True,
1466 },
1467 {
1468 # DRM protected
1469 'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
1470 'only_matching': True,
1471 },
1472 {
1473 # Video with unsupported adaptive stream type formats
1474 'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
1475 'info_dict': {
1476 'id': 'Z4Vy8R84T1U',
1477 'ext': 'mp4',
1478 'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
1479 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
1480 'duration': 433,
1481 'upload_date': '20130923',
1482 'uploader': 'Amelia Putri Harwita',
1483 'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',
1484 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',
1485 'formats': 'maxcount:10',
1486 },
1487 'params': {
1488 'skip_download': True,
1489 'youtube_include_dash_manifest': False,
1490 },
1491 'skip': 'Not relevant anymore',
1492 },
1493 {
1494 # Youtube Music Auto-generated description
1495 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1496 'info_dict': {
1497 'id': 'MgNrAu2pzNs',
1498 'ext': 'mp4',
1499 'title': 'Voyeur Girl',
1500 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
1501 'upload_date': '20190312',
1502 'uploader': 'Stephen - Topic',
1503 'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',
1504 'artist': 'Stephen',
1505 'track': 'Voyeur Girl',
1506 'album': 'it\'s too much love to know my dear',
1507 'release_date': '20190313',
1508 'release_year': 2019,
1509 },
1510 'params': {
1511 'skip_download': True,
1512 },
1513 },
1514 {
1515 'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',
1516 'only_matching': True,
1517 },
1518 {
1519 # invalid -> valid video id redirection
1520 'url': 'DJztXj2GPfl',
1521 'info_dict': {
1522 'id': 'DJztXj2GPfk',
1523 'ext': 'mp4',
1524 'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',
1525 'description': 'md5:bf577a41da97918e94fa9798d9228825',
1526 'upload_date': '20090125',
1527 'uploader': 'Prochorowka',
1528 'uploader_id': 'Prochorowka',
1529 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',
1530 'artist': 'Panjabi MC',
1531 'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',
1532 'album': 'Beware of the Boys (Mundian To Bach Ke)',
1533 },
1534 'params': {
1535 'skip_download': True,
1536 },
1537 'skip': 'Video unavailable',
1538 },
1539 {
1540 # empty description results in an empty string
1541 'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',
1542 'info_dict': {
1543 'id': 'x41yOUIvK2k',
1544 'ext': 'mp4',
1545 'title': 'IMG 3456',
1546 'description': '',
1547 'upload_date': '20170613',
1548 'uploader_id': 'ElevageOrVert',
1549 'uploader': 'ElevageOrVert',
1550 },
1551 'params': {
1552 'skip_download': True,
1553 },
1554 },
1555 {
1556 # with '};' inside yt initial data (see [1])
1557 # see [2] for an example with '};' inside ytInitialPlayerResponse
1558 # 1. https://github.com/ytdl-org/youtube-dl/issues/27093
1559 # 2. https://github.com/ytdl-org/youtube-dl/issues/27216
1560 'url': 'https://www.youtube.com/watch?v=CHqg6qOn4no',
1561 'info_dict': {
1562 'id': 'CHqg6qOn4no',
1563 'ext': 'mp4',
1564 'title': 'Part 77 Sort a list of simple types in c#',
1565 'description': 'md5:b8746fa52e10cdbf47997903f13b20dc',
1566 'upload_date': '20130831',
1567 'uploader_id': 'kudvenkat',
1568 'uploader': 'kudvenkat',
1569 },
1570 'params': {
1571 'skip_download': True,
1572 },
1573 },
1574 {
1575 # another example of '};' in ytInitialData
1576 'url': 'https://www.youtube.com/watch?v=gVfgbahppCY',
1577 'only_matching': True,
1578 },
1579 {
1580 'url': 'https://www.youtube.com/watch_popup?v=63RmMXCd_bQ',
1581 'only_matching': True,
1582 },
1583 {
1584 # https://github.com/ytdl-org/youtube-dl/pull/28094
1585 'url': 'OtqTfy26tG0',
1586 'info_dict': {
1587 'id': 'OtqTfy26tG0',
1588 'ext': 'mp4',
1589 'title': 'Burn Out',
1590 'description': 'md5:8d07b84dcbcbfb34bc12a56d968b6131',
1591 'upload_date': '20141120',
1592 'uploader': 'The Cinematic Orchestra - Topic',
1593 'uploader_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',
1594 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',
1595 'artist': 'The Cinematic Orchestra',
1596 'track': 'Burn Out',
1597 'album': 'Every Day',
1598 'release_date': None,
1599 'release_year': None,
1600 },
1601 'params': {
1602 'skip_download': True,
1603 },
1604 },
1605 {
1606 # controversial video, only works with bpctr when authenticated with cookies
1607 'url': 'https://www.youtube.com/watch?v=nGC3D_FkCmg',
1608 'only_matching': True,
1609 },
1610 {
1611 # controversial video, requires bpctr/contentCheckOk
1612 'url': 'https://www.youtube.com/watch?v=SZJvDhaSDnc',
1613 'info_dict': {
1614 'id': 'SZJvDhaSDnc',
1615 'ext': 'mp4',
1616 'title': 'San Diego teen commits suicide after bullying over embarrassing video',
1617 'channel_id': 'UC-SJ6nODDmufqBzPBwCvYvQ',
1618 'uploader': 'CBS This Morning',
1619 'uploader_id': 'CBSThisMorning',
1620 'upload_date': '20140716',
1621 'description': 'md5:acde3a73d3f133fc97e837a9f76b53b7'
1622 }
1623 },
1624 {
1625 # restricted location, https://github.com/ytdl-org/youtube-dl/issues/28685
1626 'url': 'cBvYw8_A0vQ',
1627 'info_dict': {
1628 'id': 'cBvYw8_A0vQ',
1629 'ext': 'mp4',
1630 'title': '4K Ueno Okachimachi Street Scenes 上野御徒町歩き',
1631 'description': 'md5:ea770e474b7cd6722b4c95b833c03630',
1632 'upload_date': '20201120',
1633 'uploader': 'Walk around Japan',
1634 'uploader_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',
1635 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',
1636 },
1637 'params': {
1638 'skip_download': True,
1639 },
1640 }, {
1641 # Has multiple audio streams
1642 'url': 'WaOKSUlf4TM',
1643 'only_matching': True
1644 }, {
1645 # Requires Premium: has format 141 when requested using YTM url
1646 'url': 'https://music.youtube.com/watch?v=XclachpHxis',
1647 'only_matching': True
1648 }, {
1649 # multiple subtitles with same lang_code
1650 'url': 'https://www.youtube.com/watch?v=wsQiKKfKxug',
1651 'only_matching': True,
1652 }, {
1653 # Force use android client fallback
1654 'url': 'https://www.youtube.com/watch?v=YOelRv7fMxY',
1655 'info_dict': {
1656 'id': 'YOelRv7fMxY',
1657 'title': 'DIGGING A SECRET TUNNEL Part 1',
1658 'ext': '3gp',
1659 'upload_date': '20210624',
1660 'channel_id': 'UCp68_FLety0O-n9QU6phsgw',
1661 'uploader': 'colinfurze',
1662 'uploader_id': 'colinfurze',
1663 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCp68_FLety0O-n9QU6phsgw',
1664 'description': 'md5:b5096f56af7ccd7a555c84db81738b22'
1665 },
1666 'params': {
1667 'format': '17', # 3gp format available on android
1668 'extractor_args': {'youtube': {'player_client': ['android']}},
1669 },
1670 },
1671 {
1672 # Skip download of additional client configs (remix client config in this case)
1673 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1674 'only_matching': True,
1675 'params': {
1676 'extractor_args': {'youtube': {'player_skip': ['configs']}},
1677 },
1678 }, {
1679 # shorts
1680 'url': 'https://www.youtube.com/shorts/BGQWPY4IigY',
1681 'only_matching': True,
1682 }, {
1683 'note': 'Storyboards',
1684 'url': 'https://www.youtube.com/watch?v=5KLPxDtMqe8',
1685 'info_dict': {
1686 'id': '5KLPxDtMqe8',
1687 'ext': 'mhtml',
1688 'format_id': 'sb0',
1689 'title': 'Your Brain is Plastic',
1690 'uploader_id': 'scishow',
1691 'description': 'md5:89cd86034bdb5466cd87c6ba206cd2bc',
1692 'upload_date': '20140324',
1693 'uploader': 'SciShow',
1694 }, 'params': {'format': 'mhtml', 'skip_download': True}
1695 }
1696 ]
1697
1698 @classmethod
1699 def suitable(cls, url):
1700 from ..utils import parse_qs
1701
1702 qs = parse_qs(url)
1703 if qs.get('list', [None])[0]:
1704 return False
1705 return super(YoutubeIE, cls).suitable(url)
1706
1707 def __init__(self, *args, **kwargs):
1708 super(YoutubeIE, self).__init__(*args, **kwargs)
1709 self._code_cache = {}
1710 self._player_cache = {}
1711
1712 def _extract_player_url(self, *ytcfgs, webpage=None):
1713 player_url = traverse_obj(
1714 ytcfgs, (..., 'PLAYER_JS_URL'), (..., 'WEB_PLAYER_CONTEXT_CONFIGS', ..., 'jsUrl'),
1715 get_all=False, expected_type=compat_str)
1716 if not player_url:
1717 return
1718 if player_url.startswith('//'):
1719 player_url = 'https:' + player_url
1720 elif not re.match(r'https?://', player_url):
1721 player_url = compat_urlparse.urljoin(
1722 'https://www.youtube.com', player_url)
1723 return player_url
1724
1725 def _download_player_url(self, video_id, fatal=False):
1726 res = self._download_webpage(
1727 'https://www.youtube.com/iframe_api',
1728 note='Downloading iframe API JS', video_id=video_id, fatal=fatal)
1729 if res:
1730 player_version = self._search_regex(
1731 r'player\\?/([0-9a-fA-F]{8})\\?/', res, 'player version', fatal=fatal)
1732 if player_version:
1733 return f'https://www.youtube.com/s/player/{player_version}/player_ias.vflset/en_US/base.js'
1734
1735 def _signature_cache_id(self, example_sig):
1736 """ Return a string representation of a signature """
1737 return '.'.join(compat_str(len(part)) for part in example_sig.split('.'))
1738
1739 @classmethod
1740 def _extract_player_info(cls, player_url):
1741 for player_re in cls._PLAYER_INFO_RE:
1742 id_m = re.search(player_re, player_url)
1743 if id_m:
1744 break
1745 else:
1746 raise ExtractorError('Cannot identify player %r' % player_url)
1747 return id_m.group('id')
1748
1749 def _load_player(self, video_id, player_url, fatal=True):
1750 player_id = self._extract_player_info(player_url)
1751 if player_id not in self._code_cache:
1752 code = self._download_webpage(
1753 player_url, video_id, fatal=fatal,
1754 note='Downloading player ' + player_id,
1755 errnote='Download of %s failed' % player_url)
1756 if code:
1757 self._code_cache[player_id] = code
1758 return self._code_cache.get(player_id)
1759
1760 def _extract_signature_function(self, video_id, player_url, example_sig):
1761 player_id = self._extract_player_info(player_url)
1762
1763 # Read from filesystem cache
1764 func_id = 'js_%s_%s' % (
1765 player_id, self._signature_cache_id(example_sig))
1766 assert os.path.basename(func_id) == func_id
1767
1768 cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
1769 if cache_spec is not None:
1770 return lambda s: ''.join(s[i] for i in cache_spec)
1771
1772 code = self._load_player(video_id, player_url)
1773 if code:
1774 res = self._parse_sig_js(code)
1775
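# Probe the signature function with a string of distinct characters (chr(0)..chr(n-1));
# the ord() of each output character then records which input index it came from,
# so the whole transformation can be cached and replayed as a plain list of indices.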
1776 test_string = ''.join(map(compat_chr, range(len(example_sig))))
1777 cache_res = res(test_string)
1778 cache_spec = [ord(c) for c in cache_res]
1779
1780 self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec)
1781 return res
1782
1783 def _print_sig_code(self, func, example_sig):
1784 if not self.get_param('youtube_print_sig_code'):
1785 return
1786
1787 def gen_sig_code(idxs):
1788 def _genslice(start, end, step):
1789 starts = '' if start == 0 else str(start)
1790 ends = (':%d' % (end + step)) if end + step >= 0 else ':'
1791 steps = '' if step == 1 else (':%d' % step)
1792 return 's[%s%s%s]' % (starts, ends, steps)
1793
1794 step = None
1795 # Quell pyflakes warnings - start will be set when step is set
1796 start = '(Never used)'
1797 for i, prev in zip(idxs[1:], idxs[:-1]):
1798 if step is not None:
1799 if i - prev == step:
1800 continue
1801 yield _genslice(start, prev, step)
1802 step = None
1803 continue
1804 if i - prev in [-1, 1]:
1805 step = i - prev
1806 start = prev
1807 continue
1808 else:
1809 yield 's[%d]' % prev
1810 if step is None:
1811 yield 's[%d]' % i
1812 else:
1813 yield _genslice(start, i, step)
1814
1815 test_string = ''.join(map(compat_chr, range(len(example_sig))))
1816 cache_res = func(test_string)
1817 cache_spec = [ord(c) for c in cache_res]
1818 expr_code = ' + '.join(gen_sig_code(cache_spec))
1819 signature_id_tuple = '(%s)' % (
1820 ', '.join(compat_str(len(p)) for p in example_sig.split('.')))
1821 code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
1822 ' return %s\n') % (signature_id_tuple, expr_code)
1823 self.to_screen('Extracted signature function:\n' + code)
1824
1825 def _parse_sig_js(self, jscode):
1826 funcname = self._search_regex(
1827 (r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1828 r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1829 r'\bm=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(h\.s\)\)',
1830 r'\bc&&\(c=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(c\)\)',
1831 r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\);[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\)',
1832 r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
1833 r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
1834 # Obsolete patterns
1835 r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1836 r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
1837 r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1838 r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1839 r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1840 r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1841 r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1842 r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\('),
1843 jscode, 'Initial JS player signature function name', group='sig')
1844
1845 jsi = JSInterpreter(jscode)
1846 initial_function = jsi.extract_function(funcname)
1847 return lambda s: initial_function([s])
1848
1849 def _decrypt_signature(self, s, video_id, player_url):
1850 """Turn the encrypted s field into a working signature"""
1851
1852 if player_url is None:
1853 raise ExtractorError('Cannot decrypt signature without player_url')
1854
1855 try:
1856 player_id = (player_url, self._signature_cache_id(s))
1857 if player_id not in self._player_cache:
1858 func = self._extract_signature_function(
1859 video_id, player_url, s
1860 )
1861 self._player_cache[player_id] = func
1862 func = self._player_cache[player_id]
1863 self._print_sig_code(func, s)
1864 return func(s)
1865 except Exception as e:
1866 raise ExtractorError('Signature extraction failed: ' + traceback.format_exc(), cause=e)
1867
1868 def _decrypt_nsig(self, s, video_id, player_url):
1869 """Turn the encrypted n field into a working signature"""
1870 if player_url is None:
1871 raise ExtractorError('Cannot decrypt nsig without player_url')
1872 if player_url.startswith('//'):
1873 player_url = 'https:' + player_url
1874 elif not re.match(r'https?://', player_url):
1875 player_url = compat_urlparse.urljoin(
1876 'https://www.youtube.com', player_url)
1877
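# Decrypted n values are cached per raw value, so formats sharing the same 'n' parameter are only decrypted once.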
1878 sig_id = ('nsig_value', s)
1879 if sig_id in self._player_cache:
1880 return self._player_cache[sig_id]
1881
1882 try:
1883 player_id = ('nsig', player_url)
1884 if player_id not in self._player_cache:
1885 self._player_cache[player_id] = self._extract_n_function(video_id, player_url)
1886 func = self._player_cache[player_id]
1887 self._player_cache[sig_id] = func(s)
1888 self.write_debug(f'Decrypted nsig {s} => {self._player_cache[sig_id]}')
1889 return self._player_cache[sig_id]
1890 except Exception as e:
1891 raise ExtractorError(traceback.format_exc(), cause=e, video_id=video_id)
1892
1893 def _extract_n_function_name(self, jscode):
1894 return self._search_regex(
1895 (r'\.get\("n"\)\)&&\(b=(?P<nfunc>[a-zA-Z0-9$]{3})\([a-zA-Z0-9]\)',),
1896 jscode, 'Initial JS player n function name', group='nfunc')
1897
1898 def _extract_n_function(self, video_id, player_url):
1899 player_id = self._extract_player_info(player_url)
1900 func_code = self._downloader.cache.load('youtube-nsig', player_id)
1901
1902 if func_code:
1903 jsi = JSInterpreter(func_code)
1904 else:
1905 jscode = self._load_player(video_id, player_url)
1906 funcname = self._extract_n_function_name(jscode)
1907 jsi = JSInterpreter(jscode)
1908 func_code = jsi.extract_function_code(funcname)
1909 self._downloader.cache.store('youtube-nsig', player_id, func_code)
1910
1911 if self.get_param('youtube_print_sig_code'):
1912 self.to_screen(f'Extracted nsig function from {player_id}:\n{func_code[1]}\n')
1913
1914 return lambda s: jsi.extract_function_from_code(*func_code)([s])
1915
1916 def _extract_signature_timestamp(self, video_id, player_url, ytcfg=None, fatal=False):
1917 """
1918 Extract signatureTimestamp (sts)
1919 Required to tell API what sig/player version is in use.
1920 """
1921 sts = None
1922 if isinstance(ytcfg, dict):
1923 sts = int_or_none(ytcfg.get('STS'))
1924
1925 if not sts:
1926 # Attempt to extract from player
1927 if player_url is None:
1928 error_msg = 'Cannot extract signature timestamp without player_url.'
1929 if fatal:
1930 raise ExtractorError(error_msg)
1931 self.report_warning(error_msg)
1932 return
1933 code = self._load_player(video_id, player_url, fatal=fatal)
1934 if code:
1935 sts = int_or_none(self._search_regex(
1936 r'(?:signatureTimestamp|sts)\s*:\s*(?P<sts>[0-9]{5})', code,
1937 'JS player signature timestamp', group='sts', fatal=fatal))
1938 return sts
1939
1940 def _mark_watched(self, video_id, player_responses):
1941 playback_url = get_first(
1942 player_responses, ('playbackTracking', 'videostatsPlaybackUrl', 'baseUrl'),
1943 expected_type=url_or_none)
1944 if not playback_url:
1945 self.report_warning('Unable to mark watched')
1946 return
1947 parsed_playback_url = compat_urlparse.urlparse(playback_url)
1948 qs = compat_urlparse.parse_qs(parsed_playback_url.query)
1949
1950 # The cpn generation algorithm is reverse engineered from base.js.
1951 # In fact, it works even with a dummy cpn.
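# The cpn ('client playback nonce') is 16 characters drawn from the 64-symbol alphabet below.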
1952 CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
1953 cpn = ''.join((CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(0, 16)))
1954
1955 qs.update({
1956 'ver': ['2'],
1957 'cpn': [cpn],
1958 })
1959 playback_url = compat_urlparse.urlunparse(
1960 parsed_playback_url._replace(query=compat_urllib_parse_urlencode(qs, True)))
1961
1962 self._download_webpage(
1963 playback_url, video_id, 'Marking watched',
1964 'Unable to mark watched', fatal=False)
1965
1966 @staticmethod
1967 def _extract_urls(webpage):
1968 # Embedded YouTube player
1969 entries = [
1970 unescapeHTML(mobj.group('url'))
1971 for mobj in re.finditer(r'''(?x)
1972 (?:
1973 <iframe[^>]+?src=|
1974 data-video-url=|
1975 <embed[^>]+?src=|
1976 embedSWF\(?:\s*|
1977 <object[^>]+data=|
1978 new\s+SWFObject\(
1979 )
1980 (["\'])
1981 (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
1982 (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
1983 \1''', webpage)]
1984
1985 # lazyYT YouTube embed
1986 entries.extend(list(map(
1987 unescapeHTML,
1988 re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage))))
1989
1990 # Wordpress "YouTube Video Importer" plugin
1991 matches = re.findall(r'''(?x)<div[^>]+
1992 class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
1993 data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
1994 entries.extend(m[-1] for m in matches)
1995
1996 return entries
1997
1998 @staticmethod
1999 def _extract_url(webpage):
2000 urls = YoutubeIE._extract_urls(webpage)
2001 return urls[0] if urls else None
2002
2003 @classmethod
2004 def extract_id(cls, url):
2005 mobj = re.match(cls._VALID_URL, url, re.VERBOSE)
2006 if mobj is None:
2007 raise ExtractorError('Invalid URL: %s' % url)
2008 return mobj.group('id')
2009
2010 def _extract_chapters_from_json(self, data, duration):
2011 chapter_list = traverse_obj(
2012 data, (
2013 'playerOverlays', 'playerOverlayRenderer', 'decoratedPlayerBarRenderer',
2014 'decoratedPlayerBarRenderer', 'playerBar', 'chapteredPlayerBarRenderer', 'chapters'
2015 ), expected_type=list)
2016
2017 return self._extract_chapters(
2018 chapter_list,
2019 chapter_time=lambda chapter: float_or_none(
2020 traverse_obj(chapter, ('chapterRenderer', 'timeRangeStartMillis')), scale=1000),
2021 chapter_title=lambda chapter: traverse_obj(
2022 chapter, ('chapterRenderer', 'title', 'simpleText'), expected_type=str),
2023 duration=duration)
2024
2025 def _extract_chapters_from_engagement_panel(self, data, duration):
2026 content_list = traverse_obj(
2027 data,
2028 ('engagementPanels', ..., 'engagementPanelSectionListRenderer', 'content', 'macroMarkersListRenderer', 'contents'),
2029 expected_type=list, default=[])
2030 chapter_time = lambda chapter: parse_duration(self._get_text(chapter, 'timeDescription'))
2031 chapter_title = lambda chapter: self._get_text(chapter, 'title')
2032
2033 return next((
2034 filter(None, (
2035 self._extract_chapters(
2036 traverse_obj(contents, (..., 'macroMarkersListItemRenderer')),
2037 chapter_time, chapter_title, duration)
2038 for contents in content_list
2039 ))), [])
2040
2041 def _extract_chapters(self, chapter_list, chapter_time, chapter_title, duration):
2042 chapters = []
2043 last_chapter = {'start_time': 0}
2044 for idx, chapter in enumerate(chapter_list or []):
2045 title = chapter_title(chapter)
2046 start_time = chapter_time(chapter)
2047 if start_time is None:
2048 continue
2049 last_chapter['end_time'] = start_time
2050 if start_time < last_chapter['start_time']:
2051 if idx == 1:
2052 chapters.pop()
2053 self.report_warning('Invalid start time for chapter "%s"' % last_chapter['title'])
2054 else:
2055 self.report_warning(f'Invalid start time for chapter "{title}"')
2056 continue
2057 last_chapter = {'start_time': start_time, 'title': title}
2058 chapters.append(last_chapter)
2059 last_chapter['end_time'] = duration
2060 return chapters
2061
2062 def _extract_yt_initial_variable(self, webpage, regex, video_id, name):
2063 return self._parse_json(self._search_regex(
2064 (r'%s\s*%s' % (regex, self._YT_INITIAL_BOUNDARY_RE),
2065 regex), webpage, name, default='{}'), video_id, fatal=False)
2066
2067 @staticmethod
2068 def parse_time_text(time_text):
2069 """
2070 Parse the comment time text
2071 time_text is in the format 'X units ago (edited)'
2072 """
2073 time_text_split = time_text.split(' ')
2074 if len(time_text_split) >= 3:
2075 try:
2076 return datetime_from_str('now-%s%s' % (time_text_split[0], time_text_split[1]), precision='auto')
2077 except ValueError:
2078 return None
2079
2080 def _extract_comment(self, comment_renderer, parent=None):
2081 comment_id = comment_renderer.get('commentId')
2082 if not comment_id:
2083 return
2084
2085 text = self._get_text(comment_renderer, 'contentText')
2086
2087 # note: timestamp is an estimate calculated from the current time and time_text
2088 time_text = self._get_text(comment_renderer, 'publishedTimeText') or ''
2089 time_text_dt = self.parse_time_text(time_text)
2090 if isinstance(time_text_dt, datetime.datetime):
2091 timestamp = calendar.timegm(time_text_dt.timetuple())
2092 author = self._get_text(comment_renderer, 'authorText')
2093 author_id = try_get(comment_renderer,
2094 lambda x: x['authorEndpoint']['browseEndpoint']['browseId'], compat_str)
2095
2096 votes = parse_count(try_get(comment_renderer, (lambda x: x['voteCount']['simpleText'],
2097 lambda x: x['likeCount']), compat_str)) or 0
2098 author_thumbnail = try_get(comment_renderer,
2099 lambda x: x['authorThumbnail']['thumbnails'][-1]['url'], compat_str)
2100
2101 author_is_uploader = try_get(comment_renderer, lambda x: x['authorIsChannelOwner'], bool)
2102 is_favorited = 'creatorHeart' in (try_get(
2103 comment_renderer, lambda x: x['actionButtons']['commentActionButtonsRenderer'], dict) or {})
2104 return {
2105 'id': comment_id,
2106 'text': text,
2107 'timestamp': timestamp,
2108 'time_text': time_text,
2109 'like_count': votes,
2110 'is_favorited': is_favorited,
2111 'author': author,
2112 'author_id': author_id,
2113 'author_thumbnail': author_thumbnail,
2114 'author_is_uploader': author_is_uploader,
2115 'parent': parent or 'root'
2116 }
2117
2118 def _comment_entries(self, root_continuation_data, ytcfg, video_id, parent=None, tracker=None):
2119
2120 get_single_config_arg = lambda c: self._configuration_arg(c, [''])[0]
2121
2122 def extract_header(contents):
2123 _continuation = None
2124 for content in contents:
2125 comments_header_renderer = traverse_obj(content, 'commentsHeaderRenderer')
2126 expected_comment_count = parse_count(self._get_text(
2127 comments_header_renderer, 'countText', 'commentsCount', max_runs=1))
2128
2129 if expected_comment_count:
2130 tracker['est_total'] = expected_comment_count
2131 self.to_screen(f'Downloading ~{expected_comment_count} comments')
2132 comment_sort_index = int(get_single_config_arg('comment_sort') != 'top') # 1 = new, 0 = top
2133
2134 sort_menu_item = try_get(
2135 comments_header_renderer,
2136 lambda x: x['sortMenu']['sortFilterSubMenuRenderer']['subMenuItems'][comment_sort_index], dict) or {}
2137 sort_continuation_ep = sort_menu_item.get('serviceEndpoint') or {}
2138
2139 _continuation = self._extract_continuation_ep_data(sort_continuation_ep) or self._extract_continuation(sort_menu_item)
2140 if not _continuation:
2141 continue
2142
2143 sort_text = str_or_none(sort_menu_item.get('title'))
2144 if not sort_text:
2145 sort_text = 'top comments' if comment_sort_index == 0 else 'newest first'
2146 self.to_screen('Sorting comments by %s' % sort_text.lower())
2147 break
2148 return _continuation
2149
2150 def extract_thread(contents):
2151 if not parent:
2152 tracker['current_page_thread'] = 0
2153 for content in contents:
2154 if not parent and tracker['total_parent_comments'] >= max_parents:
2155 yield
2156 comment_thread_renderer = try_get(content, lambda x: x['commentThreadRenderer'])
2157 comment_renderer = get_first(
2158 (comment_thread_renderer, content), [['commentRenderer', ('comment', 'commentRenderer')]],
2159 expected_type=dict, default={})
2160
2161 comment = self._extract_comment(comment_renderer, parent)
2162 if not comment:
2163 continue
2164
2165 tracker['running_total'] += 1
2166 tracker['total_reply_comments' if parent else 'total_parent_comments'] += 1
2167 yield comment
2168
2169 # Attempt to get the replies
2170 comment_replies_renderer = try_get(
2171 comment_thread_renderer, lambda x: x['replies']['commentRepliesRenderer'], dict)
2172
2173 if comment_replies_renderer:
2174 tracker['current_page_thread'] += 1
2175 comment_entries_iter = self._comment_entries(
2176 comment_replies_renderer, ytcfg, video_id,
2177 parent=comment.get('id'), tracker=tracker)
2178 for reply_comment in itertools.islice(comment_entries_iter, min(max_replies_per_thread, max(0, max_replies - tracker['total_reply_comments']))):
2179 yield reply_comment
2180
2181 # Keeps track of counts across recursive calls
2182 if not tracker:
2183 tracker = dict(
2184 running_total=0,
2185 est_total=0,
2186 current_page_thread=0,
2187 total_parent_comments=0,
2188 total_reply_comments=0)
2189
2190 # TODO: Deprecated
2191 # YouTube comments have a max depth of 2
2192 max_depth = int_or_none(get_single_config_arg('max_comment_depth'))
2193 if max_depth:
2194 self._downloader.deprecation_warning(
2195 '[youtube] max_comment_depth extractor argument is deprecated. Set max replies in the max-comments extractor argument instead.')
2196 if max_depth == 1 and parent:
2197 return
2198
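# The max_comments extractor argument is a comma-separated list of limits:
# total comments, parent comments, replies, and replies per thread.
# Any value left out (padded with '' below) defaults to unlimited (sys.maxsize).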
2199 max_comments, max_parents, max_replies, max_replies_per_thread, *_ = map(
2200 lambda p: int_or_none(p, default=sys.maxsize), self._configuration_arg('max_comments', ) + [''] * 4)
2201
2202 continuation = self._extract_continuation(root_continuation_data)
2203 message = self._get_text(root_continuation_data, ('contents', ..., 'messageRenderer', 'text'), max_runs=1)
2204 if message and not parent:
2205 self.report_warning(message, video_id=video_id)
2206
2207 response = None
2208 is_first_continuation = parent is None
2209
2210 for page_num in itertools.count(0):
2211 if not continuation:
2212 break
2213 headers = self.generate_api_headers(ytcfg=ytcfg, visitor_data=self._extract_visitor_data(response))
2214 comment_prog_str = f"({tracker['running_total']}/{tracker['est_total']})"
2215 if page_num == 0:
2216 if is_first_continuation:
2217 note_prefix = 'Downloading comment section API JSON'
2218 else:
2219 note_prefix = ' Downloading comment API JSON reply thread %d %s' % (
2220 tracker['current_page_thread'], comment_prog_str)
2221 else:
2222 note_prefix = '%sDownloading comment%s API JSON page %d %s' % (
2223 ' ' if parent else '', ' replies' if parent else '',
2224 page_num, comment_prog_str)
2225
2226 response = self._extract_response(
2227 item_id=None, query=continuation,
2228 ep='next', ytcfg=ytcfg, headers=headers, note=note_prefix,
2229 check_get_keys='onResponseReceivedEndpoints')
2230
2231 continuation_contents = traverse_obj(
2232 response, 'onResponseReceivedEndpoints', expected_type=list, default=[])
2233
2234 continuation = None
2235 for continuation_section in continuation_contents:
2236 continuation_items = traverse_obj(
2237 continuation_section,
2238 (('reloadContinuationItemsCommand', 'appendContinuationItemsAction'), 'continuationItems'),
2239 get_all=False, expected_type=list) or []
2240 if is_first_continuation:
2241 continuation = extract_header(continuation_items)
2242 is_first_continuation = False
2243 if continuation:
2244 break
2245 continue
2246
2247 for entry in extract_thread(continuation_items):
2248 if not entry:
2249 return
2250 yield entry
2251 continuation = self._extract_continuation({'contents': continuation_items})
2252 if continuation:
2253 break
2254
2255 def _get_comments(self, ytcfg, video_id, contents, webpage):
2256 """Entry for comment extraction"""
2257 def _real_comment_extract(contents):
2258 renderer = next((
2259 item for item in traverse_obj(contents, (..., 'itemSectionRenderer'), default={})
2260 if item.get('sectionIdentifier') == 'comment-item-section'), None)
2261 yield from self._comment_entries(renderer, ytcfg, video_id)
2262
2263 max_comments = int_or_none(self._configuration_arg('max_comments', [''])[0])
2264 # Force English regardless of account setting to prevent parsing issues
2265 # See: https://github.com/yt-dlp/yt-dlp/issues/532
2266 ytcfg = copy.deepcopy(ytcfg)
2267 traverse_obj(
2268 ytcfg, ('INNERTUBE_CONTEXT', 'client'), expected_type=dict, default={})['hl'] = 'en'
2269 return itertools.islice(_real_comment_extract(contents), 0, max_comments)
2270
2271 @staticmethod
2272 def _get_checkok_params():
2273 return {'contentCheckOk': True, 'racyCheckOk': True}
2274
2275 @classmethod
2276 def _generate_player_context(cls, sts=None):
2277 context = {
2278 'html5Preference': 'HTML5_PREF_WANTS',
2279 }
2280 if sts is not None:
2281 context['signatureTimestamp'] = sts
2282 return {
2283 'playbackContext': {
2284 'contentPlaybackContext': context
2285 },
2286 **cls._get_checkok_params()
2287 }
2288
2289 @staticmethod
2290 def _is_agegated(player_response):
2291 if traverse_obj(player_response, ('playabilityStatus', 'desktopLegacyAgeGateReason')):
2292 return True
2293
2294 reasons = traverse_obj(player_response, ('playabilityStatus', ('status', 'reason')), default=[])
2295 AGE_GATE_REASONS = (
2296 'confirm your age', 'age-restricted', 'inappropriate', # reason
2297 'age_verification_required', 'age_check_required', # status
2298 )
2299 return any(expected in reason for expected in AGE_GATE_REASONS for reason in reasons)
2300
2301 @staticmethod
2302 def _is_unplayable(player_response):
2303 return traverse_obj(player_response, ('playabilityStatus', 'status')) == 'UNPLAYABLE'
2304
2305 def _extract_player_response(self, client, video_id, master_ytcfg, player_ytcfg, player_url, initial_pr):
2306
2307 session_index = self._extract_session_index(player_ytcfg, master_ytcfg)
2308 syncid = self._extract_account_syncid(player_ytcfg, master_ytcfg, initial_pr)
2309 sts = self._extract_signature_timestamp(video_id, player_url, master_ytcfg, fatal=False) if player_url else None
2310 headers = self.generate_api_headers(
2311 ytcfg=player_ytcfg, account_syncid=syncid, session_index=session_index, default_client=client)
2312
2313 yt_query = {'videoId': video_id}
2314 yt_query.update(self._generate_player_context(sts))
2315 return self._extract_response(
2316 item_id=video_id, ep='player', query=yt_query,
2317 ytcfg=player_ytcfg, headers=headers, fatal=True,
2318 default_client=client,
2319 note='Downloading %s player API JSON' % client.replace('_', ' ').strip()
2320 ) or None
2321
2322 def _get_requested_clients(self, url, smuggled_data):
2323 requested_clients = []
2324 default = ['android', 'web']
2325 allowed_clients = sorted(
2326 [client for client in INNERTUBE_CLIENTS.keys() if client[:1] != '_'],
2327 key=lambda client: INNERTUBE_CLIENTS[client]['priority'], reverse=True)
2328 for client in self._configuration_arg('player_client'):
2329 if client in allowed_clients:
2330 requested_clients.append(client)
2331 elif client == 'default':
2332 requested_clients.extend(default)
2333 elif client == 'all':
2334 requested_clients.extend(allowed_clients)
2335 else:
2336 self.report_warning(f'Skipping unsupported client {client}')
2337 if not requested_clients:
2338 requested_clients = default
2339
2340 if smuggled_data.get('is_music_url') or self.is_music_url(url):
2341 requested_clients.extend(
2342 f'{client}_music' for client in requested_clients if f'{client}_music' in INNERTUBE_CLIENTS)
2343
2344 return orderedSet(requested_clients)
2345
2346 def _extract_player_ytcfg(self, client, video_id):
2347 url = {
2348 'web_music': 'https://music.youtube.com',
2349 'web_embedded': f'https://www.youtube.com/embed/{video_id}?html5=1'
2350 }.get(client)
2351 if not url:
2352 return {}
2353 webpage = self._download_webpage(url, video_id, fatal=False, note=f'Downloading {client} config')
2354 return self.extract_ytcfg(video_id, webpage) or {}
2355
2356 def _extract_player_responses(self, clients, video_id, webpage, master_ytcfg):
2357 initial_pr = None
2358 if webpage:
2359 initial_pr = self._extract_yt_initial_variable(
2360 webpage, self._YT_INITIAL_PLAYER_RESPONSE_RE,
2361 video_id, 'initial player response')
2362
2363 original_clients = clients
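# Clients are processed with pop() (from the end), so reverse the list to preserve the requested order.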
2364 clients = clients[::-1]
2365 prs = []
2366
2367 def append_client(client_name):
2368 if client_name in INNERTUBE_CLIENTS and client_name not in original_clients:
2369 clients.append(client_name)
2370
2371 # Android player_response does not have microFormats which are needed for
2372 # extraction of some data. So we return the initial_pr with formats
2373 # stripped out even if not requested by the user
2374 # See: https://github.com/yt-dlp/yt-dlp/issues/501
2375 if initial_pr:
2376 pr = dict(initial_pr)
2377 pr['streamingData'] = None
2378 prs.append(pr)
2379
2380 last_error = None
2381 tried_iframe_fallback = False
2382 player_url = None
2383 while clients:
2384 client = clients.pop()
2385 player_ytcfg = master_ytcfg if client == 'web' else {}
2386 if 'configs' not in self._configuration_arg('player_skip'):
2387 player_ytcfg = self._extract_player_ytcfg(client, video_id) or player_ytcfg
2388
2389 player_url = player_url or self._extract_player_url(master_ytcfg, player_ytcfg, webpage=webpage)
2390 require_js_player = self._get_default_ytcfg(client).get('REQUIRE_JS_PLAYER')
2391 if 'js' in self._configuration_arg('player_skip'):
2392 require_js_player = False
2393 player_url = None
2394
2395 if not player_url and not tried_iframe_fallback and require_js_player:
2396 player_url = self._download_player_url(video_id)
2397 tried_iframe_fallback = True
2398
2399 try:
2400 pr = initial_pr if client == 'web' and initial_pr else self._extract_player_response(
2401 client, video_id, player_ytcfg or master_ytcfg, player_ytcfg, player_url if require_js_player else None, initial_pr)
2402 except ExtractorError as e:
2403 if last_error:
2404 self.report_warning(last_error)
2405 last_error = e
2406 continue
2407
2408 if pr:
2409 prs.append(pr)
2410
2411 # creator clients can bypass AGE_VERIFICATION_REQUIRED if logged in
2412 if client.endswith('_agegate') and self._is_unplayable(pr) and self.is_authenticated:
2413 append_client(client.replace('_agegate', '_creator'))
2414 elif self._is_agegated(pr):
2415 append_client(f'{client}_agegate')
2416
2417 if last_error:
2418 if not len(prs):
2419 raise last_error
2420 self.report_warning(last_error)
2421 return prs, player_url
2422
2423 def _extract_formats(self, streaming_data, video_id, player_url, is_live):
2424 itags, stream_ids = {}, []
2425 itag_qualities, res_qualities = {}, {}
2426 q = qualities([
2427 # Normally 'tiny' is the smallest video-only format. But
2428 # audio-only formats with unknown quality may get tagged as tiny
2429 'tiny',
2430 'audio_quality_ultralow', 'audio_quality_low', 'audio_quality_medium', 'audio_quality_high', # Audio only formats
2431 'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres'
2432 ])
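# q() ranks a quality name by its position in the list above; later entries sort higher.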
2433 streaming_formats = traverse_obj(streaming_data, (..., ('formats', 'adaptiveFormats'), ...), default=[])
2434
2435 for fmt in streaming_formats:
2436 if fmt.get('targetDurationSec') or fmt.get('drmFamilies'):
2437 continue
2438
2439 itag = str_or_none(fmt.get('itag'))
2440 audio_track = fmt.get('audioTrack') or {}
2441 stream_id = '%s.%s' % (itag or '', audio_track.get('id', ''))
2442 if stream_id in stream_ids:
2443 continue
2444
2445 quality = fmt.get('quality')
2446 height = int_or_none(fmt.get('height'))
2447 if quality == 'tiny' or not quality:
2448 quality = fmt.get('audioQuality', '').lower() or quality
2449 # The 3gp format (17) in android client has a quality of "small",
2450 # but is actually worse than other formats
2451 if itag == '17':
2452 quality = 'tiny'
2453 if quality:
2454 if itag:
2455 itag_qualities[itag] = quality
2456 if height:
2457 res_qualities[height] = quality
2458 # FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment
2459 # (adding `&sq=0` to the URL) and parsing emsg box to determine the
2460 # number of fragments that would subsequently be requested with (`&sq=N`)
2461 if fmt.get('type') == 'FORMAT_STREAM_TYPE_OTF':
2462 continue
2463
2464 fmt_url = fmt.get('url')
2465 if not fmt_url:
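# Protected formats carry a 'signatureCipher' instead of a plain URL: a query string
# holding the base URL ('url'), the scrambled signature ('s') and the name of the
# query parameter ('sp') that the decrypted signature must be appended under.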
2466 sc = compat_parse_qs(fmt.get('signatureCipher'))
2467 fmt_url = url_or_none(try_get(sc, lambda x: x['url'][0]))
2468 encrypted_sig = try_get(sc, lambda x: x['s'][0])
2469 if not (sc and fmt_url and encrypted_sig):
2470 continue
2471 if not player_url:
2472 continue
2473 signature = self._decrypt_signature(sc['s'][0], video_id, player_url)
2474 sp = try_get(sc, lambda x: x['sp'][0]) or 'signature'
2475 fmt_url += '&' + sp + '=' + signature
2476
2477 query = parse_qs(fmt_url)
2478 throttled = False
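# The 'n' query parameter must be transformed by the player JS; if that fails,
# downloads of this format may be throttled (hence the lower source_preference below).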
2479 if query.get('n'):
2480 try:
2481 fmt_url = update_url_query(fmt_url, {
2482 'n': self._decrypt_nsig(query['n'][0], video_id, player_url)})
2483 except ExtractorError as e:
2484 self.report_warning(
2485 f'nsig extraction failed: You may experience throttling for some formats\n'
2486 f'n = {query["n"][0]} ; player = {player_url}\n{e}', only_once=True)
2487 throttled = True
2488
2489 if itag:
2490 itags[itag] = 'https'
2491 stream_ids.append(stream_id)
2492
2493 tbr = float_or_none(
2494 fmt.get('averageBitrate') or fmt.get('bitrate'), 1000)
2495 dct = {
2496 'asr': int_or_none(fmt.get('audioSampleRate')),
2497 'filesize': int_or_none(fmt.get('contentLength')),
2498 'format_id': itag,
2499 'format_note': join_nonempty(
2500 '%s%s' % (audio_track.get('displayName') or '',
2501 ' (default)' if audio_track.get('audioIsDefault') else ''),
2502 fmt.get('qualityLabel') or quality.replace('audio_quality_', ''),
2503 throttled and 'THROTTLED', delim=', '),
2504 'source_preference': -10 if throttled else -1,
2505 'fps': int_or_none(fmt.get('fps')) or None,
2506 'height': height,
2507 'quality': q(quality),
2508 'tbr': tbr,
2509 'url': fmt_url,
2510 'width': int_or_none(fmt.get('width')),
2511 'language': audio_track.get('id', '').split('.')[0],
2512 'language_preference': 1 if audio_track.get('audioIsDefault') else -1,
2513 }
2514 mime_mobj = re.match(
2515 r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', fmt.get('mimeType') or '')
2516 if mime_mobj:
2517 dct['ext'] = mimetype2ext(mime_mobj.group(1))
2518 dct.update(parse_codecs(mime_mobj.group(2)))
2519 no_audio = dct.get('acodec') == 'none'
2520 no_video = dct.get('vcodec') == 'none'
2521 if no_audio:
2522 dct['vbr'] = tbr
2523 if no_video:
2524 dct['abr'] = tbr
2525 if no_audio or no_video:
2526 dct['downloader_options'] = {
2527 # Youtube throttles chunks >~10M
2528 'http_chunk_size': 10485760,
2529 }
2530 if dct.get('ext'):
2531 dct['container'] = dct['ext'] + '_dash'
2532 yield dct
2533
2534 skip_manifests = self._configuration_arg('skip')
2535 get_dash = (
2536 (not is_live or self._configuration_arg('include_live_dash'))
2537 and 'dash' not in skip_manifests and self.get_param('youtube_include_dash_manifest', True))
2538 get_hls = 'hls' not in skip_manifests and self.get_param('youtube_include_hls_manifest', True)
2539
2540 def process_manifest_format(f, proto, itag):
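# Skip manifest formats whose itag was already extracted under the same protocol;
# otherwise suffix the format_id with the protocol and map the quality from the
# itag/resolution tables built while processing the progressive formats.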
2541 if itag in itags:
2542 if itags[itag] == proto or f'{itag}-{proto}' in itags:
2543 return False
2544 itag = f'{itag}-{proto}'
2545 if itag:
2546 f['format_id'] = itag
2547 itags[itag] = proto
2548
2549 f['quality'] = next((
2550 q(qdict[val])
2551 for val, qdict in ((f.get('format_id', '').split('-')[0], itag_qualities), (f.get('height'), res_qualities))
2552 if val in qdict), -1)
2553 return True
2554
2555 for sd in streaming_data:
2556 hls_manifest_url = get_hls and sd.get('hlsManifestUrl')
2557 if hls_manifest_url:
2558 for f in self._extract_m3u8_formats(hls_manifest_url, video_id, 'mp4', fatal=False):
2559 if process_manifest_format(f, 'hls', self._search_regex(
2560 r'/itag/(\d+)', f['url'], 'itag', default=None)):
2561 yield f
2562
2563 dash_manifest_url = get_dash and sd.get('dashManifestUrl')
2564 if dash_manifest_url:
2565 for f in self._extract_mpd_formats(dash_manifest_url, video_id, fatal=False):
2566 if process_manifest_format(f, 'dash', f['format_id']):
2567 f['filesize'] = int_or_none(self._search_regex(
2568 r'/clen/(\d+)', f.get('fragment_base_url') or f['url'], 'file size', default=None))
2569 yield f
2570
2571 def _extract_storyboard(self, player_responses, duration):
2572 spec = get_first(
2573 player_responses, ('storyboards', 'playerStoryboardSpecRenderer', 'spec'), default='').split('|')[::-1]
2574 if not spec:
2575 return
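# The spec is '|'-separated: a templated base URL followed by one '#'-separated entry
# per storyboard level (width, height, frame count, grid columns and rows, plus the
# '$N' substitution and 'sigh' token used to build the fragment URLs).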
2576 base_url = spec.pop()
2577 L = len(spec) - 1
2578 for i, args in enumerate(spec):
2579 args = args.split('#')
2580 counts = list(map(int_or_none, args[:5]))
2581 if len(args) != 8 or not all(counts):
2582 self.report_warning(f'Malformed storyboard {i}: {"#".join(args)}{bug_reports_message()}')
2583 continue
2584 width, height, frame_count, cols, rows = counts
2585 N, sigh = args[6:]
2586
2587 url = base_url.replace('$L', str(L - i)).replace('$N', N) + f'&sigh={sigh}'
2588 fragment_count = frame_count / (cols * rows)
2589 fragment_duration = duration / fragment_count
2590 yield {
2591 'format_id': f'sb{i}',
2592 'format_note': 'storyboard',
2593 'ext': 'mhtml',
2594 'protocol': 'mhtml',
2595 'acodec': 'none',
2596 'vcodec': 'none',
2597 'url': url,
2598 'width': width,
2599 'height': height,
2600 'fragments': [{
2601 'path': url.replace('$M', str(j)),
2602 'duration': min(fragment_duration, duration - (j * fragment_duration)),
2603 } for j in range(math.ceil(fragment_count))],
2604 }
2605
2606 def _real_extract(self, url):
2607 url, smuggled_data = unsmuggle_url(url, {})
2608 video_id = self._match_id(url)
2609
2610 base_url = self.http_scheme() + '//www.youtube.com/'
2611 webpage_url = base_url + 'watch?v=' + video_id
2612 webpage = None
2613 if 'webpage' not in self._configuration_arg('player_skip'):
2614 webpage = self._download_webpage(
2615 webpage_url + '&bpctr=9999999999&has_verified=1', video_id, fatal=False)
2616
2617 master_ytcfg = self.extract_ytcfg(video_id, webpage) or self._get_default_ytcfg()
2618
2619 player_responses, player_url = self._extract_player_responses(
2620 self._get_requested_clients(url, smuggled_data),
2621 video_id, webpage, master_ytcfg)
2622
2623 playability_statuses = traverse_obj(
2624 player_responses, (..., 'playabilityStatus'), expected_type=dict, default=[])
2625
2626 trailer_video_id = get_first(
2627 playability_statuses,
2628 ('errorScreen', 'playerLegacyDesktopYpcTrailerRenderer', 'trailerVideoId'),
2629 expected_type=str)
2630 if trailer_video_id:
2631 return self.url_result(
2632 trailer_video_id, self.ie_key(), trailer_video_id)
2633
2634 search_meta = ((lambda x: self._html_search_meta(x, webpage, default=None))
2635 if webpage else (lambda x: None))
2636
2637 video_details = traverse_obj(
2638 player_responses, (..., 'videoDetails'), expected_type=dict, default=[])
2639 microformats = traverse_obj(
2640 player_responses, (..., 'microformat', 'playerMicroformatRenderer'),
2641 expected_type=dict, default=[])
2642 video_title = (
2643 get_first(video_details, 'title')
2644 or self._get_text(microformats, (..., 'title'))
2645 or search_meta(['og:title', 'twitter:title', 'title']))
2646 video_description = get_first(video_details, 'shortDescription')
2647
2648 multifeed_metadata_list = get_first(
2649 player_responses,
2650 ('multicamera', 'playerLegacyMulticameraRenderer', 'metadataList'),
2651 expected_type=str)
2652 if multifeed_metadata_list and not smuggled_data.get('force_singlefeed'):
2653 if self.get_param('noplaylist'):
2654 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
2655 else:
2656 entries = []
2657 feed_ids = []
2658 for feed in multifeed_metadata_list.split(','):
2659 # Unquote should take place before split on comma (,) since textual
2660 # fields may contain comma as well (see
2661 # https://github.com/ytdl-org/youtube-dl/issues/8536)
2662 feed_data = compat_parse_qs(
2663 compat_urllib_parse_unquote_plus(feed))
2664
2665 def feed_entry(name):
2666 return try_get(
2667 feed_data, lambda x: x[name][0], compat_str)
2668
2669 feed_id = feed_entry('id')
2670 if not feed_id:
2671 continue
2672 feed_title = feed_entry('title')
2673 title = video_title
2674 if feed_title:
2675 title += ' (%s)' % feed_title
2676 entries.append({
2677 '_type': 'url_transparent',
2678 'ie_key': 'Youtube',
2679 'url': smuggle_url(
2680 '%swatch?v=%s' % (base_url, feed_data['id'][0]),
2681 {'force_singlefeed': True}),
2682 'title': title,
2683 })
2684 feed_ids.append(feed_id)
2685 self.to_screen(
2686 'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
2687 % (', '.join(feed_ids), video_id))
2688 return self.playlist_result(
2689 entries, video_id, video_title, video_description)
2690
2691 live_broadcast_details = traverse_obj(microformats, (..., 'liveBroadcastDetails'))
2692 is_live = get_first(video_details, 'isLive')
2693 if is_live is None:
2694 is_live = get_first(live_broadcast_details, 'isLiveNow')
2695
2696 streaming_data = traverse_obj(player_responses, (..., 'streamingData'), default=[])
2697 formats = list(self._extract_formats(streaming_data, video_id, player_url, is_live))
2698
2699 if not formats:
2700 if not self.get_param('allow_unplayable_formats') and traverse_obj(streaming_data, (..., 'licenseInfos')):
2701 self.report_drm(video_id)
2702 pemr = get_first(
2703 playability_statuses,
2704 ('errorScreen', 'playerErrorMessageRenderer'), expected_type=dict) or {}
2705 reason = self._get_text(pemr, 'reason') or get_first(playability_statuses, 'reason')
2706 subreason = clean_html(self._get_text(pemr, 'subreason') or '')
2707 if subreason:
2708 if subreason == 'The uploader has not made this video available in your country.':
2709 countries = get_first(microformats, 'availableCountries')
2710 if not countries:
2711 regions_allowed = search_meta('regionsAllowed')
2712 countries = regions_allowed.split(',') if regions_allowed else None
2713 self.raise_geo_restricted(subreason, countries, metadata_available=True)
2714 reason += f'. {subreason}'
2715 if reason:
2716 self.raise_no_formats(reason, expected=True)
2717
2718 keywords = get_first(video_details, 'keywords', expected_type=list) or []
2719 if not keywords and webpage:
2720 keywords = [
2721 unescapeHTML(m.group('content'))
2722 for m in re.finditer(self._meta_regex('og:video:tag'), webpage)]
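# A 'yt:stretch=W:H' keyword forces a display aspect ratio onto all video formats.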
2723 for keyword in keywords:
2724 if keyword.startswith('yt:stretch='):
2725 mobj = re.search(r'(\d+)\s*:\s*(\d+)', keyword)
2726 if mobj:
2727 # NB: float is intentional for forcing float division
2728 w, h = (float(v) for v in mobj.groups())
2729 if w > 0 and h > 0:
2730 ratio = w / h
2731 for f in formats:
2732 if f.get('vcodec') != 'none':
2733 f['stretched_ratio'] = ratio
2734 break
2735
2736 thumbnails = []
2737 thumbnail_dicts = traverse_obj(
2738 (video_details, microformats), (..., ..., 'thumbnail', 'thumbnails', ...),
2739 expected_type=dict, default=[])
2740 for thumbnail in thumbnail_dicts:
2741 thumbnail_url = thumbnail.get('url')
2742 if not thumbnail_url:
2743 continue
2744 # Sometimes youtube gives a wrong thumbnail URL. See:
2745 # https://github.com/yt-dlp/yt-dlp/issues/233
2746 # https://github.com/ytdl-org/youtube-dl/issues/28023
2747 if 'maxresdefault' in thumbnail_url:
2748 thumbnail_url = thumbnail_url.split('?')[0]
2749 thumbnails.append({
2750 'url': thumbnail_url,
2751 'height': int_or_none(thumbnail.get('height')),
2752 'width': int_or_none(thumbnail.get('width')),
2753 })
2754 thumbnail_url = search_meta(['og:image', 'twitter:image'])
2755 if thumbnail_url:
2756 thumbnails.append({
2757 'url': thumbnail_url,
2758 })
2759 original_thumbnails = thumbnails.copy()
2760
2761 # The best resolution thumbnail sometimes does not appear in the webpage
2762 # See: https://github.com/ytdl-org/youtube-dl/issues/29049, https://github.com/yt-dlp/yt-dlp/issues/340
2763 # List of possible thumbnails - Ref: <https://stackoverflow.com/a/20542029>
2764 thumbnail_names = [
2765 'maxresdefault', 'hq720', 'sddefault', 'sd1', 'sd2', 'sd3',
2766 'hqdefault', 'hq1', 'hq2', 'hq3', '0',
2767 'mqdefault', 'mq1', 'mq2', 'mq3',
2768 'default', '1', '2', '3'
2769 ]
2770 n_thumbnail_names = len(thumbnail_names)
2771 thumbnails.extend({
2772 'url': 'https://i.ytimg.com/vi{webp}/{video_id}/{name}{live}.{ext}'.format(
2773 video_id=video_id, name=name, ext=ext,
2774 webp='_webp' if ext == 'webp' else '', live='_live' if is_live else ''),
2775 } for name in thumbnail_names for ext in ('webp', 'jpg'))
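# Rank thumbnails: names earlier in thumbnail_names score higher, and webp beats jpg for the same name.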
2776 for thumb in thumbnails:
2777 i = next((i for i, t in enumerate(thumbnail_names) if f'/{video_id}/{t}' in thumb['url']), n_thumbnail_names)
2778 thumb['preference'] = (0 if '.webp' in thumb['url'] else -1) - (2 * i)
2779 self._remove_duplicate_formats(thumbnails)
2780 self._downloader._sort_thumbnails(original_thumbnails)
2781
2782 category = get_first(microformats, 'category') or search_meta('genre')
2783 channel_id = str_or_none(
2784 get_first(video_details, 'channelId')
2785 or get_first(microformats, 'externalChannelId')
2786 or search_meta('channelId'))
2787 duration = int_or_none(
2788 get_first(video_details, 'lengthSeconds')
2789 or get_first(microformats, 'lengthSeconds')
2790 or parse_duration(search_meta('duration'))) or None
2791 owner_profile_url = get_first(microformats, 'ownerProfileUrl')
2792
2793 live_content = get_first(video_details, 'isLiveContent')
2794 is_upcoming = get_first(video_details, 'isUpcoming')
2795 if is_live is None:
2796 if is_upcoming or live_content is False:
2797 is_live = False
2798 if is_upcoming is None and (live_content or is_live):
2799 is_upcoming = False
2800 live_starttime = parse_iso8601(get_first(live_broadcast_details, 'startTimestamp'))
2801 live_endtime = parse_iso8601(get_first(live_broadcast_details, 'endTimestamp'))
2802 if not duration and live_endtime and live_starttime:
2803 duration = live_endtime - live_starttime
2804
2805 formats.extend(self._extract_storyboard(player_responses, duration))
2806
2807 # Source is given priority since formats that throttle are given lower source_preference
2808 # When the throttling issue is fully fixed, remove this
2809 self._sort_formats(formats, ('quality', 'res', 'fps', 'hdr:12', 'source', 'codec:vp9.2', 'lang', 'proto'))
2810
2811 info = {
2812 'id': video_id,
2813 'title': self._live_title(video_title) if is_live else video_title,
2814 'formats': formats,
2815 'thumbnails': thumbnails,
2816 # The best thumbnail that we are sure exists. Prevents unnecessary
2817 # URL checking if the user doesn't care about getting the best possible thumbnail
2818 'thumbnail': traverse_obj(original_thumbnails, (-1, 'url')),
2819 'description': video_description,
2820 'upload_date': unified_strdate(
2821 get_first(microformats, 'uploadDate')
2822 or search_meta('uploadDate')),
2823 'uploader': get_first(video_details, 'author'),
2824 'uploader_id': self._search_regex(r'/(?:channel|user)/([^/?&#]+)', owner_profile_url, 'uploader id') if owner_profile_url else None,
2825 'uploader_url': owner_profile_url,
2826 'channel_id': channel_id,
2827 'channel_url': f'https://www.youtube.com/channel/{channel_id}' if channel_id else None,
2828 'duration': duration,
2829 'view_count': int_or_none(
2830 get_first((video_details, microformats), (..., 'viewCount'))
2831 or search_meta('interactionCount')),
2832 'average_rating': float_or_none(get_first(video_details, 'averageRating')),
2833 'age_limit': 18 if (
2834 get_first(microformats, 'isFamilySafe') is False
2835 or search_meta('isFamilyFriendly') == 'false'
2836 or search_meta('og:restrictions:age') == '18+') else 0,
2837 'webpage_url': webpage_url,
2838 'categories': [category] if category else None,
2839 'tags': keywords,
2840 'playable_in_embed': get_first(playability_statuses, 'playableInEmbed'),
2841 'is_live': is_live,
2842 'was_live': (False if is_live or is_upcoming or live_content is False
2843 else None if is_live is None or is_upcoming is None
2844 else live_content),
2845 'live_status': 'is_upcoming' if is_upcoming else None, # rest will be set by YoutubeDL
2846 'release_timestamp': live_starttime,
2847 }
2848
2849 pctr = traverse_obj(player_responses, (..., 'captions', 'playerCaptionsTracklistRenderer'), expected_type=dict)
2850 if pctr:
2851 def get_lang_code(track):
2852 return (remove_start(track.get('vssId') or '', '.').replace('.', '-')
2853 or track.get('languageCode'))
2854
2855 # Converted into dicts to remove duplicates
2856 captions = {
2857 get_lang_code(sub): sub
2858 for sub in traverse_obj(pctr, (..., 'captionTracks', ...), default=[])}
2859 translation_languages = {
2860 lang.get('languageCode'): self._get_text(lang.get('languageName'), max_runs=1)
2861 for lang in traverse_obj(pctr, (..., 'translationLanguages', ...), default=[])}
2862
2863 def process_language(container, base_url, lang_code, sub_name, query):
2864 lang_subs = container.setdefault(lang_code, [])
2865 for fmt in self._SUBTITLE_FORMATS:
2866 query.update({
2867 'fmt': fmt,
2868 })
2869 lang_subs.append({
2870 'ext': fmt,
2871 'url': update_url_query(base_url, query),
2872 'name': sub_name,
2873 })
2874
2875 subtitles, automatic_captions = {}, {}
2876 for lang_code, caption_track in captions.items():
2877 base_url = caption_track.get('baseUrl')
2878 if not base_url:
2879 continue
2880 lang_name = self._get_text(caption_track, 'name', max_runs=1)
2881 if caption_track.get('kind') != 'asr':
2882 if not lang_code:
2883 continue
2884 process_language(
2885 subtitles, base_url, lang_code, lang_name, {})
2886 if not caption_track.get('isTranslatable'):
2887 continue
2888 for trans_code, trans_name in translation_languages.items():
2889 if not trans_code:
2890 continue
2891 if caption_track.get('kind') != 'asr':
2892 trans_code += f'-{lang_code}'
2893 trans_name += format_field(lang_name, template=' from %s')
2894 process_language(
2895 automatic_captions, base_url, trans_code, trans_name, {'tlang': trans_code})
2896 info['automatic_captions'] = automatic_captions
2897 info['subtitles'] = subtitles
2898
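# Pick up start/end times passed in the URL query or fragment ('t'/'start' and 'end').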
2899 parsed_url = compat_urllib_parse_urlparse(url)
2900 for component in [parsed_url.fragment, parsed_url.query]:
2901 query = compat_parse_qs(component)
2902 for k, v in query.items():
2903 for d_k, s_ks in [('start', ('start', 't')), ('end', ('end',))]:
2904 d_k += '_time'
2905 if d_k not in info and k in s_ks:
2906 info[d_k] = parse_duration(query[k][0])
2907
2908 # Youtube Music Auto-generated description
2909 if video_description:
2910 mobj = re.search(r'(?s)(?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?.+\nAuto-generated by YouTube\.\s*$', video_description)
2911 if mobj:
2912 release_year = mobj.group('release_year')
2913 release_date = mobj.group('release_date')
2914 if release_date:
2915 release_date = release_date.replace('-', '')
2916 if not release_year:
2917 release_year = release_date[:4]
2918 info.update({
2919 'album': mobj.group('album').strip(),
2920 'artist': mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·')),
2921 'track': mobj.group('track').strip(),
2922 'release_date': release_date,
2923 'release_year': int_or_none(release_year),
2924 })
2925
2926 initial_data = None
2927 if webpage:
2928 initial_data = self._extract_yt_initial_variable(
2929 webpage, self._YT_INITIAL_DATA_RE, video_id,
2930 'yt initial data')
2931 if not initial_data:
2932 query = {'videoId': video_id}
2933 query.update(self._get_checkok_params())
2934 initial_data = self._extract_response(
2935 item_id=video_id, ep='next', fatal=False,
2936 ytcfg=master_ytcfg, query=query,
2937 headers=self.generate_api_headers(ytcfg=master_ytcfg),
2938 note='Downloading initial data API JSON')
2939
2940 try:
2941 # This will raise an exception if there is no live chat
2942 initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
2943 info.setdefault('subtitles', {})['live_chat'] = [{
2944 'url': 'https://www.youtube.com/watch?v=%s' % video_id, # url is needed to set cookies
2945 'video_id': video_id,
2946 'ext': 'json',
2947 'protocol': 'youtube_live_chat' if is_live or is_upcoming else 'youtube_live_chat_replay',
2948 }]
2949 except (KeyError, IndexError, TypeError):
2950 pass
2951
2952 if initial_data:
2953 info['chapters'] = (
2954 self._extract_chapters_from_json(initial_data, duration)
2955 or self._extract_chapters_from_engagement_panel(initial_data, duration)
2956 or None)
2957
2958 contents = try_get(
2959 initial_data,
2960 lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'],
2961 list) or []
2962 for content in contents:
2963 vpir = content.get('videoPrimaryInfoRenderer')
2964 if vpir:
2965 stl = vpir.get('superTitleLink')
2966 if stl:
2967 stl = self._get_text(stl)
2968 if try_get(
2969 vpir,
2970 lambda x: x['superTitleIcon']['iconType']) == 'LOCATION_PIN':
2971 info['location'] = stl
2972 else:
2973 mobj = re.search(r'(.+?)\s*S(\d+)\s*•\s*E(\d+)', stl)
2974 if mobj:
2975 info.update({
2976 'series': mobj.group(1),
2977 'season_number': int(mobj.group(2)),
2978 'episode_number': int(mobj.group(3)),
2979 })
2980 for tlb in (try_get(
2981 vpir,
2982 lambda x: x['videoActions']['menuRenderer']['topLevelButtons'],
2983 list) or []):
2984 tbr = tlb.get('toggleButtonRenderer') or {}
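# The (dis)like count is read from the toggle button's accessibility label; try both known label formats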
2985 for getter, regex in [(
2986 lambda x: x['defaultText']['accessibility']['accessibilityData'],
2987 r'(?P<count>[\d,]+)\s*(?P<type>(?:dis)?like)'), ([
2988 lambda x: x['accessibility'],
2989 lambda x: x['accessibilityData']['accessibilityData'],
2990 ], r'(?P<type>(?:dis)?like) this video along with (?P<count>[\d,]+) other people')]:
2991 label = (try_get(tbr, getter, dict) or {}).get('label')
2992 if label:
2993 mobj = re.match(regex, label)
2994 if mobj:
2995 info[mobj.group('type') + '_count'] = str_to_int(mobj.group('count'))
2996 break
2997 sbr_tooltip = try_get(
2998 vpir, lambda x: x['sentimentBar']['sentimentBarRenderer']['tooltip'])
2999 if sbr_tooltip:
3000 like_count, dislike_count = sbr_tooltip.split(' / ')
3001 info.update({
3002 'like_count': str_to_int(like_count),
3003 'dislike_count': str_to_int(dislike_count),
3004 })
3005 vsir = content.get('videoSecondaryInfoRenderer')
3006 if vsir:
3007 info['channel'] = self._get_text(vsir, ('owner', 'videoOwnerRenderer', 'title'))
3008 rows = try_get(
3009 vsir,
3010 lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows'],
3011 list) or []
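# A divider line between metadata rows indicates the description lists multiple songs, in which case the per-track Album/Artist/Song rows are skipped below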
3012 multiple_songs = False
3013 for row in rows:
3014 if try_get(row, lambda x: x['metadataRowRenderer']['hasDividerLine']) is True:
3015 multiple_songs = True
3016 break
3017 for row in rows:
3018 mrr = row.get('metadataRowRenderer') or {}
3019 mrr_title = mrr.get('title')
3020 if not mrr_title:
3021 continue
3022 mrr_title = self._get_text(mrr, 'title')
3023 mrr_contents_text = self._get_text(mrr, ('contents', 0))
3024 if mrr_title == 'License':
3025 info['license'] = mrr_contents_text
3026 elif not multiple_songs:
3027 if mrr_title == 'Album':
3028 info['album'] = mrr_contents_text
3029 elif mrr_title == 'Artist':
3030 info['artist'] = mrr_contents_text
3031 elif mrr_title == 'Song':
3032 info['track'] = mrr_contents_text
3033
3034 fallbacks = {
3035 'channel': 'uploader',
3036 'channel_id': 'uploader_id',
3037 'channel_url': 'uploader_url',
3038 }
3039 for to, frm in fallbacks.items():
3040 if not info.get(to):
3041 info[to] = info.get(frm)
3042
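# Mirror the music metadata into the generic fields as well (artist -> creator, track -> alt_title)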
3043 for s_k, d_k in [('artist', 'creator'), ('track', 'alt_title')]:
3044 v = info.get(s_k)
3045 if v:
3046 info[d_k] = v
3047
3048 is_private = get_first(video_details, 'isPrivate', expected_type=bool)
3049 is_unlisted = get_first(microformats, 'isUnlisted', expected_type=bool)
3050 is_membersonly = None
3051 is_premium = None
3052 if initial_data and is_private is not None:
3053 is_membersonly = False
3054 is_premium = False
3055 contents = try_get(initial_data, lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'], list) or []
3056 badge_labels = set()
3057 for content in contents:
3058 if not isinstance(content, dict):
3059 continue
3060 badge_labels.update(self._extract_badges(content.get('videoPrimaryInfoRenderer')))
3061 for badge_label in badge_labels:
3062 if badge_label.lower() == 'members only':
3063 is_membersonly = True
3064 elif badge_label.lower() == 'premium':
3065 is_premium = True
3066 elif badge_label.lower() == 'unlisted':
3067 is_unlisted = True
3068
3069 info['availability'] = self._availability(
3070 is_private=is_private,
3071 needs_premium=is_premium,
3072 needs_subscription=is_membersonly,
3073 needs_auth=info['age_limit'] >= 18,
3074 is_unlisted=None if is_private is None else is_unlisted)
3075
3076 info['__post_extractor'] = self.extract_comments(master_ytcfg, video_id, contents, webpage)
3077
3078 self.mark_watched(video_id, player_responses)
3079
3080 return info
3081
3082
3083 class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):
3084
3085 def _extract_channel_id(self, webpage):
3086 channel_id = self._html_search_meta(
3087 'channelId', webpage, 'channel id', default=None)
3088 if channel_id:
3089 return channel_id
3090 channel_url = self._html_search_meta(
3091 ('og:url', 'al:ios:url', 'al:android:url', 'al:web:url',
3092 'twitter:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad',
3093 'twitter:app:url:googleplay'), webpage, 'channel url')
3094 return self._search_regex(
3095 r'https?://(?:www\.)?youtube\.com/channel/([^/?#&]+)',
3096 channel_url, 'channel id')
3097
3098 @staticmethod
3099 def _extract_basic_item_renderer(item):
3100 # Modified from _extract_grid_item_renderer
3101 known_basic_renderers = (
3102 'playlistRenderer', 'videoRenderer', 'channelRenderer', 'showRenderer'
3103 )
3104 for key, renderer in item.items():
3105 if not isinstance(renderer, dict):
3106 continue
3107 elif key in known_basic_renderers:
3108 return renderer
3109 elif key.startswith('grid') and key.endswith('Renderer'):
3110 return renderer
3111
3112 def _grid_entries(self, grid_renderer):
3113 for item in grid_renderer['items']:
3114 if not isinstance(item, dict):
3115 continue
3116 renderer = self._extract_basic_item_renderer(item)
3117 if not isinstance(renderer, dict):
3118 continue
3119 title = self._get_text(renderer, 'title')
3120
3121 # playlist
3122 playlist_id = renderer.get('playlistId')
3123 if playlist_id:
3124 yield self.url_result(
3125 'https://www.youtube.com/playlist?list=%s' % playlist_id,
3126 ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
3127 video_title=title)
3128 continue
3129 # video
3130 video_id = renderer.get('videoId')
3131 if video_id:
3132 yield self._extract_video(renderer)
3133 continue
3134 # channel
3135 channel_id = renderer.get('channelId')
3136 if channel_id:
3137 yield self.url_result(
3138 'https://www.youtube.com/channel/%s' % channel_id,
3139 ie=YoutubeTabIE.ie_key(), video_title=title)
3140 continue
3141 # generic endpoint URL support
3142 ep_url = urljoin('https://www.youtube.com/', try_get(
3143 renderer, lambda x: x['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'],
3144 compat_str))
3145 if ep_url:
3146 for ie in (YoutubeTabIE, YoutubePlaylistIE, YoutubeIE):
3147 if ie.suitable(ep_url):
3148 yield self.url_result(
3149 ep_url, ie=ie.ie_key(), video_id=ie._match_id(ep_url), video_title=title)
3150 break
3151
3152 def _shelf_entries_from_content(self, shelf_renderer):
3153 content = shelf_renderer.get('content')
3154 if not isinstance(content, dict):
3155 return
3156 renderer = content.get('gridRenderer') or content.get('expandedShelfContentsRenderer')
3157 if renderer:
3158 # TODO: add support for nested playlists so each shelf is processed
3159 # as a separate playlist
3160 # TODO: this includes only the first N items
3161 for entry in self._grid_entries(renderer):
3162 yield entry
3163 renderer = content.get('horizontalListRenderer')
3164 if renderer:
3165 # TODO
3166 pass
3167
3168 def _shelf_entries(self, shelf_renderer, skip_channels=False):
3169 ep = try_get(
3170 shelf_renderer, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
3171 compat_str)
3172 shelf_url = urljoin('https://www.youtube.com', ep)
3173 if shelf_url:
3174 # Skip links to other channels; note that checking for
3175 # endpoint.commandMetadata.webCommandMetadata.webPageType == WEB_PAGE_TYPE_CHANNEL
3176 # will not work
3177 if skip_channels and '/channels?' in shelf_url:
3178 return
3179 title = self._get_text(shelf_renderer, 'title')
3180 yield self.url_result(shelf_url, video_title=title)
3181 # The shelf may not contain a shelf URL; fall back to extraction from content
3182 for entry in self._shelf_entries_from_content(shelf_renderer):
3183 yield entry
3184
3185 def _playlist_entries(self, video_list_renderer):
3186 for content in video_list_renderer['contents']:
3187 if not isinstance(content, dict):
3188 continue
3189 renderer = content.get('playlistVideoRenderer') or content.get('playlistPanelVideoRenderer')
3190 if not isinstance(renderer, dict):
3191 continue
3192 video_id = renderer.get('videoId')
3193 if not video_id:
3194 continue
3195 yield self._extract_video(renderer)
3196
3197 def _rich_entries(self, rich_grid_renderer):
3198 renderer = try_get(
3199 rich_grid_renderer, lambda x: x['content']['videoRenderer'], dict) or {}
3200 video_id = renderer.get('videoId')
3201 if not video_id:
3202 return
3203 yield self._extract_video(renderer)
3204
3205 def _video_entry(self, video_renderer):
3206 video_id = video_renderer.get('videoId')
3207 if video_id:
3208 return self._extract_video(video_renderer)
3209
3210 def _post_thread_entries(self, post_thread_renderer):
3211 post_renderer = try_get(
3212 post_thread_renderer, lambda x: x['post']['backstagePostRenderer'], dict)
3213 if not post_renderer:
3214 return
3215 # video attachment
3216 video_renderer = try_get(
3217 post_renderer, lambda x: x['backstageAttachment']['videoRenderer'], dict) or {}
3218 video_id = video_renderer.get('videoId')
3219 if video_id:
3220 entry = self._extract_video(video_renderer)
3221 if entry:
3222 yield entry
3223 # playlist attachment
3224 playlist_id = try_get(
3225 post_renderer, lambda x: x['backstageAttachment']['playlistRenderer']['playlistId'], compat_str)
3226 if playlist_id:
3227 yield self.url_result(
3228 'https://www.youtube.com/playlist?list=%s' % playlist_id,
3229 ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
3230 # inline video links
3231 runs = try_get(post_renderer, lambda x: x['contentText']['runs'], list) or []
3232 for run in runs:
3233 if not isinstance(run, dict):
3234 continue
3235 ep_url = try_get(
3236 run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'], compat_str)
3237 if not ep_url:
3238 continue
3239 if not YoutubeIE.suitable(ep_url):
3240 continue
3241 ep_video_id = YoutubeIE._match_id(ep_url)
3242 if video_id == ep_video_id:
3243 continue
3244 yield self.url_result(ep_url, ie=YoutubeIE.ie_key(), video_id=ep_video_id)
3245
3246 def _post_thread_continuation_entries(self, post_thread_continuation):
3247 contents = post_thread_continuation.get('contents')
3248 if not isinstance(contents, list):
3249 return
3250 for content in contents:
3251 renderer = content.get('backstagePostThreadRenderer')
3252 if not isinstance(renderer, dict):
3253 continue
3254 for entry in self._post_thread_entries(renderer):
3255 yield entry
3256
3257 r''' # unused
3258 def _rich_grid_entries(self, contents):
3259 for content in contents:
3260 video_renderer = try_get(content, lambda x: x['richItemRenderer']['content']['videoRenderer'], dict)
3261 if video_renderer:
3262 entry = self._video_entry(video_renderer)
3263 if entry:
3264 yield entry
3265 '''
3266 def _extract_entries(self, parent_renderer, continuation_list):
3267 # continuation_list is modified in-place with continuation_list = [continuation_token]
3268 continuation_list[:] = [None]
3269 contents = try_get(parent_renderer, lambda x: x['contents'], list) or []
3270 for content in contents:
3271 if not isinstance(content, dict):
3272 continue
3273 is_renderer = try_get(content, lambda x: x['itemSectionRenderer'], dict)
3274 if not is_renderer:
3275 renderer = content.get('richItemRenderer')
3276 if renderer:
3277 for entry in self._rich_entries(renderer):
3278 yield entry
3279 continuation_list[0] = self._extract_continuation(parent_renderer)
3280 continue
3281 isr_contents = try_get(is_renderer, lambda x: x['contents'], list) or []
3282 for isr_content in isr_contents:
3283 if not isinstance(isr_content, dict):
3284 continue
3285
3286 known_renderers = {
3287 'playlistVideoListRenderer': self._playlist_entries,
3288 'gridRenderer': self._grid_entries,
3289 'shelfRenderer': lambda x: self._shelf_entries(x),
3290 'backstagePostThreadRenderer': self._post_thread_entries,
3291 'videoRenderer': lambda x: [self._video_entry(x)],
3292 'playlistRenderer': lambda x: self._grid_entries({'items': [{'playlistRenderer': x}]}),
3293 'channelRenderer': lambda x: self._grid_entries({'items': [{'channelRenderer': x}]}),
3294 }
3295 for key, renderer in isr_content.items():
3296 if key not in known_renderers:
3297 continue
3298 for entry in known_renderers[key](renderer):
3299 if entry:
3300 yield entry
3301 continuation_list[0] = self._extract_continuation(renderer)
3302 break
3303
3304 if not continuation_list[0]:
3305 continuation_list[0] = self._extract_continuation(is_renderer)
3306
3307 if not continuation_list[0]:
3308 continuation_list[0] = self._extract_continuation(parent_renderer)
3309
3310 def _entries(self, tab, item_id, ytcfg, account_syncid, visitor_data):
3311 continuation_list = [None]
3312 extract_entries = lambda x: self._extract_entries(x, continuation_list)
3313 tab_content = try_get(tab, lambda x: x['content'], dict)
3314 if not tab_content:
3315 return
3316 parent_renderer = (
3317 try_get(tab_content, lambda x: x['sectionListRenderer'], dict)
3318 or try_get(tab_content, lambda x: x['richGridRenderer'], dict) or {})
3319 for entry in extract_entries(parent_renderer):
3320 yield entry
3321 continuation = continuation_list[0]
3322
3323 for page_num in itertools.count(1):
3324 if not continuation:
3325 break
3326 headers = self.generate_api_headers(
3327 ytcfg=ytcfg, account_syncid=account_syncid, visitor_data=visitor_data)
3328 response = self._extract_response(
3329 item_id='%s page %s' % (item_id, page_num),
3330 query=continuation, headers=headers, ytcfg=ytcfg,
3331 check_get_keys=('continuationContents', 'onResponseReceivedActions', 'onResponseReceivedEndpoints'))
3332
3333 if not response:
3334 break
3335 # Extracting updated visitor data is required to prevent an infinite extraction loop in some cases
3336 # See: https://github.com/ytdl-org/youtube-dl/issues/28702
3337 visitor_data = self._extract_visitor_data(response) or visitor_data
3338
3339 known_continuation_renderers = {
3340 'playlistVideoListContinuation': self._playlist_entries,
3341 'gridContinuation': self._grid_entries,
3342 'itemSectionContinuation': self._post_thread_continuation_entries,
3343 'sectionListContinuation': extract_entries, # for feeds
3344 }
3345 continuation_contents = try_get(
3346 response, lambda x: x['continuationContents'], dict) or {}
3347 continuation_renderer = None
3348 for key, value in continuation_contents.items():
3349 if key not in known_continuation_renderers:
3350 continue
3351 continuation_renderer = value
3352 continuation_list = [None]
3353 for entry in known_continuation_renderers[key](continuation_renderer):
3354 yield entry
3355 continuation = continuation_list[0] or self._extract_continuation(continuation_renderer)
3356 break
3357 if continuation_renderer:
3358 continue
3359
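# Continuation items may arrive under onResponseReceivedActions/Endpoints instead of continuationContents; map each renderer type back to its entry extractor and the key that extractor expects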
3360 known_renderers = {
3361 'gridPlaylistRenderer': (self._grid_entries, 'items'),
3362 'gridVideoRenderer': (self._grid_entries, 'items'),
3363 'gridChannelRenderer': (self._grid_entries, 'items'),
3364 'playlistVideoRenderer': (self._playlist_entries, 'contents'),
3365 'itemSectionRenderer': (extract_entries, 'contents'), # for feeds
3366 'richItemRenderer': (extract_entries, 'contents'), # for hashtag
3367 'backstagePostThreadRenderer': (self._post_thread_continuation_entries, 'contents')
3368 }
3369 on_response_received = dict_get(response, ('onResponseReceivedActions', 'onResponseReceivedEndpoints'))
3370 continuation_items = try_get(
3371 on_response_received, lambda x: x[0]['appendContinuationItemsAction']['continuationItems'], list)
3372 continuation_item = try_get(continuation_items, lambda x: x[0], dict) or {}
3373 video_items_renderer = None
3374 for key, value in continuation_item.items():
3375 if key not in known_renderers:
3376 continue
3377 video_items_renderer = {known_renderers[key][1]: continuation_items}
3378 continuation_list = [None]
3379 for entry in known_renderers[key][0](video_items_renderer):
3380 yield entry
3381 continuation = continuation_list[0] or self._extract_continuation(video_items_renderer)
3382 break
3383 if video_items_renderer:
3384 continue
3385 break
3386
3387 @staticmethod
3388 def _extract_selected_tab(tabs):
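# for/else: the else clause raises only if no tab is marked as selected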
3389 for tab in tabs:
3390 renderer = dict_get(tab, ('tabRenderer', 'expandableTabRenderer')) or {}
3391 if renderer.get('selected') is True:
3392 return renderer
3393 else:
3394 raise ExtractorError('Unable to find selected tab')
3395
3396 @classmethod
3397 def _extract_uploader(cls, data):
3398 uploader = {}
3399 renderer = cls._extract_sidebar_info_renderer(data, 'playlistSidebarSecondaryInfoRenderer') or {}
3400 owner = try_get(
3401 renderer, lambda x: x['videoOwner']['videoOwnerRenderer']['title']['runs'][0], dict)
3402 if owner:
3403 uploader['uploader'] = owner.get('text')
3404 uploader['uploader_id'] = try_get(
3405 owner, lambda x: x['navigationEndpoint']['browseEndpoint']['browseId'], compat_str)
3406 uploader['uploader_url'] = urljoin(
3407 'https://www.youtube.com/',
3408 try_get(owner, lambda x: x['navigationEndpoint']['browseEndpoint']['canonicalBaseUrl'], compat_str))
3409 return {k: v for k, v in uploader.items() if v is not None}
3410
3411 def _extract_from_tabs(self, item_id, ytcfg, data, tabs):
3412 playlist_id = title = description = channel_url = channel_name = channel_id = None
3413 thumbnails_list = []
3414 tags = []
3415
3416 selected_tab = self._extract_selected_tab(tabs)
3417 renderer = try_get(
3418 data, lambda x: x['metadata']['channelMetadataRenderer'], dict)
3419 if renderer:
3420 channel_name = renderer.get('title')
3421 channel_url = renderer.get('channelUrl')
3422 channel_id = renderer.get('externalId')
3423 else:
3424 renderer = try_get(
3425 data, lambda x: x['metadata']['playlistMetadataRenderer'], dict)
3426
3427 if renderer:
3428 title = renderer.get('title')
3429 description = renderer.get('description', '')
3430 playlist_id = channel_id
3431 tags = renderer.get('keywords', '').split()
3432 thumbnails_list = (
3433 try_get(renderer, lambda x: x['avatar']['thumbnails'], list)
3434 or try_get(
3435 self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer'),
3436 lambda x: x['thumbnailRenderer']['playlistVideoThumbnailRenderer']['thumbnail']['thumbnails'],
3437 list)
3438 or [])
3439
3440 thumbnails = []
3441 for t in thumbnails_list:
3442 if not isinstance(t, dict):
3443 continue
3444 thumbnail_url = url_or_none(t.get('url'))
3445 if not thumbnail_url:
3446 continue
3447 thumbnails.append({
3448 'url': thumbnail_url,
3449 'width': int_or_none(t.get('width')),
3450 'height': int_or_none(t.get('height')),
3451 })
3452 if playlist_id is None:
3453 playlist_id = item_id
3454 if title is None:
3455 title = (
3456 try_get(data, lambda x: x['header']['hashtagHeaderRenderer']['hashtag']['simpleText'])
3457 or playlist_id)
3458 title += format_field(selected_tab, 'title', ' - %s')
3459 title += format_field(selected_tab, 'expandedText', ' - %s')
3460 metadata = {
3461 'playlist_id': playlist_id,
3462 'playlist_title': title,
3463 'playlist_description': description,
3464 'uploader': channel_name,
3465 'uploader_id': channel_id,
3466 'uploader_url': channel_url,
3467 'thumbnails': thumbnails,
3468 'tags': tags,
3469 }
3470 availability = self._extract_availability(data)
3471 if availability:
3472 metadata['availability'] = availability
3473 if not channel_id:
3474 metadata.update(self._extract_uploader(data))
3475 metadata.update({
3476 'channel': metadata['uploader'],
3477 'channel_id': metadata['uploader_id'],
3478 'channel_url': metadata['uploader_url']})
3479 return self.playlist_result(
3480 self._entries(
3481 selected_tab, playlist_id, ytcfg,
3482 self._extract_account_syncid(ytcfg, data),
3483 self._extract_visitor_data(data, ytcfg)),
3484 **metadata)
3485
3486 def _extract_mix_playlist(self, playlist, playlist_id, data, ytcfg):
3487 first_id = last_id = response = None
3488 for page_num in itertools.count(1):
3489 videos = list(self._playlist_entries(playlist))
3490 if not videos:
3491 return
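# The next page of a mix usually repeats already-seen entries; skip ahead to just after the last video yielded previously (start is 0 when it is not present)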
3492 start = next((i for i, v in enumerate(videos) if v['id'] == last_id), -1) + 1
3493 if start >= len(videos):
3494 return
3495 for video in videos[start:]:
3496 if video['id'] == first_id:
3497 self.to_screen('First video %s found again; Assuming end of Mix' % first_id)
3498 return
3499 yield video
3500 first_id = first_id or videos[0]['id']
3501 last_id = videos[-1]['id']
3502 watch_endpoint = try_get(
3503 playlist, lambda x: x['contents'][-1]['playlistPanelVideoRenderer']['navigationEndpoint']['watchEndpoint'])
3504 headers = self.generate_api_headers(
3505 ytcfg=ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
3506 visitor_data=self._extract_visitor_data(response, data, ytcfg))
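# Request the next page of the mix from the 'next' endpoint, anchored at the watch endpoint of the last listed video (falling back to the last yielded id)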
3507 query = {
3508 'playlistId': playlist_id,
3509 'videoId': watch_endpoint.get('videoId') or last_id,
3510 'index': watch_endpoint.get('index') or len(videos),
3511 'params': watch_endpoint.get('params') or 'OAE%3D'
3512 }
3513 response = self._extract_response(
3514 item_id='%s page %d' % (playlist_id, page_num),
3515 query=query, ep='next', headers=headers, ytcfg=ytcfg,
3516 check_get_keys='contents'
3517 )
3518 playlist = try_get(
3519 response, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
3520
3521 def _extract_from_playlist(self, item_id, url, data, playlist, ytcfg):
3522 title = playlist.get('title') or try_get(
3523 data, lambda x: x['titleText']['simpleText'], compat_str)
3524 playlist_id = playlist.get('playlistId') or item_id
3525
3526 # Delegating everything except mix playlists to regular tab-based playlist URL
3527 playlist_url = urljoin(url, try_get(
3528 playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
3529 compat_str))
3530 if playlist_url and playlist_url != url:
3531 return self.url_result(
3532 playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
3533 video_title=title)
3534
3535 return self.playlist_result(
3536 self._extract_mix_playlist(playlist, playlist_id, data, ytcfg),
3537 playlist_id=playlist_id, playlist_title=title)
3538
3539 def _extract_availability(self, data):
3540 """
3541 Gets the availability of a given playlist/tab.
3542 Note: Unless YouTube tells us explicitly, we do not assume it is public
3543 @param data: response
3544 """
3545 is_private = is_unlisted = None
3546 renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer') or {}
3547 badge_labels = self._extract_badges(renderer)
3548
3549 # Personal playlists, when authenticated, have a dropdown visibility selector instead of a badge
3550 privacy_dropdown_entries = try_get(
3551 renderer, lambda x: x['privacyForm']['dropdownFormFieldRenderer']['dropdown']['dropdownRenderer']['entries'], list) or []
3552 for renderer_dict in privacy_dropdown_entries:
3553 is_selected = try_get(
3554 renderer_dict, lambda x: x['privacyDropdownItemRenderer']['isSelected'], bool) or False
3555 if not is_selected:
3556 continue
3557 label = self._get_text(renderer_dict, ('privacyDropdownItemRenderer', 'label'))
3558 if label:
3559 badge_labels.add(label.lower())
3560 break
3561
3562 for badge_label in badge_labels:
3563 if badge_label == 'unlisted':
3564 is_unlisted = True
3565 elif badge_label == 'private':
3566 is_private = True
3567 elif badge_label == 'public':
3568 is_unlisted = is_private = False
3569 return self._availability(is_private, False, False, False, is_unlisted)
3570
3571 @staticmethod
3572 def _extract_sidebar_info_renderer(data, info_renderer, expected_type=dict):
3573 sidebar_renderer = try_get(
3574 data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list) or []
3575 for item in sidebar_renderer:
3576 renderer = try_get(item, lambda x: x[info_renderer], expected_type)
3577 if renderer:
3578 return renderer
3579
3580 def _reload_with_unavailable_videos(self, item_id, data, ytcfg):
3581 """
3582 Get playlist with unavailable videos if the 'show unavailable videos' button exists.
3583 """
3584 browse_id = params = None
3585 renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer')
3586 if not renderer:
3587 return
3588 menu_renderer = try_get(
3589 renderer, lambda x: x['menu']['menuRenderer']['items'], list) or []
3590 for menu_item in menu_renderer:
3591 if not isinstance(menu_item, dict):
3592 continue
3593 nav_item_renderer = menu_item.get('menuNavigationItemRenderer')
3594 text = try_get(
3595 nav_item_renderer, lambda x: x['text']['simpleText'], compat_str)
3596 if not text or text.lower() != 'show unavailable videos':
3597 continue
3598 browse_endpoint = try_get(
3599 nav_item_renderer, lambda x: x['navigationEndpoint']['browseEndpoint'], dict) or {}
3600 browse_id = browse_endpoint.get('browseId')
3601 params = browse_endpoint.get('params')
3602 break
3603
3604 headers = self.generate_api_headers(
3605 ytcfg=ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
3606 visitor_data=self._extract_visitor_data(data, ytcfg))
3607 query = {
3608 'params': params or 'wgYCCAA=',
3609 'browseId': browse_id or 'VL%s' % item_id
3610 }
3611 return self._extract_response(
3612 item_id=item_id, headers=headers, query=query,
3613 check_get_keys='contents', fatal=False, ytcfg=ytcfg,
3614 note='Downloading API JSON with unavailable videos')
3615
3616 def _extract_webpage(self, url, item_id, fatal=True):
3617 retries = self.get_param('extractor_retries', 3)
3618 count = -1
3619 webpage = data = last_error = None
3620 while count < retries:
3621 count += 1
3622 # Sometimes youtube returns a webpage with incomplete ytInitialData
3623 # See: https://github.com/yt-dlp/yt-dlp/issues/116
3624 if last_error:
3625 self.report_warning('%s. Retrying ...' % last_error)
3626 try:
3627 webpage = self._download_webpage(
3628 url, item_id,
3629 note='Downloading webpage%s' % (' (retry #%d)' % count if count else '',))
3630 data = self.extract_yt_initial_data(item_id, webpage or '', fatal=fatal) or {}
3631 except ExtractorError as e:
3632 if isinstance(e.cause, network_exceptions):
3633 if not isinstance(e.cause, compat_HTTPError) or e.cause.code not in (403, 429):
3634 last_error = error_to_compat_str(e.cause or e.msg)
3635 if count < retries:
3636 continue
3637 if fatal:
3638 raise
3639 self.report_warning(error_to_compat_str(e))
3640 break
3641 else:
3642 try:
3643 self._extract_and_report_alerts(data)
3644 except ExtractorError as e:
3645 if fatal:
3646 raise
3647 self.report_warning(error_to_compat_str(e))
3648 break
3649
3650 if dict_get(data, ('contents', 'currentVideoEndpoint')):
3651 break
3652
3653 last_error = 'Incomplete yt initial data received'
3654 if count >= retries:
3655 if fatal:
3656 raise ExtractorError(last_error)
3657 self.report_warning(last_error)
3658 break
3659
3660 return webpage, data
3661
3662 def _extract_data(self, url, item_id, ytcfg=None, fatal=True, webpage_fatal=False, default_client='web'):
3663 data = None
3664 if 'webpage' not in self._configuration_arg('skip'):
3665 webpage, data = self._extract_webpage(url, item_id, fatal=webpage_fatal)
3666 ytcfg = ytcfg or self.extract_ytcfg(item_id, webpage)
3667 if not data:
3668 if not ytcfg and self.is_authenticated:
3669 msg = 'Playlists that require authentication may not extract correctly without a successful webpage download.'
3670 if 'authcheck' not in self._configuration_arg('skip') and fatal:
3671 raise ExtractorError(
3672 msg + ' If you are not downloading private content, or your cookies are only for the first account and channel,'
3673 ' pass "--extractor-args youtubetab:skip=authcheck" to skip this check',
3674 expected=True)
3675 self.report_warning(msg, only_once=True)
3676 data = self._extract_tab_endpoint(url, item_id, ytcfg, fatal=fatal, default_client=default_client)
3677 return data, ytcfg
3678
3679 def _extract_tab_endpoint(self, url, item_id, ytcfg=None, fatal=True, default_client='web'):
3680 headers = self.generate_api_headers(ytcfg=ytcfg, default_client=default_client)
3681 resolve_response = self._extract_response(
3682 item_id=item_id, query={'url': url}, check_get_keys='endpoint', headers=headers, ytcfg=ytcfg, fatal=fatal,
3683 ep='navigation/resolve_url', note='Downloading API parameters JSON', default_client=default_client)
3684 endpoints = {'browseEndpoint': 'browse', 'watchEndpoint': 'next'}
3685 for ep_key, ep in endpoints.items():
3686 params = try_get(resolve_response, lambda x: x['endpoint'][ep_key], dict)
3687 if params:
3688 return self._extract_response(
3689 item_id=item_id, query=params, ep=ep, headers=headers,
3690 ytcfg=ytcfg, fatal=fatal, default_client=default_client,
3691 check_get_keys=('contents', 'currentVideoEndpoint'))
3692 err_note = 'Failed to resolve url (does the playlist exist?)'
3693 if fatal:
3694 raise ExtractorError(err_note, expected=True)
3695 self.report_warning(err_note, item_id)
3696
3697 @staticmethod
3698 def _smuggle_data(entries, data):
3699 for entry in entries:
3700 if data:
3701 entry['url'] = smuggle_url(entry['url'], data)
3702 yield entry
3703
3704 _SEARCH_PARAMS = None
3705
3706 def _search_results(self, query, params=NO_DEFAULT):
3707 data = {'query': query}
3708 if params is NO_DEFAULT:
3709 params = self._SEARCH_PARAMS
3710 if params:
3711 data['params'] = params
3712 continuation_list = [None]
3713 for page_num in itertools.count(1):
3714 data.update(continuation_list[0] or {})
3715 search = self._extract_response(
3716 item_id='query "%s" page %s' % (query, page_num), ep='search', query=data,
3717 check_get_keys=('contents', 'onResponseReceivedCommands'))
3718 slr_contents = try_get(
3719 search,
3720 (lambda x: x['contents']['twoColumnSearchResultsRenderer']['primaryContents']['sectionListRenderer']['contents'],
3721 lambda x: x['onResponseReceivedCommands'][0]['appendContinuationItemsAction']['continuationItems']),
3722 list)
3723 yield from self._extract_entries({'contents': slr_contents}, continuation_list)
3724 if not continuation_list[0]:
3725 break
3726
3727
3728 class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
3729 IE_DESC = 'YouTube Tabs'
3730 _VALID_URL = r'''(?x:
3731 https?://
3732 (?:\w+\.)?
3733 (?:
3734 youtube(?:kids)?\.com|
3735 %(invidious)s
3736 )/
3737 (?:
3738 (?P<channel_type>channel|c|user|browse)/|
3739 (?P<not_channel>
3740 feed/|hashtag/|
3741 (?:playlist|watch)\?.*?\blist=
3742 )|
3743 (?!(?:%(reserved_names)s)\b) # Direct URLs
3744 )
3745 (?P<id>[^/?\#&]+)
3746 )''' % {
3747 'reserved_names': YoutubeBaseInfoExtractor._RESERVED_NAMES,
3748 'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
3749 }
3750 IE_NAME = 'youtube:tab'
3751
3752 _TESTS = [{
3753 'note': 'playlists, multipage',
3754 'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid',
3755 'playlist_mincount': 94,
3756 'info_dict': {
3757 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
3758 'title': 'Игорь Клейнер - Playlists',
3759 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
3760 'uploader': 'Игорь Клейнер',
3761 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
3762 },
3763 }, {
3764 'note': 'playlists, multipage, different order',
3765 'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
3766 'playlist_mincount': 94,
3767 'info_dict': {
3768 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
3769 'title': 'Игорь Клейнер - Playlists',
3770 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
3771 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
3772 'uploader': 'Игорь Клейнер',
3773 },
3774 }, {
3775 'note': 'playlists, series',
3776 'url': 'https://www.youtube.com/c/3blue1brown/playlists?view=50&sort=dd&shelf_id=3',
3777 'playlist_mincount': 5,
3778 'info_dict': {
3779 'id': 'UCYO_jab_esuFRV4b17AJtAw',
3780 'title': '3Blue1Brown - Playlists',
3781 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
3782 'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
3783 'uploader': '3Blue1Brown',
3784 },
3785 }, {
3786 'note': 'playlists, singlepage',
3787 'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
3788 'playlist_mincount': 4,
3789 'info_dict': {
3790 'id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
3791 'title': 'ThirstForScience - Playlists',
3792 'description': 'md5:609399d937ea957b0f53cbffb747a14c',
3793 'uploader': 'ThirstForScience',
3794 'uploader_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
3795 }
3796 }, {
3797 'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',
3798 'only_matching': True,
3799 }, {
3800 'note': 'basic, single video playlist',
3801 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
3802 'info_dict': {
3803 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
3804 'uploader': 'Sergey M.',
3805 'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
3806 'title': 'youtube-dl public playlist',
3807 },
3808 'playlist_count': 1,
3809 }, {
3810 'note': 'empty playlist',
3811 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
3812 'info_dict': {
3813 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
3814 'uploader': 'Sergey M.',
3815 'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
3816 'title': 'youtube-dl empty playlist',
3817 },
3818 'playlist_count': 0,
3819 }, {
3820 'note': 'Home tab',
3821 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/featured',
3822 'info_dict': {
3823 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3824 'title': 'lex will - Home',
3825 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3826 'uploader': 'lex will',
3827 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3828 },
3829 'playlist_mincount': 2,
3830 }, {
3831 'note': 'Videos tab',
3832 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos',
3833 'info_dict': {
3834 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3835 'title': 'lex will - Videos',
3836 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3837 'uploader': 'lex will',
3838 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3839 },
3840 'playlist_mincount': 975,
3841 }, {
3842 'note': 'Videos tab, sorted by popular',
3843 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos?view=0&sort=p&flow=grid',
3844 'info_dict': {
3845 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3846 'title': 'lex will - Videos',
3847 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3848 'uploader': 'lex will',
3849 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3850 },
3851 'playlist_mincount': 199,
3852 }, {
3853 'note': 'Playlists tab',
3854 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/playlists',
3855 'info_dict': {
3856 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3857 'title': 'lex will - Playlists',
3858 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3859 'uploader': 'lex will',
3860 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3861 },
3862 'playlist_mincount': 17,
3863 }, {
3864 'note': 'Community tab',
3865 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community',
3866 'info_dict': {
3867 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3868 'title': 'lex will - Community',
3869 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3870 'uploader': 'lex will',
3871 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3872 },
3873 'playlist_mincount': 18,
3874 }, {
3875 'note': 'Channels tab',
3876 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels',
3877 'info_dict': {
3878 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3879 'title': 'lex will - Channels',
3880 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3881 'uploader': 'lex will',
3882 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3883 },
3884 'playlist_mincount': 12,
3885 }, {
3886 'note': 'Search tab',
3887 'url': 'https://www.youtube.com/c/3blue1brown/search?query=linear%20algebra',
3888 'playlist_mincount': 40,
3889 'info_dict': {
3890 'id': 'UCYO_jab_esuFRV4b17AJtAw',
3891 'title': '3Blue1Brown - Search - linear algebra',
3892 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
3893 'uploader': '3Blue1Brown',
3894 'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
3895 },
3896 }, {
3897 'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',
3898 'only_matching': True,
3899 }, {
3900 'url': 'https://www.youtubekids.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
3901 'only_matching': True,
3902 }, {
3903 'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
3904 'only_matching': True,
3905 }, {
3906 'note': 'Playlist with deleted videos (#651). As a bonus, video #51 also appears twice in this list.',
3907 'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
3908 'info_dict': {
3909 'title': '29C3: Not my department',
3910 'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
3911 'uploader': 'Christiaan008',
3912 'uploader_id': 'UCEPzS1rYsrkqzSLNp76nrcg',
3913 'description': 'md5:a14dc1a8ef8307a9807fe136a0660268',
3914 },
3915 'playlist_count': 96,
3916 }, {
3917 'note': 'Large playlist',
3918 'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
3919 'info_dict': {
3920 'title': 'Uploads from Cauchemar',
3921 'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
3922 'uploader': 'Cauchemar',
3923 'uploader_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',
3924 },
3925 'playlist_mincount': 1123,
3926 }, {
3927 'note': 'even larger playlist, 8832 videos',
3928 'url': 'http://www.youtube.com/user/NASAgovVideo/videos',
3929 'only_matching': True,
3930 }, {
3931 'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
3932 'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
3933 'info_dict': {
3934 'title': 'Uploads from Interstellar Movie',
3935 'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
3936 'uploader': 'Interstellar Movie',
3937 'uploader_id': 'UCXw-G3eDE9trcvY2sBMM_aA',
3938 },
3939 'playlist_mincount': 21,
3940 }, {
3941 'note': 'Playlist with "show unavailable videos" button',
3942 'url': 'https://www.youtube.com/playlist?list=UUTYLiWFZy8xtPwxFwX9rV7Q',
3943 'info_dict': {
3944 'title': 'Uploads from Phim Siêu Nhân Nhật Bản',
3945 'id': 'UUTYLiWFZy8xtPwxFwX9rV7Q',
3946 'uploader': 'Phim Siêu Nhân Nhật Bản',
3947 'uploader_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',
3948 },
3949 'playlist_mincount': 200,
3950 }, {
3951 'note': 'Playlist with unavailable videos in page 7',
3952 'url': 'https://www.youtube.com/playlist?list=UU8l9frL61Yl5KFOl87nIm2w',
3953 'info_dict': {
3954 'title': 'Uploads from BlankTV',
3955 'id': 'UU8l9frL61Yl5KFOl87nIm2w',
3956 'uploader': 'BlankTV',
3957 'uploader_id': 'UC8l9frL61Yl5KFOl87nIm2w',
3958 },
3959 'playlist_mincount': 1000,
3960 }, {
3961 'note': 'https://github.com/ytdl-org/youtube-dl/issues/21844',
3962 'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
3963 'info_dict': {
3964 'title': 'Data Analysis with Dr Mike Pound',
3965 'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
3966 'uploader_id': 'UC9-y-6csu5WGm29I7JiwpnA',
3967 'uploader': 'Computerphile',
3968 'description': 'md5:7f567c574d13d3f8c0954d9ffee4e487',
3969 },
3970 'playlist_mincount': 11,
3971 }, {
3972 'url': 'https://invidio.us/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
3973 'only_matching': True,
3974 }, {
3975 'note': 'Playlist URL that does not actually serve a playlist',
3976 'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
3977 'info_dict': {
3978 'id': 'FqZTN594JQw',
3979 'ext': 'webm',
3980 'title': "Smiley's People 01 detective, Adventure Series, Action",
3981 'uploader': 'STREEM',
3982 'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
3983 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
3984 'upload_date': '20150526',
3985 'license': 'Standard YouTube License',
3986 'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
3987 'categories': ['People & Blogs'],
3988 'tags': list,
3989 'view_count': int,
3990 'like_count': int,
3991 'dislike_count': int,
3992 },
3993 'params': {
3994 'skip_download': True,
3995 },
3996 'skip': 'This video is not available.',
3997 'add_ie': [YoutubeIE.ie_key()],
3998 }, {
3999 'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',
4000 'only_matching': True,
4001 }, {
4002 'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
4003 'only_matching': True,
4004 }, {
4005 'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',
4006 'info_dict': {
4007 'id': '3yImotZU3tw', # This will keep changing
4008 'ext': 'mp4',
4009 'title': compat_str,
4010 'uploader': 'Sky News',
4011 'uploader_id': 'skynews',
4012 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/skynews',
4013 'upload_date': r're:\d{8}',
4014 'description': compat_str,
4015 'categories': ['News & Politics'],
4016 'tags': list,
4017 'like_count': int,
4018 'dislike_count': int,
4019 },
4020 'params': {
4021 'skip_download': True,
4022 },
4023 'expected_warnings': ['Downloading just video ', 'Ignoring subtitle tracks found in '],
4024 }, {
4025 'url': 'https://www.youtube.com/user/TheYoungTurks/live',
4026 'info_dict': {
4027 'id': 'a48o2S1cPoo',
4028 'ext': 'mp4',
4029 'title': 'The Young Turks - Live Main Show',
4030 'uploader': 'The Young Turks',
4031 'uploader_id': 'TheYoungTurks',
4032 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
4033 'upload_date': '20150715',
4034 'license': 'Standard YouTube License',
4035 'description': 'md5:438179573adcdff3c97ebb1ee632b891',
4036 'categories': ['News & Politics'],
4037 'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
4038 'like_count': int,
4039 'dislike_count': int,
4040 },
4041 'params': {
4042 'skip_download': True,
4043 },
4044 'only_matching': True,
4045 }, {
4046 'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
4047 'only_matching': True,
4048 }, {
4049 'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
4050 'only_matching': True,
4051 }, {
4052 'note': 'A channel that is not live. Should raise an error',
4053 'url': 'https://www.youtube.com/user/numberphile/live',
4054 'only_matching': True,
4055 }, {
4056 'url': 'https://www.youtube.com/feed/trending',
4057 'only_matching': True,
4058 }, {
4059 'url': 'https://www.youtube.com/feed/library',
4060 'only_matching': True,
4061 }, {
4062 'url': 'https://www.youtube.com/feed/history',
4063 'only_matching': True,
4064 }, {
4065 'url': 'https://www.youtube.com/feed/subscriptions',
4066 'only_matching': True,
4067 }, {
4068 'url': 'https://www.youtube.com/feed/watch_later',
4069 'only_matching': True,
4070 }, {
4071 'note': 'Recommended - redirects to home page.',
4072 'url': 'https://www.youtube.com/feed/recommended',
4073 'only_matching': True,
4074 }, {
4075 'note': 'inline playlist whose continuations do not always work',
4076 'url': 'https://www.youtube.com/watch?v=UC6u0Tct-Fo&list=PL36D642111D65BE7C',
4077 'only_matching': True,
4078 }, {
4079 'url': 'https://www.youtube.com/course',
4080 'only_matching': True,
4081 }, {
4082 'url': 'https://www.youtube.com/zsecurity',
4083 'only_matching': True,
4084 }, {
4085 'url': 'http://www.youtube.com/NASAgovVideo/videos',
4086 'only_matching': True,
4087 }, {
4088 'url': 'https://www.youtube.com/TheYoungTurks/live',
4089 'only_matching': True,
4090 }, {
4091 'url': 'https://www.youtube.com/hashtag/cctv9',
4092 'info_dict': {
4093 'id': 'cctv9',
4094 'title': '#cctv9',
4095 },
4096 'playlist_mincount': 350,
4097 }, {
4098 'url': 'https://www.youtube.com/watch?list=PLW4dVinRY435CBE_JD3t-0SRXKfnZHS1P&feature=youtu.be&v=M9cJMXmQ_ZU',
4099 'only_matching': True,
4100 }, {
4101 'note': 'Requires Premium: should request additional YTM-info webpage (and have format 141) for videos in playlist',
4102 'url': 'https://music.youtube.com/playlist?list=PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
4103 'only_matching': True
4104 }, {
4105 'note': '/browse/ should redirect to /channel/',
4106 'url': 'https://music.youtube.com/browse/UC1a8OFewdjuLq6KlF8M_8Ng',
4107 'only_matching': True
4108 }, {
4109 'note': 'VLPL, should redirect to playlist?list=PL...',
4110 'url': 'https://music.youtube.com/browse/VLPLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
4111 'info_dict': {
4112 'id': 'PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
4113 'uploader': 'NoCopyrightSounds',
4114 'description': 'Providing you with copyright free / safe music for gaming, live streaming, studying and more!',
4115 'uploader_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',
4116 'title': 'NCS Releases',
4117 },
4118 'playlist_mincount': 166,
4119 }, {
4120 'note': 'Topic, should redirect to playlist?list=UU...',
4121 'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',
4122 'info_dict': {
4123 'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',
4124 'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
4125 'title': 'Uploads from Royalty Free Music - Topic',
4126 'uploader': 'Royalty Free Music - Topic',
4127 },
4128 'expected_warnings': [
4129 'A channel/user page was given',
4130 'The URL does not have a videos tab',
4131 ],
4132 'playlist_mincount': 101,
4133 }, {
4134 'note': 'Topic without a UU playlist',
4135 'url': 'https://www.youtube.com/channel/UCtFRv9O2AHqOZjjynzrv-xg',
4136 'info_dict': {
4137 'id': 'UCtFRv9O2AHqOZjjynzrv-xg',
4138 'title': 'UCtFRv9O2AHqOZjjynzrv-xg',
4139 },
4140 'expected_warnings': [
4141 'A channel/user page was given',
4142 'The URL does not have a videos tab',
4143 'Falling back to channel URL',
4144 ],
4145 'playlist_mincount': 9,
4146 }, {
4147 'note': 'Youtube music Album',
4148 'url': 'https://music.youtube.com/browse/MPREb_gTAcphH99wE',
4149 'info_dict': {
4150 'id': 'OLAK5uy_l1m0thk3g31NmIIz_vMIbWtyv7eZixlH0',
4151 'title': 'Album - Royalty Free Music Library V2 (50 Songs)',
4152 },
4153 'playlist_count': 50,
4154 }, {
4155 'note': 'unlisted single video playlist',
4156 'url': 'https://www.youtube.com/playlist?list=PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
4157 'info_dict': {
4158 'uploader_id': 'UC9zHu_mHU96r19o-wV5Qs1Q',
4159 'uploader': 'colethedj',
4160 'id': 'PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
4161 'title': 'yt-dlp unlisted playlist test',
4162 'availability': 'unlisted'
4163 },
4164 'playlist_count': 1,
4165 }, {
4166 'note': 'API Fallback: Recommended - redirects to home page. Requires visitorData',
4167 'url': 'https://www.youtube.com/feed/recommended',
4168 'info_dict': {
4169 'id': 'recommended',
4170 'title': 'recommended',
4171 },
4172 'playlist_mincount': 50,
4173 'params': {
4174 'skip_download': True,
4175 'extractor_args': {'youtubetab': {'skip': ['webpage']}}
4176 },
4177 }, {
4178 'note': 'API Fallback: /videos tab, sorted by oldest first',
4179 'url': 'https://www.youtube.com/user/theCodyReeder/videos?view=0&sort=da&flow=grid',
4180 'info_dict': {
4181 'id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',
4182 'title': 'Cody\'sLab - Videos',
4183 'description': 'md5:d083b7c2f0c67ee7a6c74c3e9b4243fa',
4184 'uploader': 'Cody\'sLab',
4185 'uploader_id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',
4186 },
4187 'playlist_mincount': 650,
4188 'params': {
4189 'skip_download': True,
4190 'extractor_args': {'youtubetab': {'skip': ['webpage']}}
4191 },
4192 }, {
4193 'note': 'API Fallback: Topic, should redirect to playlist?list=UU...',
4194 'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',
4195 'info_dict': {
4196 'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',
4197 'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
4198 'title': 'Uploads from Royalty Free Music - Topic',
4199 'uploader': 'Royalty Free Music - Topic',
4200 },
4201 'expected_warnings': [
4202 'A channel/user page was given',
4203 'The URL does not have a videos tab',
4204 ],
4205 'playlist_mincount': 101,
4206 'params': {
4207 'skip_download': True,
4208 'extractor_args': {'youtubetab': {'skip': ['webpage']}}
4209 },
4210 }]
4211
4212 @classmethod
4213 def suitable(cls, url):
4214 return False if YoutubeIE.suitable(url) else super(
4215 YoutubeTabIE, cls).suitable(url)
4216
4217 def _real_extract(self, url):
4218 url, smuggled_data = unsmuggle_url(url, {})
4219 if self.is_music_url(url):
4220 smuggled_data['is_music_url'] = True
4221 info_dict = self.__real_extract(url, smuggled_data)
4222 if info_dict.get('entries'):
4223 info_dict['entries'] = self._smuggle_data(info_dict['entries'], smuggled_data)
4224 return info_dict
4225
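# Re-parses a URL into the part matched by _VALID_URL (pre), an optional tab component such as /videos (only attempted for channel-style URLs) and the remainder (post)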
4226 _url_re = re.compile(r'(?P<pre>%s)(?(channel_type)(?P<tab>/\w+))?(?P<post>.*)$' % _VALID_URL)
4227
4228 def __real_extract(self, url, smuggled_data):
4229 item_id = self._match_id(url)
4230 url = compat_urlparse.urlunparse(
4231 compat_urlparse.urlparse(url)._replace(netloc='www.youtube.com'))
4232 compat_opts = self.get_param('compat_opts', [])
4233
4234 def get_mobj(url):
4235 mobj = self._url_re.match(url).groupdict()
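# groupdict() returns None for unmatched groups; normalize them to empty strings so they can be lower-cased and concatenated safely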
4236 mobj.update((k, '') for k, v in mobj.items() if v is None)
4237 return mobj
4238
4239 mobj = get_mobj(url)
4240 # Youtube returns incomplete data if tabname is not lower case
4241 pre, tab, post, is_channel = mobj['pre'], mobj['tab'].lower(), mobj['post'], not mobj['not_channel']
4242 if is_channel:
4243 if smuggled_data.get('is_music_url'):
4244 if item_id[:2] == 'VL':
4245 # Youtube music VL channels have an equivalent playlist
4246 item_id = item_id[2:]
4247 pre, tab, post, is_channel = 'https://www.youtube.com/playlist?list=%s' % item_id, '', '', False
4248 elif item_id[:2] == 'MP':
4249 # Resolve albums (/[channel/browse]/MP...) to their equivalent playlist
4250 mdata = self._extract_tab_endpoint(
4251 'https://music.youtube.com/channel/%s' % item_id, item_id, default_client='web_music')
4252 murl = traverse_obj(
4253 mdata, ('microformat', 'microformatDataRenderer', 'urlCanonical'), get_all=False, expected_type=compat_str)
4254 if not murl:
4255 raise ExtractorError('Failed to resolve album to playlist.')
4256 return self.url_result(murl, ie=YoutubeTabIE.ie_key())
4257 elif mobj['channel_type'] == 'browse':
4258 # Youtube music /browse/ should be changed to /channel/
4259 pre = 'https://www.youtube.com/channel/%s' % item_id
4260 if is_channel and not tab and 'no-youtube-channel-redirect' not in compat_opts:
4261 # Home URLs should redirect to /videos/
4262 self.report_warning(
4263 'A channel/user page was given. All the channel\'s videos will be downloaded. '
4264 'To download only the videos in the home page, add a "/featured" to the URL')
4265 tab = '/videos'
4266
4267 url = ''.join((pre, tab, post))
4268 mobj = get_mobj(url)
4269
4270 # Handle both video/playlist URLs
4271 qs = parse_qs(url)
4272 video_id = qs.get('v', [None])[0]
4273 playlist_id = qs.get('list', [None])[0]
4274
4275 if not video_id and mobj['not_channel'].startswith('watch'):
4276 if not playlist_id:
4277 # If there is neither a video nor a playlist id, youtube redirects to the home page, which is undesirable
4278 raise ExtractorError('Unable to recognize tab page')
4279 # Common mistake: https://www.youtube.com/watch?list=playlist_id
4280 self.report_warning('A video URL was given without video ID. Trying to download playlist %s' % playlist_id)
4281 url = 'https://www.youtube.com/playlist?list=%s' % playlist_id
4282 mobj = get_mobj(url)
4283
4284 if video_id and playlist_id:
4285 if self.get_param('noplaylist'):
4286 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
4287 return self.url_result(f'https://www.youtube.com/watch?v={video_id}', ie=YoutubeIE.ie_key(), video_id=video_id)
4288 self.to_screen('Downloading playlist %s; add --no-playlist to just download video %s' % (playlist_id, video_id))
4289
4290 data, ytcfg = self._extract_data(url, item_id)
4291
4292 tabs = try_get(
4293 data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
4294 if tabs:
4295 selected_tab = self._extract_selected_tab(tabs)
4296 tab_name = selected_tab.get('title', '')
4297 if 'no-youtube-channel-redirect' not in compat_opts:
4298 if mobj['tab'] == '/live':
4299 # Live tab should have redirected to the video
4300 raise ExtractorError('The channel is not currently live', expected=True)
4301 if mobj['tab'] == '/videos' and tab_name.lower() != mobj['tab'][1:]:
4302 if not mobj['not_channel'] and item_id[:2] == 'UC':
4303 # Topic channels don't have /videos. Use the equivalent playlist instead
4304 self.report_warning('The URL does not have a %s tab. Trying to redirect to playlist UU%s instead' % (mobj['tab'][1:], item_id[2:]))
4305 pl_id = 'UU%s' % item_id[2:]
4306 pl_url = 'https://www.youtube.com/playlist?list=%s%s' % (pl_id, mobj['post'])
4307 try:
4308 data, ytcfg, item_id, url = *self._extract_data(pl_url, pl_id, ytcfg=ytcfg, fatal=True), pl_id, pl_url
4309 except ExtractorError:
4310 self.report_warning('The playlist gave an error. Falling back to channel URL')
4311 else:
4312 self.report_warning('The URL does not have a %s tab. %s is being downloaded instead' % (mobj['tab'][1:], tab_name))
4313
4314 self.write_debug('Final URL: %s' % url)
4315
4316 # YouTube sometimes provides a button to reload playlist with unavailable videos.
4317 if 'no-youtube-unavailable-videos' not in compat_opts:
4318 data = self._reload_with_unavailable_videos(item_id, data, ytcfg) or data
4319 self._extract_and_report_alerts(data, only_once=True)
4320 tabs = try_get(
4321 data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
4322 if tabs:
4323 return self._extract_from_tabs(item_id, ytcfg, data, tabs)
4324
4325 playlist = try_get(
4326 data, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
4327 if playlist:
4328 return self._extract_from_playlist(item_id, url, data, playlist, ytcfg)
4329
4330 video_id = try_get(
4331 data, lambda x: x['currentVideoEndpoint']['watchEndpoint']['videoId'],
4332 compat_str) or video_id
4333 if video_id:
4334 if mobj['tab'] != '/live': # live tab is expected to redirect to video
4335 self.report_warning('Unable to recognize playlist. Downloading just video %s' % video_id)
4336 return self.url_result(f'https://www.youtube.com/watch?v={video_id}', ie=YoutubeIE.ie_key(), video_id=video_id)
4337
4338 raise ExtractorError('Unable to recognize tab page')
4339
4340
4341 class YoutubePlaylistIE(InfoExtractor):
4342 IE_DESC = 'YouTube playlists'
4343 _VALID_URL = r'''(?x)(?:
4344 (?:https?://)?
4345 (?:\w+\.)?
4346 (?:
4347 (?:
4348 youtube(?:kids)?\.com|
4349 %(invidious)s
4350 )
4351 /.*?\?.*?\blist=
4352 )?
4353 (?P<id>%(playlist_id)s)
4354 )''' % {
4355 'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE,
4356 'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
4357 }
4358 IE_NAME = 'youtube:playlist'
4359 _TESTS = [{
4360 'note': 'issue #673',
4361 'url': 'PLBB231211A4F62143',
4362 'info_dict': {
4363 'title': '[OLD]Team Fortress 2 (Class-based LP)',
4364 'id': 'PLBB231211A4F62143',
4365 'uploader': 'Wickydoo',
4366 'uploader_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
4367 'description': 'md5:8fa6f52abb47a9552002fa3ddfc57fc2',
4368 },
4369 'playlist_mincount': 29,
4370 }, {
4371 'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
4372 'info_dict': {
4373 'title': 'YDL_safe_search',
4374 'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
4375 },
4376 'playlist_count': 2,
4377 'skip': 'This playlist is private',
4378 }, {
4379 'note': 'embedded',
4380 'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
4381 'playlist_count': 4,
4382 'info_dict': {
4383 'title': 'JODA15',
4384 'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
4385 'uploader': 'milan',
4386 'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
4387 }
4388 }, {
4389 'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
4390 'playlist_mincount': 654,
4391 'info_dict': {
4392 'title': '2018 Chinese New Singles (11/6 updated)',
4393 'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
4394 'uploader': 'LBK',
4395 'uploader_id': 'UC21nz3_MesPLqtDqwdvnoxA',
4396 'description': 'md5:da521864744d60a198e3a88af4db0d9d',
4397 }
4398 }, {
4399 'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
4400 'only_matching': True,
4401 }, {
4402 # music album playlist
4403 'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
4404 'only_matching': True,
4405 }]
4406
4407 @classmethod
4408 def suitable(cls, url):
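# Defer to YoutubeTabIE where it matches; watch URLs carrying a 'v' parameter are left to the video extractor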
4409 if YoutubeTabIE.suitable(url):
4410 return False
4411 from ..utils import parse_qs
4412 qs = parse_qs(url)
4413 if qs.get('v', [None])[0]:
4414 return False
4415 return super(YoutubePlaylistIE, cls).suitable(url)
4416
4417 def _real_extract(self, url):
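# Normalize to a canonical playlist URL and hand off to YoutubeTabIE; music URLs are flagged via a smuggled parameter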
4418 playlist_id = self._match_id(url)
4419 is_music_url = YoutubeBaseInfoExtractor.is_music_url(url)
4420 url = update_url_query(
4421 'https://www.youtube.com/playlist',
4422 parse_qs(url) or {'list': playlist_id})
4423 if is_music_url:
4424 url = smuggle_url(url, {'is_music_url': True})
4425 return self.url_result(url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
4426
4427
4428 class YoutubeYtBeIE(InfoExtractor):
4429 IE_DESC = 'youtu.be'
4430 _VALID_URL = r'https?://youtu\.be/(?P<id>[0-9A-Za-z_-]{11})/*?.*?\blist=(?P<playlist_id>%(playlist_id)s)' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
4431 _TESTS = [{
4432 'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
4433 'info_dict': {
4434 'id': 'yeWKywCrFtk',
4435 'ext': 'mp4',
4436 'title': 'Small Scale Baler and Braiding Rugs',
4437 'uploader': 'Backus-Page House Museum',
4438 'uploader_id': 'backuspagemuseum',
4439 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
4440 'upload_date': '20161008',
4441 'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
4442 'categories': ['Nonprofits & Activism'],
4443 'tags': list,
4444 'like_count': int,
4445 'dislike_count': int,
4446 },
4447 'params': {
4448 'noplaylist': True,
4449 'skip_download': True,
4450 },
4451 }, {
4452 'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
4453 'only_matching': True,
4454 }]
4455
4456 def _real_extract(self, url):
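# Rebuild the full watch URL so that YoutubeTabIE can decide between the video and its playlist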
4457 mobj = self._match_valid_url(url)
4458 video_id = mobj.group('id')
4459 playlist_id = mobj.group('playlist_id')
4460 return self.url_result(
4461 update_url_query('https://www.youtube.com/watch', {
4462 'v': video_id,
4463 'list': playlist_id,
4464 'feature': 'youtu.be',
4465 }), ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
4466
4467
4468 class YoutubeYtUserIE(InfoExtractor):
4469 IE_DESC = 'YouTube user videos; "ytuser:" prefix'
4470 _VALID_URL = r'ytuser:(?P<id>.+)'
4471 _TESTS = [{
4472 'url': 'ytuser:phihag',
4473 'only_matching': True,
4474 }]
4475
4476 def _real_extract(self, url):
4477 user_id = self._match_id(url)
4478 return self.url_result(
4479 'https://www.youtube.com/user/%s/videos' % user_id,
4480 ie=YoutubeTabIE.ie_key(), video_id=user_id)
4481
4482
4483 class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
4484 IE_NAME = 'youtube:favorites'
4485 IE_DESC = 'YouTube liked videos; ":ytfav" keyword (requires cookies)'
4486 _VALID_URL = r':ytfav(?:ou?rite)?s?'
4487 _LOGIN_REQUIRED = True
4488 _TESTS = [{
4489 'url': ':ytfav',
4490 'only_matching': True,
4491 }, {
4492 'url': ':ytfavorites',
4493 'only_matching': True,
4494 }]
4495
4496 def _real_extract(self, url):
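# 'LL' is the authenticated user's liked-videos playlist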
4497 return self.url_result(
4498 'https://www.youtube.com/playlist?list=LL',
4499 ie=YoutubeTabIE.ie_key())
4500
4501
4502 class YoutubeSearchIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor):
4503 IE_DESC = 'YouTube search'
4504 IE_NAME = 'youtube:search'
4505 _SEARCH_KEY = 'ytsearch'
4506 _SEARCH_PARAMS = 'EgIQAQ%3D%3D' # Videos only
4507 _TESTS = []
4508
4509
4510 class YoutubeSearchDateIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor):
4511 IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
4512 _SEARCH_KEY = 'ytsearchdate'
4513 IE_DESC = 'YouTube search, newest videos first'
4514 _SEARCH_PARAMS = 'CAISAhAB' # Videos only, sorted by date
4515
4516
4517 class YoutubeSearchURLIE(YoutubeTabBaseInfoExtractor):
4518 IE_DESC = 'YouTube search URLs with sorting and filter support'
4519 IE_NAME = YoutubeSearchIE.IE_NAME + '_url'
4520 _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?:[^&]+)(?:[&]|$)'
4521 _TESTS = [{
4522 'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
4523 'playlist_mincount': 5,
4524 'info_dict': {
4525 'id': 'youtube-dl test video',
4526 'title': 'youtube-dl test video',
4527 }
4528 }, {
4529 'url': 'https://www.youtube.com/results?search_query=python&sp=EgIQAg%253D%253D',
4530 'playlist_mincount': 5,
4531 'info_dict': {
4532 'id': 'python',
4533 'title': 'python',
4534 }
4536 }, {
4537 'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
4538 'only_matching': True,
4539 }]
4540
4541 def _real_extract(self, url):
4542 qs = parse_qs(url)
4543 query = (qs.get('search_query') or qs.get('q'))[0]
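# 'sp' carries YouTube's encoded sort/filter parameters and is passed through unchanged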
4544 return self.playlist_result(self._search_results(query, qs.get('sp', (None,))[0]), query, query)
4545
4546
4547 class YoutubeFeedsInfoExtractor(YoutubeTabIE):
4548 """
4549 Base class for feed extractors
4550 Subclasses must define the _FEED_NAME property.
4551 """
4552 _LOGIN_REQUIRED = True
4553 _TESTS = []
4554
4555 @property
4556 def IE_NAME(self):
4557 return 'youtube:%s' % self._FEED_NAME
4558
4559 def _real_extract(self, url):
4560 return self.url_result(
4561 'https://www.youtube.com/feed/%s' % self._FEED_NAME,
4562 ie=YoutubeTabIE.ie_key())
4563
4564
4565 class YoutubeWatchLaterIE(InfoExtractor):
4566 IE_NAME = 'youtube:watchlater'
4567 IE_DESC = 'YouTube watch later list; ":ytwatchlater" keyword (requires cookies)'
4568 _VALID_URL = r':ytwatchlater'
4569 _TESTS = [{
4570 'url': ':ytwatchlater',
4571 'only_matching': True,
4572 }]
4573
4574 def _real_extract(self, url):
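# 'WL' is the authenticated user's Watch Later playlist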
4575 return self.url_result(
4576 'https://www.youtube.com/playlist?list=WL', ie=YoutubeTabIE.ie_key())
4577
4578
4579 class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
4580 IE_DESC = 'YouTube recommended videos; ":ytrec" keyword'
4581 _VALID_URL = r'https?://(?:www\.)?youtube\.com/?(?:[?#]|$)|:ytrec(?:ommended)?'
4582 _FEED_NAME = 'recommended'
4583 _LOGIN_REQUIRED = False
4584 _TESTS = [{
4585 'url': ':ytrec',
4586 'only_matching': True,
4587 }, {
4588 'url': ':ytrecommended',
4589 'only_matching': True,
4590 }, {
4591 'url': 'https://youtube.com',
4592 'only_matching': True,
4593 }]
4594
4595
4596 class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
4597 IE_DESC = 'YouTube subscriptions feed; ":ytsubs" keyword (requires cookies)'
4598 _VALID_URL = r':ytsub(?:scription)?s?'
4599 _FEED_NAME = 'subscriptions'
4600 _TESTS = [{
4601 'url': ':ytsubs',
4602 'only_matching': True,
4603 }, {
4604 'url': ':ytsubscriptions',
4605 'only_matching': True,
4606 }]
4607
4608
4609 class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
4610 IE_DESC = 'YouTube watch history; ":ythis" keyword (requires cookies)'
4611 _VALID_URL = r':ythis(?:tory)?'
4612 _FEED_NAME = 'history'
4613 _TESTS = [{
4614 'url': ':ythistory',
4615 'only_matching': True,
4616 }]
4617
4618
4619 class YoutubeTruncatedURLIE(InfoExtractor):
4620 IE_NAME = 'youtube:truncated_url'
4621 IE_DESC = False # Do not list
4622 _VALID_URL = r'''(?x)
4623 (?:https?://)?
4624 (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
4625 (?:watch\?(?:
4626 feature=[a-z_]+|
4627 annotation_id=annotation_[^&]+|
4628 x-yt-cl=[0-9]+|
4629 hl=[^&]*|
4630 t=[0-9]+
4631 )?
4632 |
4633 attribution_link\?a=[^&]+
4634 )
4635 $
4636 '''
4637
4638 _TESTS = [{
4639 'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
4640 'only_matching': True,
4641 }, {
4642 'url': 'https://www.youtube.com/watch?',
4643 'only_matching': True,
4644 }, {
4645 'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
4646 'only_matching': True,
4647 }, {
4648 'url': 'https://www.youtube.com/watch?feature=foo',
4649 'only_matching': True,
4650 }, {
4651 'url': 'https://www.youtube.com/watch?hl=en-GB',
4652 'only_matching': True,
4653 }, {
4654 'url': 'https://www.youtube.com/watch?t=2372',
4655 'only_matching': True,
4656 }]
4657
4658 def _real_extract(self, url):
4659 raise ExtractorError(
4660 'Did you forget to quote the URL? Remember that & is a meta '
4661 'character in most shells, so you want to put the URL in quotes, '
4662 'like yt-dlp '
4663 '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
4664 'or simply yt-dlp BaW_jenozKc.',
4665 expected=True)
4666
4667
4668 class YoutubeClipIE(InfoExtractor):
4669 IE_NAME = 'youtube:clip'
4670 IE_DESC = False # Do not list
4671 _VALID_URL = r'https?://(?:www\.)?youtube\.com/clip/'
4672
4673 def _real_extract(self, url):
4674 self.report_warning('YouTube clips are not currently supported. The entire video will be downloaded instead')
4675 return self.url_result(url, 'Generic')
4676
4677
4678 class YoutubeTruncatedIDIE(InfoExtractor):
4679 IE_NAME = 'youtube:truncated_id'
4680 IE_DESC = False # Do not list
4681 _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'
4682
4683 _TESTS = [{
4684 'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
4685 'only_matching': True,
4686 }]
4687
4688 def _real_extract(self, url):
4689 video_id = self._match_id(url)
4690 raise ExtractorError(
4691 'Incomplete YouTube ID %s. URL %s looks truncated.' % (video_id, url),
4692 expected=True)