1 # coding: utf-8
2
3 from __future__ import unicode_literals
4
5 import base64
6 import calendar
7 import copy
8 import datetime
9 import hashlib
10 import itertools
11 import json
12 import math
13 import os.path
14 import random
15 import re
16 import time
17 import traceback
18
19 from .common import InfoExtractor, SearchInfoExtractor
20 from ..compat import (
21 compat_chr,
22 compat_HTTPError,
23 compat_parse_qs,
24 compat_str,
25 compat_urllib_parse_unquote_plus,
26 compat_urllib_parse_urlencode,
27 compat_urllib_parse_urlparse,
28 compat_urlparse,
29 )
30 from ..jsinterp import JSInterpreter
31 from ..utils import (
32 bug_reports_message,
33 bytes_to_intlist,
34 clean_html,
35 datetime_from_str,
36 dict_get,
37 error_to_compat_str,
38 ExtractorError,
39 float_or_none,
40 format_field,
41 int_or_none,
42 intlist_to_bytes,
43 is_html,
44 join_nonempty,
45 mimetype2ext,
46 network_exceptions,
47 orderedSet,
48 parse_codecs,
49 parse_count,
50 parse_duration,
51 parse_iso8601,
52 parse_qs,
53 qualities,
54 remove_end,
55 remove_start,
56 smuggle_url,
57 str_or_none,
58 str_to_int,
59 traverse_obj,
60 try_get,
61 unescapeHTML,
62 unified_strdate,
63 unsmuggle_url,
64 update_url_query,
65 url_or_none,
66 urljoin,
67 variadic,
68 )
69
70
71 def get_first(obj, keys, **kwargs):
72 return traverse_obj(obj, (..., *variadic(keys)), **kwargs, get_all=False)
73
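# Illustrative sketch (hypothetical data, not part of the module): because the path is
# prefixed with `...` and get_all=False is passed, get_first scans every item of `obj`
# and returns the first non-empty match, e.g.
#   get_first([{'videoDetails': {'videoId': 'abc'}}, {'videoDetails': {'videoId': 'def'}}],
#             ('videoDetails', 'videoId'))  # -> 'abc'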
74
75 # any clients starting with _ cannot be explicitly requested by the user
76 INNERTUBE_CLIENTS = {
77 'web': {
78 'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
79 'INNERTUBE_CONTEXT': {
80 'client': {
81 'clientName': 'WEB',
82 'clientVersion': '2.20210622.10.00',
83 }
84 },
85 'INNERTUBE_CONTEXT_CLIENT_NAME': 1
86 },
87 'web_embedded': {
88 'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
89 'INNERTUBE_CONTEXT': {
90 'client': {
91 'clientName': 'WEB_EMBEDDED_PLAYER',
92 'clientVersion': '1.20210620.0.1',
93 },
94 },
95 'INNERTUBE_CONTEXT_CLIENT_NAME': 56
96 },
97 'web_music': {
98 'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
99 'INNERTUBE_HOST': 'music.youtube.com',
100 'INNERTUBE_CONTEXT': {
101 'client': {
102 'clientName': 'WEB_REMIX',
103 'clientVersion': '1.20210621.00.00',
104 }
105 },
106 'INNERTUBE_CONTEXT_CLIENT_NAME': 67,
107 },
108 'web_creator': {
109 'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
110 'INNERTUBE_CONTEXT': {
111 'client': {
112 'clientName': 'WEB_CREATOR',
113 'clientVersion': '1.20210621.00.00',
114 }
115 },
116 'INNERTUBE_CONTEXT_CLIENT_NAME': 62,
117 },
118 'android': {
119 'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
120 'INNERTUBE_CONTEXT': {
121 'client': {
122 'clientName': 'ANDROID',
123 'clientVersion': '16.20',
124 }
125 },
126 'INNERTUBE_CONTEXT_CLIENT_NAME': 3,
127 'REQUIRE_JS_PLAYER': False
128 },
129 'android_embedded': {
130 'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
131 'INNERTUBE_CONTEXT': {
132 'client': {
133 'clientName': 'ANDROID_EMBEDDED_PLAYER',
134 'clientVersion': '16.20',
135 },
136 },
137 'INNERTUBE_CONTEXT_CLIENT_NAME': 55,
138 'REQUIRE_JS_PLAYER': False
139 },
140 'android_music': {
141 'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
142 'INNERTUBE_HOST': 'music.youtube.com',
143 'INNERTUBE_CONTEXT': {
144 'client': {
145 'clientName': 'ANDROID_MUSIC',
146 'clientVersion': '4.32',
147 }
148 },
149 'INNERTUBE_CONTEXT_CLIENT_NAME': 21,
150 'REQUIRE_JS_PLAYER': False
151 },
152 'android_creator': {
153 'INNERTUBE_CONTEXT': {
154 'client': {
155 'clientName': 'ANDROID_CREATOR',
156 'clientVersion': '21.24.100',
157 },
158 },
159 'INNERTUBE_CONTEXT_CLIENT_NAME': 14,
160 'REQUIRE_JS_PLAYER': False
161 },
162 # ios has HLS live streams
163 # See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/680
164 'ios': {
165 'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
166 'INNERTUBE_CONTEXT': {
167 'client': {
168 'clientName': 'IOS',
169 'clientVersion': '16.20',
170 }
171 },
172 'INNERTUBE_CONTEXT_CLIENT_NAME': 5,
173 'REQUIRE_JS_PLAYER': False
174 },
175 'ios_embedded': {
176 'INNERTUBE_API_KEY': 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8',
177 'INNERTUBE_CONTEXT': {
178 'client': {
179 'clientName': 'IOS_MESSAGES_EXTENSION',
180 'clientVersion': '16.20',
181 },
182 },
183 'INNERTUBE_CONTEXT_CLIENT_NAME': 66,
184 'REQUIRE_JS_PLAYER': False
185 },
186 'ios_music': {
187 'INNERTUBE_API_KEY': 'AIzaSyDK3iBpDP9nHVTk2qL73FLJICfOC3c51Og',
188 'INNERTUBE_HOST': 'music.youtube.com',
189 'INNERTUBE_CONTEXT': {
190 'client': {
191 'clientName': 'IOS_MUSIC',
192 'clientVersion': '4.32',
193 },
194 },
195 'INNERTUBE_CONTEXT_CLIENT_NAME': 26,
196 'REQUIRE_JS_PLAYER': False
197 },
198 'ios_creator': {
199 'INNERTUBE_CONTEXT': {
200 'client': {
201 'clientName': 'IOS_CREATOR',
202 'clientVersion': '21.24.100',
203 },
204 },
205 'INNERTUBE_CONTEXT_CLIENT_NAME': 15,
206 'REQUIRE_JS_PLAYER': False
207 },
208 # mweb has 'ultralow' formats
209 # See: https://github.com/yt-dlp/yt-dlp/pull/557
210 'mweb': {
211 'INNERTUBE_API_KEY': 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8',
212 'INNERTUBE_CONTEXT': {
213 'client': {
214 'clientName': 'MWEB',
215 'clientVersion': '2.20210721.07.00',
216 }
217 },
218 'INNERTUBE_CONTEXT_CLIENT_NAME': 2
219 },
220 }
221
222
223 def build_innertube_clients():
224 third_party = {
225 'embedUrl': 'https://google.com', # Can be any valid URL
226 }
227 base_clients = ('android', 'web', 'ios', 'mweb')
228 priority = qualities(base_clients[::-1])
229
230 for client, ytcfg in tuple(INNERTUBE_CLIENTS.items()):
231 ytcfg.setdefault('INNERTUBE_API_KEY', 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8')
232 ytcfg.setdefault('INNERTUBE_HOST', 'www.youtube.com')
233 ytcfg.setdefault('REQUIRE_JS_PLAYER', True)
234 ytcfg['INNERTUBE_CONTEXT']['client'].setdefault('hl', 'en')
235 ytcfg['priority'] = 10 * priority(client.split('_', 1)[0])
236
237 if client in base_clients:
238 INNERTUBE_CLIENTS[f'{client}_agegate'] = agegate_ytcfg = copy.deepcopy(ytcfg)
239 agegate_ytcfg['INNERTUBE_CONTEXT']['client']['clientScreen'] = 'EMBED'
240 agegate_ytcfg['INNERTUBE_CONTEXT']['thirdParty'] = third_party
241 agegate_ytcfg['priority'] -= 1
242 elif client.endswith('_embedded'):
243 ytcfg['INNERTUBE_CONTEXT']['thirdParty'] = third_party
244 ytcfg['priority'] -= 2
245 else:
246 ytcfg['priority'] -= 3
247
248
249 build_innertube_clients()
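# Illustrative result of the call above (derived from the logic here, not an official list):
# each base client also gains an '_agegate' variant with clientScreen=EMBED, and client
# priority decreases roughly as android (30) > android_agegate (29) > android_embedded (28)
# > android_music/android_creator (27), followed by the web, ios and mweb families in that order.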
250
251
252 class YoutubeBaseInfoExtractor(InfoExtractor):
253 """Provide base functions for Youtube extractors"""
254
255 _RESERVED_NAMES = (
256 r'channel|c|user|playlist|watch|w|v|embed|e|watch_popup|clip|'
257 r'shorts|movies|results|shared|hashtag|trending|feed|feeds|'
258 r'browse|oembed|get_video_info|iframe_api|s/player|'
259 r'storefront|oops|index|account|reporthistory|t/terms|about|upload|signin|logout')
260
261 _PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)'
262
263 _NETRC_MACHINE = 'youtube'
264
265 # If True it will raise an error if no login info is provided
266 _LOGIN_REQUIRED = False
267
268 _INVIDIOUS_SITES = (
269 # invidious-redirect websites
270 r'(?:www\.)?redirect\.invidious\.io',
271 r'(?:(?:www|dev)\.)?invidio\.us',
272 # Invidious instances taken from https://github.com/iv-org/documentation/blob/master/Invidious-Instances.md
273 r'(?:www\.)?invidious\.pussthecat\.org',
274 r'(?:www\.)?invidious\.zee\.li',
275 r'(?:www\.)?invidious\.ethibox\.fr',
276 r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',
277 # youtube-dl invidious instances list
278 r'(?:(?:www|no)\.)?invidiou\.sh',
279 r'(?:(?:www|fi)\.)?invidious\.snopyta\.org',
280 r'(?:www\.)?invidious\.kabi\.tk',
281 r'(?:www\.)?invidious\.mastodon\.host',
282 r'(?:www\.)?invidious\.zapashcanon\.fr',
283 r'(?:www\.)?(?:invidious(?:-us)?|piped)\.kavin\.rocks',
284 r'(?:www\.)?invidious\.tinfoil-hat\.net',
285 r'(?:www\.)?invidious\.himiko\.cloud',
286 r'(?:www\.)?invidious\.reallyancient\.tech',
287 r'(?:www\.)?invidious\.tube',
288 r'(?:www\.)?invidiou\.site',
289 r'(?:www\.)?invidious\.site',
290 r'(?:www\.)?invidious\.xyz',
291 r'(?:www\.)?invidious\.nixnet\.xyz',
292 r'(?:www\.)?invidious\.048596\.xyz',
293 r'(?:www\.)?invidious\.drycat\.fr',
294 r'(?:www\.)?inv\.skyn3t\.in',
295 r'(?:www\.)?tube\.poal\.co',
296 r'(?:www\.)?tube\.connect\.cafe',
297 r'(?:www\.)?vid\.wxzm\.sx',
298 r'(?:www\.)?vid\.mint\.lgbt',
299 r'(?:www\.)?vid\.puffyan\.us',
300 r'(?:www\.)?yewtu\.be',
301 r'(?:www\.)?yt\.elukerio\.org',
302 r'(?:www\.)?yt\.lelux\.fi',
303 r'(?:www\.)?invidious\.ggc-project\.de',
304 r'(?:www\.)?yt\.maisputain\.ovh',
305 r'(?:www\.)?ytprivate\.com',
306 r'(?:www\.)?invidious\.13ad\.de',
307 r'(?:www\.)?invidious\.toot\.koeln',
308 r'(?:www\.)?invidious\.fdn\.fr',
309 r'(?:www\.)?watch\.nettohikari\.com',
310 r'(?:www\.)?invidious\.namazso\.eu',
311 r'(?:www\.)?invidious\.silkky\.cloud',
312 r'(?:www\.)?invidious\.exonip\.de',
313 r'(?:www\.)?invidious\.riverside\.rocks',
314 r'(?:www\.)?invidious\.blamefran\.net',
315 r'(?:www\.)?invidious\.moomoo\.de',
316 r'(?:www\.)?ytb\.trom\.tf',
317 r'(?:www\.)?yt\.cyberhost\.uk',
318 r'(?:www\.)?kgg2m7yk5aybusll\.onion',
319 r'(?:www\.)?qklhadlycap4cnod\.onion',
320 r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',
321 r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',
322 r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',
323 r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',
324 r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',
325 r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',
326 r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',
327 r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',
328 r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',
329 r'(?:www\.)?hpniueoejy4opn7bc4ftgazyqjoeqwlvh2uiku2xqku6zpoa4bf5ruid\.onion',
330 )
331
332 def _login(self):
333 """
334 Attempt to log in to YouTube.
335 If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.
336 """
337
338 if (self._LOGIN_REQUIRED
339 and self.get_param('cookiefile') is None
340 and self.get_param('cookiesfrombrowser') is None):
341 self.raise_login_required(
342 'Login details are needed to download this content', method='cookies')
343 username, password = self._get_login_info()
344 if username:
345 self.report_warning(f'Cannot login to YouTube using username and password. {self._LOGIN_HINTS["cookies"]}')
346
347 def _initialize_consent(self):
348 cookies = self._get_cookies('https://www.youtube.com/')
349 if cookies.get('__Secure-3PSID'):
350 return
351 consent_id = None
352 consent = cookies.get('CONSENT')
353 if consent:
354 if 'YES' in consent.value:
355 return
356 consent_id = self._search_regex(
357 r'PENDING\+(\d+)', consent.value, 'consent', default=None)
358 if not consent_id:
359 consent_id = random.randint(100, 999)
360 self._set_cookie('.youtube.com', 'CONSENT', 'YES+cb.20210328-17-p0.en+FX+%s' % consent_id)
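# Illustrative cookie flow (hypothetical id): a pending cookie such as CONSENT=PENDING+262
# yields consent_id 262, and the method then stores CONSENT=YES+cb.20210328-17-p0.en+FX+262
# so subsequent requests are not redirected to the consent page.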
361
362 def _real_initialize(self):
363 self._initialize_consent()
364 self._login()
365
366 _YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+?})\s*;'
367 _YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*=\s*({.+?})\s*;'
368 _YT_INITIAL_BOUNDARY_RE = r'(?:var\s+meta|</script|\n)'
369
370 def _get_default_ytcfg(self, client='web'):
371 return copy.deepcopy(INNERTUBE_CLIENTS[client])
372
373 def _get_innertube_host(self, client='web'):
374 return INNERTUBE_CLIENTS[client]['INNERTUBE_HOST']
375
376 def _ytcfg_get_safe(self, ytcfg, getter, expected_type=None, default_client='web'):
377 # try_get but with fallback to default ytcfg client values when present
378 _func = lambda y: try_get(y, getter, expected_type)
379 return _func(ytcfg) or _func(self._get_default_ytcfg(default_client))
380
381 def _extract_client_name(self, ytcfg, default_client='web'):
382 return self._ytcfg_get_safe(
383 ytcfg, (lambda x: x['INNERTUBE_CLIENT_NAME'],
384 lambda x: x['INNERTUBE_CONTEXT']['client']['clientName']), compat_str, default_client)
385
386 def _extract_client_version(self, ytcfg, default_client='web'):
387 return self._ytcfg_get_safe(
388 ytcfg, (lambda x: x['INNERTUBE_CLIENT_VERSION'],
389 lambda x: x['INNERTUBE_CONTEXT']['client']['clientVersion']), compat_str, default_client)
390
391 def _extract_api_key(self, ytcfg=None, default_client='web'):
392 return self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_API_KEY'], compat_str, default_client)
393
394 def _extract_context(self, ytcfg=None, default_client='web'):
395 _get_context = lambda y: try_get(y, lambda x: x['INNERTUBE_CONTEXT'], dict)
396 context = _get_context(ytcfg)
397 if context:
398 return context
399
400 context = _get_context(self._get_default_ytcfg(default_client))
401 if not ytcfg:
402 return context
403
404 # Recreate the client context (required)
405 context['client'].update({
406 'clientVersion': self._extract_client_version(ytcfg, default_client),
407 'clientName': self._extract_client_name(ytcfg, default_client),
408 })
409 visitor_data = try_get(ytcfg, lambda x: x['VISITOR_DATA'], compat_str)
410 if visitor_data:
411 context['client']['visitorData'] = visitor_data
412 return context
413
414 _SAPISID = None
415
416 def _generate_sapisidhash_header(self, origin='https://www.youtube.com'):
417 time_now = round(time.time())
418 if self._SAPISID is None:
419 yt_cookies = self._get_cookies('https://www.youtube.com')
420 # Sometimes SAPISID cookie isn't present but __Secure-3PAPISID is.
421 # See: https://github.com/yt-dlp/yt-dlp/issues/393
422 sapisid_cookie = dict_get(
423 yt_cookies, ('__Secure-3PAPISID', 'SAPISID'))
424 if sapisid_cookie and sapisid_cookie.value:
425 self._SAPISID = sapisid_cookie.value
426 self.write_debug('Extracted SAPISID cookie')
427 # SAPISID cookie is required if not already present
428 if not yt_cookies.get('SAPISID'):
429 self.write_debug('Copying __Secure-3PAPISID cookie to SAPISID cookie')
430 self._set_cookie(
431 '.youtube.com', 'SAPISID', self._SAPISID, secure=True, expire_time=time_now + 3600)
432 else:
433 self._SAPISID = False
434 if not self._SAPISID:
435 return None
436 # SAPISIDHASH algorithm from https://stackoverflow.com/a/32065323
437 sapisidhash = hashlib.sha1(
438 f'{time_now} {self._SAPISID} {origin}'.encode('utf-8')).hexdigest()
439 return f'SAPISIDHASH {time_now}_{sapisidhash}'
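# Illustrative header shape (hypothetical SAPISID value 'AbCd1234' and timestamp 1634000000):
#   sha1('1634000000 AbCd1234 https://www.youtube.com') -> <40 hex chars>
#   resulting header value: 'SAPISIDHASH 1634000000_<40 hex chars>'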
440
441 def _call_api(self, ep, query, video_id, fatal=True, headers=None,
442 note='Downloading API JSON', errnote='Unable to download API page',
443 context=None, api_key=None, api_hostname=None, default_client='web'):
444
445 data = {'context': context} if context else {'context': self._extract_context(default_client=default_client)}
446 data.update(query)
447 real_headers = self.generate_api_headers(default_client=default_client)
448 real_headers.update({'content-type': 'application/json'})
449 if headers:
450 real_headers.update(headers)
451 return self._download_json(
452 'https://%s/youtubei/v1/%s' % (api_hostname or self._get_innertube_host(default_client), ep),
453 video_id=video_id, fatal=fatal, note=note, errnote=errnote,
454 data=json.dumps(data).encode('utf8'), headers=real_headers,
455 query={'key': api_key or self._extract_api_key()})
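# Illustrative call (hypothetical endpoint/payload for this sketch):
#   self._call_api('player', {'videoId': 'BaW_jenozKc'}, 'BaW_jenozKc')
# POSTs {'context': {...}, 'videoId': 'BaW_jenozKc'} as JSON to
# https://www.youtube.com/youtubei/v1/player?key=<INNERTUBE_API_KEY>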
456
457 def extract_yt_initial_data(self, item_id, webpage, fatal=True):
458 data = self._search_regex(
459 (r'%s\s*%s' % (self._YT_INITIAL_DATA_RE, self._YT_INITIAL_BOUNDARY_RE),
460 self._YT_INITIAL_DATA_RE), webpage, 'yt initial data', fatal=fatal)
461 if data:
462 return self._parse_json(data, item_id, fatal=fatal)
463
464 @staticmethod
465 def _extract_session_index(*data):
466 """
467 Index of current account in account list.
468 See: https://github.com/yt-dlp/yt-dlp/pull/519
469 """
470 for ytcfg in data:
471 session_index = int_or_none(try_get(ytcfg, lambda x: x['SESSION_INDEX']))
472 if session_index is not None:
473 return session_index
474
475 # Deprecated?
476 def _extract_identity_token(self, ytcfg=None, webpage=None):
477 if ytcfg:
478 token = try_get(ytcfg, lambda x: x['ID_TOKEN'], compat_str)
479 if token:
480 return token
481 if webpage:
482 return self._search_regex(
483 r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
484 'identity token', default=None, fatal=False)
485
486 @staticmethod
487 def _extract_account_syncid(*args):
488 """
489 Extract syncId required to download private playlists of secondary channels
490 @params response and/or ytcfg
491 """
492 for data in args:
493 # ytcfg includes channel_syncid if on secondary channel
494 delegated_sid = try_get(data, lambda x: x['DELEGATED_SESSION_ID'], compat_str)
495 if delegated_sid:
496 return delegated_sid
497 sync_ids = (try_get(
498 data, (lambda x: x['responseContext']['mainAppWebResponseContext']['datasyncId'],
499 lambda x: x['DATASYNC_ID']), compat_str) or '').split('||')
500 if len(sync_ids) >= 2 and sync_ids[1]:
501 # datasyncid is of the form "channel_syncid||user_syncid" for secondary channel
502 # and just "user_syncid||" for primary channel. We only want the channel_syncid
503 return sync_ids[0]
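# Illustrative datasyncId values (hypothetical ids): on a secondary channel the field looks
# like 'UCabc123||XyZ789' and 'UCabc123' is returned; on the primary channel it is just
# 'XyZ789||', so the check on sync_ids[1] fails and nothing is returned.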
504
505 @staticmethod
506 def _extract_visitor_data(*args):
507 """
508 Extracts visitorData from an API response or ytcfg
509 Appears to be used to track session state
510 """
511 return get_first(
512 args, (('VISITOR_DATA', ('INNERTUBE_CONTEXT', 'client', 'visitorData'), ('responseContext', 'visitorData'))),
513 expected_type=str)
514
515 @property
516 def is_authenticated(self):
517 return bool(self._generate_sapisidhash_header())
518
519 def extract_ytcfg(self, video_id, webpage):
520 if not webpage:
521 return {}
522 return self._parse_json(
523 self._search_regex(
524 r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg',
525 default='{}'), video_id, fatal=False) or {}
526
527 def generate_api_headers(
528 self, *, ytcfg=None, account_syncid=None, session_index=None,
529 visitor_data=None, identity_token=None, api_hostname=None, default_client='web'):
530
531 origin = 'https://' + (api_hostname if api_hostname else self._get_innertube_host(default_client))
532 headers = {
533 'X-YouTube-Client-Name': compat_str(
534 self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_CONTEXT_CLIENT_NAME'], default_client=default_client)),
535 'X-YouTube-Client-Version': self._extract_client_version(ytcfg, default_client),
536 'Origin': origin,
537 'X-Youtube-Identity-Token': identity_token or self._extract_identity_token(ytcfg),
538 'X-Goog-PageId': account_syncid or self._extract_account_syncid(ytcfg),
539 'X-Goog-Visitor-Id': visitor_data or self._extract_visitor_data(ytcfg)
540 }
541 if session_index is None:
542 session_index = self._extract_session_index(ytcfg)
543 if account_syncid or session_index is not None:
544 headers['X-Goog-AuthUser'] = session_index if session_index is not None else 0
545
546 auth = self._generate_sapisidhash_header(origin)
547 if auth is not None:
548 headers['Authorization'] = auth
549 headers['X-Origin'] = origin
550 return {h: v for h, v in headers.items() if v is not None}
551
552 @staticmethod
553 def _build_api_continuation_query(continuation, ctp=None):
554 query = {
555 'continuation': continuation
556 }
557 # TODO: Inconsistency with clickTrackingParams.
558 # Currently we have a fixed ctp contained within context (from ytcfg)
559 # and a ctp in root query for continuation.
560 if ctp:
561 query['clickTracking'] = {'clickTrackingParams': ctp}
562 return query
563
564 @classmethod
565 def _extract_next_continuation_data(cls, renderer):
566 next_continuation = try_get(
567 renderer, (lambda x: x['continuations'][0]['nextContinuationData'],
568 lambda x: x['continuation']['reloadContinuationData']), dict)
569 if not next_continuation:
570 return
571 continuation = next_continuation.get('continuation')
572 if not continuation:
573 return
574 ctp = next_continuation.get('clickTrackingParams')
575 return cls._build_api_continuation_query(continuation, ctp)
576
577 @classmethod
578 def _extract_continuation_ep_data(cls, continuation_ep: dict):
579 if isinstance(continuation_ep, dict):
580 continuation = try_get(
581 continuation_ep, lambda x: x['continuationCommand']['token'], compat_str)
582 if not continuation:
583 return
584 ctp = continuation_ep.get('clickTrackingParams')
585 return cls._build_api_continuation_query(continuation, ctp)
586
587 @classmethod
588 def _extract_continuation(cls, renderer):
589 next_continuation = cls._extract_next_continuation_data(renderer)
590 if next_continuation:
591 return next_continuation
592
593 contents = []
594 for key in ('contents', 'items'):
595 contents.extend(try_get(renderer, lambda x: x[key], list) or [])
596
597 for content in contents:
598 if not isinstance(content, dict):
599 continue
600 continuation_ep = try_get(
601 content, (lambda x: x['continuationItemRenderer']['continuationEndpoint'],
602 lambda x: x['continuationItemRenderer']['button']['buttonRenderer']['command']),
603 dict)
604 continuation = cls._extract_continuation_ep_data(continuation_ep)
605 if continuation:
606 return continuation
607
608 @classmethod
609 def _extract_alerts(cls, data):
610 for alert_dict in try_get(data, lambda x: x['alerts'], list) or []:
611 if not isinstance(alert_dict, dict):
612 continue
613 for alert in alert_dict.values():
614 alert_type = alert.get('type')
615 if not alert_type:
616 continue
617 message = cls._get_text(alert, 'text')
618 if message:
619 yield alert_type, message
620
621 def _report_alerts(self, alerts, expected=True, fatal=True, only_once=False):
622 errors = []
623 warnings = []
624 for alert_type, alert_message in alerts:
625 if alert_type.lower() == 'error' and fatal:
626 errors.append([alert_type, alert_message])
627 else:
628 warnings.append([alert_type, alert_message])
629
630 for alert_type, alert_message in (warnings + errors[:-1]):
631 self.report_warning('YouTube said: %s - %s' % (alert_type, alert_message), only_once=only_once)
632 if errors:
633 raise ExtractorError('YouTube said: %s' % errors[-1][1], expected=expected)
634
635 def _extract_and_report_alerts(self, data, *args, **kwargs):
636 return self._report_alerts(self._extract_alerts(data), *args, **kwargs)
637
638 def _extract_badges(self, renderer: dict):
639 badges = set()
640 for badge in try_get(renderer, lambda x: x['badges'], list) or []:
641 label = try_get(badge, lambda x: x['metadataBadgeRenderer']['label'], compat_str)
642 if label:
643 badges.add(label.lower())
644 return badges
645
646 @staticmethod
647 def _get_text(data, *path_list, max_runs=None):
648 for path in path_list or [None]:
649 if path is None:
650 obj = [data]
651 else:
652 obj = traverse_obj(data, path, default=[])
653 if not any(key is ... or isinstance(key, (list, tuple)) for key in variadic(path)):
654 obj = [obj]
655 for item in obj:
656 text = try_get(item, lambda x: x['simpleText'], compat_str)
657 if text:
658 return text
659 runs = try_get(item, lambda x: x['runs'], list) or []
660 if not runs and isinstance(item, list):
661 runs = item
662
663 runs = runs[:min(len(runs), max_runs or len(runs))]
664 text = ''.join(traverse_obj(runs, (..., 'text'), expected_type=str, default=[]))
665 if text:
666 return text
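# Illustrative inputs (hypothetical renderer fragments): both
#   {'title': {'simpleText': 'Foo'}}  and  {'title': {'runs': [{'text': 'Fo'}, {'text': 'o'}]}}
# give self._get_text(renderer, 'title') == 'Foo'; max_runs would truncate the runs list first.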
667
668 def _extract_response(self, item_id, query, note='Downloading API JSON', headers=None,
669 ytcfg=None, check_get_keys=None, ep='browse', fatal=True, api_hostname=None,
670 default_client='web'):
671 response = None
672 last_error = None
673 count = -1
674 retries = self.get_param('extractor_retries', 3)
675 if check_get_keys is None:
676 check_get_keys = []
677 while count < retries:
678 count += 1
679 if last_error:
680 self.report_warning('%s. Retrying ...' % remove_end(last_error, '.'))
681 try:
682 response = self._call_api(
683 ep=ep, fatal=True, headers=headers,
684 video_id=item_id, query=query,
685 context=self._extract_context(ytcfg, default_client),
686 api_key=self._extract_api_key(ytcfg, default_client),
687 api_hostname=api_hostname, default_client=default_client,
688 note='%s%s' % (note, ' (retry #%d)' % count if count else ''))
689 except ExtractorError as e:
690 if isinstance(e.cause, network_exceptions):
691 if isinstance(e.cause, compat_HTTPError) and not is_html(e.cause.read(512)):
692 e.cause.seek(0)
693 yt_error = try_get(
694 self._parse_json(e.cause.read().decode(), item_id, fatal=False),
695 lambda x: x['error']['message'], compat_str)
696 if yt_error:
697 self._report_alerts([('ERROR', yt_error)], fatal=False)
698 # Downloading page may result in intermittent 5xx HTTP error
699 # Sometimes a 404 is also received. See: https://github.com/ytdl-org/youtube-dl/issues/28289
700 # We also want to catch all other network exceptions since errors in later pages can be troublesome
701 # See https://github.com/yt-dlp/yt-dlp/issues/507#issuecomment-880188210
702 if not isinstance(e.cause, compat_HTTPError) or e.cause.code not in (403, 429):
703 last_error = error_to_compat_str(e.cause or e.msg)
704 if count < retries:
705 continue
706 if fatal:
707 raise
708 else:
709 self.report_warning(error_to_compat_str(e))
710 return
711
712 else:
713 try:
714 self._extract_and_report_alerts(response, only_once=True)
715 except ExtractorError as e:
716 # YouTube servers may return errors we want to retry on in a 200 OK response
717 # See: https://github.com/yt-dlp/yt-dlp/issues/839
718 if 'unknown error' in e.msg.lower():
719 last_error = e.msg
720 continue
721 if fatal:
722 raise
723 self.report_warning(error_to_compat_str(e))
724 return
725 if not check_get_keys or dict_get(response, check_get_keys):
726 break
727 # Youtube sometimes sends incomplete data
728 # See: https://github.com/ytdl-org/youtube-dl/issues/28194
729 last_error = 'Incomplete data received'
730 if count >= retries:
731 if fatal:
732 raise ExtractorError(last_error)
733 else:
734 self.report_warning(last_error)
735 return
736 return response
737
738 @staticmethod
739 def is_music_url(url):
740 return re.match(r'https?://music\.youtube\.com/', url) is not None
741
742 def _extract_video(self, renderer):
743 video_id = renderer.get('videoId')
744 title = self._get_text(renderer, 'title')
745 description = self._get_text(renderer, 'descriptionSnippet')
746 duration = parse_duration(self._get_text(
747 renderer, 'lengthText', ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'text')))
748 view_count_text = self._get_text(renderer, 'viewCountText') or ''
749 view_count = str_to_int(self._search_regex(
750 r'^([\d,]+)', re.sub(r'\s', '', view_count_text),
751 'view count', default=None))
752
753 uploader = self._get_text(renderer, 'ownerText', 'shortBylineText')
754
755 return {
756 '_type': 'url',
757 'ie_key': YoutubeIE.ie_key(),
758 'id': video_id,
759 'url': f'https://www.youtube.com/watch?v={video_id}',
760 'title': title,
761 'description': description,
762 'duration': duration,
763 'view_count': view_count,
764 'uploader': uploader,
765 }
766
767
768 class YoutubeIE(YoutubeBaseInfoExtractor):
769 IE_DESC = 'YouTube'
770 _VALID_URL = r"""(?x)^
771 (
772 (?:https?://|//) # http(s):// or protocol-independent URL
773 (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com|
774 (?:www\.)?deturl\.com/www\.youtube\.com|
775 (?:www\.)?pwnyoutube\.com|
776 (?:www\.)?hooktube\.com|
777 (?:www\.)?yourepeat\.com|
778 tube\.majestyc\.net|
779 %(invidious)s|
780 youtube\.googleapis\.com)/ # the various hostnames, with wildcard subdomains
781 (?:.*?\#/)? # handle anchor (#/) redirect urls
782 (?: # the various things that can precede the ID:
783 (?:(?:v|embed|e|shorts)/(?!videoseries)) # v/ or embed/ or e/ or shorts/
784 |(?: # or the v= param in all its forms
785 (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
786 (?:\?|\#!?) # the params delimiter ? or # or #!
787 (?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
788 v=
789 )
790 ))
791 |(?:
792 youtu\.be| # just youtu.be/xxxx
793 vid\.plus| # or vid.plus/xxxx
794 zwearz\.com/watch| # or zwearz.com/watch/xxxx
795 %(invidious)s
796 )/
797 |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
798 )
799 )? # all until now is optional -> you can pass the naked ID
800 (?P<id>[0-9A-Za-z_-]{11}) # here it is! the YouTube video ID
801 (?(1).+)? # if we found the ID, everything can follow
802 (?:\#|$)""" % {
803 'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
804 }
805 _PLAYER_INFO_RE = (
806 r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player',
807 r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',
808 r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$',
809 )
810 _formats = {
811 '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
812 '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
813 '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
814 '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
815 '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
816 '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
817 '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
818 '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
819 # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
820 '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
821 '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
822 '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
823 '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
824 '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
825 '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
826 '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
827 '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
828 '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
829
830
831 # 3D videos
832 '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
833 '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
834 '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
835 '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
836 '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
837 '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
838 '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
839
840 # Apple HTTP Live Streaming
841 '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
842 '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
843 '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
844 '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
845 '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
846 '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
847 '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
848 '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
849
850 # DASH mp4 video
851 '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
852 '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
853 '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
854 '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
855 '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
856 '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
857 '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
858 '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
859 '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
860 '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
861 '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
862 '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},
863
864 # Dash mp4 audio
865 '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
866 '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
867 '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
868 '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
869 '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
870 '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
871 '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},
872
873 # Dash webm
874 '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
875 '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
876 '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
877 '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
878 '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
879 '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
880 '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
881 '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
882 '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
883 '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
884 '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
885 '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
886 '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
887 '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
888 '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
889 # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
890 '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
891 '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
892 '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
893 '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
894 '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
895 '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
896
897 # Dash webm audio
898 '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
899 '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},
900
901 # Dash webm audio with opus inside
902 '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
903 '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
904 '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},
905
906 # RTMP (unnamed)
907 '_rtmp': {'protocol': 'rtmp'},
908
909 # av01 video only formats sometimes served with "unknown" codecs
910 '394': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},
911 '395': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},
912 '396': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'av01.0.01M.08'},
913 '397': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'av01.0.04M.08'},
914 '398': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'av01.0.05M.08'},
915 '399': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'av01.0.08M.08'},
916 '400': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},
917 '401': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},
918 }
919 _SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')
920
921 _GEO_BYPASS = False
922
923 IE_NAME = 'youtube'
924 _TESTS = [
925 {
926 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
927 'info_dict': {
928 'id': 'BaW_jenozKc',
929 'ext': 'mp4',
930 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
931 'uploader': 'Philipp Hagemeister',
932 'uploader_id': 'phihag',
933 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
934 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
935 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
936 'upload_date': '20121002',
937 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
938 'categories': ['Science & Technology'],
939 'tags': ['youtube-dl'],
940 'duration': 10,
941 'view_count': int,
942 'like_count': int,
943 'dislike_count': int,
944 'start_time': 1,
945 'end_time': 9,
946 }
947 },
948 {
949 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
950 'note': 'Embed-only video (#1746)',
951 'info_dict': {
952 'id': 'yZIXLfi8CZQ',
953 'ext': 'mp4',
954 'upload_date': '20120608',
955 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
956 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
957 'uploader': 'SET India',
958 'uploader_id': 'setindia',
959 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
960 'age_limit': 18,
961 },
962 'skip': 'Private video',
963 },
964 {
965 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=yZIXLfi8CZQ',
966 'note': 'Use the first video ID in the URL',
967 'info_dict': {
968 'id': 'BaW_jenozKc',
969 'ext': 'mp4',
970 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
971 'uploader': 'Philipp Hagemeister',
972 'uploader_id': 'phihag',
973 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
974 'upload_date': '20121002',
975 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
976 'categories': ['Science & Technology'],
977 'tags': ['youtube-dl'],
978 'duration': 10,
979 'view_count': int,
980 'like_count': int,
981 'dislike_count': int,
982 },
983 'params': {
984 'skip_download': True,
985 },
986 },
987 {
988 'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
989 'note': '256k DASH audio (format 141) via DASH manifest',
990 'info_dict': {
991 'id': 'a9LDPn-MO4I',
992 'ext': 'm4a',
993 'upload_date': '20121002',
994 'uploader_id': '8KVIDEO',
995 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
996 'description': '',
997 'uploader': '8KVIDEO',
998 'title': 'UHDTV TEST 8K VIDEO.mp4'
999 },
1000 'params': {
1001 'youtube_include_dash_manifest': True,
1002 'format': '141',
1003 },
1004 'skip': 'format 141 not served anymore',
1005 },
1006 # DASH manifest with encrypted signature
1007 {
1008 'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
1009 'info_dict': {
1010 'id': 'IB3lcPjvWLA',
1011 'ext': 'm4a',
1012 'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
1013 'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
1014 'duration': 244,
1015 'uploader': 'AfrojackVEVO',
1016 'uploader_id': 'AfrojackVEVO',
1017 'upload_date': '20131011',
1018 'abr': 129.495,
1019 },
1020 'params': {
1021 'youtube_include_dash_manifest': True,
1022 'format': '141/bestaudio[ext=m4a]',
1023 },
1024 },
1025 # Age-gate videos. See https://github.com/yt-dlp/yt-dlp/pull/575#issuecomment-888837000
1026 {
1027 'note': 'Embed allowed age-gate video',
1028 'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
1029 'info_dict': {
1030 'id': 'HtVdAasjOgU',
1031 'ext': 'mp4',
1032 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
1033 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
1034 'duration': 142,
1035 'uploader': 'The Witcher',
1036 'uploader_id': 'WitcherGame',
1037 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
1038 'upload_date': '20140605',
1039 'age_limit': 18,
1040 },
1041 },
1042 {
1043 'note': 'Age-gate video with embed allowed in public site',
1044 'url': 'https://youtube.com/watch?v=HsUATh_Nc2U',
1045 'info_dict': {
1046 'id': 'HsUATh_Nc2U',
1047 'ext': 'mp4',
1048 'title': 'Godzilla 2 (Official Video)',
1049 'description': 'md5:bf77e03fcae5529475e500129b05668a',
1050 'upload_date': '20200408',
1051 'uploader_id': 'FlyingKitty900',
1052 'uploader': 'FlyingKitty',
1053 'age_limit': 18,
1054 },
1055 },
1056 {
1057 'note': 'Age-gate video embeddable only with clientScreen=EMBED',
1058 'url': 'https://youtube.com/watch?v=Tq92D6wQ1mg',
1059 'info_dict': {
1060 'id': 'Tq92D6wQ1mg',
1061 'title': '[MMD] Adios - EVERGLOW [+Motion DL]',
1062 'ext': 'mp4',
1063 'upload_date': '20191227',
1064 'uploader_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',
1065 'uploader': 'Projekt Melody',
1066 'description': 'md5:17eccca93a786d51bc67646756894066',
1067 'age_limit': 18,
1068 },
1069 },
1070 {
1071 'note': 'Non-age-gated non-embeddable video',
1072 'url': 'https://youtube.com/watch?v=MeJVWBSsPAY',
1073 'info_dict': {
1074 'id': 'MeJVWBSsPAY',
1075 'ext': 'mp4',
1076 'title': 'OOMPH! - Such Mich Find Mich (Lyrics)',
1077 'uploader': 'Herr Lurik',
1078 'uploader_id': 'st3in234',
1079 'description': 'Fan Video. Music & Lyrics by OOMPH!.',
1080 'upload_date': '20130730',
1081 },
1082 },
1083 {
1084 'note': 'Non-bypassable age-gated video',
1085 'url': 'https://youtube.com/watch?v=Cr381pDsSsA',
1086 'only_matching': True,
1087 },
1088 # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
1089 # YouTube Red ad is not captured for creator
1090 {
1091 'url': '__2ABJjxzNo',
1092 'info_dict': {
1093 'id': '__2ABJjxzNo',
1094 'ext': 'mp4',
1095 'duration': 266,
1096 'upload_date': '20100430',
1097 'uploader_id': 'deadmau5',
1098 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
1099 'creator': 'deadmau5',
1100 'description': 'md5:6cbcd3a92ce1bc676fc4d6ab4ace2336',
1101 'uploader': 'deadmau5',
1102 'title': 'Deadmau5 - Some Chords (HD)',
1103 'alt_title': 'Some Chords',
1104 },
1105 'expected_warnings': [
1106 'DASH manifest missing',
1107 ]
1108 },
1109 # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
1110 {
1111 'url': 'lqQg6PlCWgI',
1112 'info_dict': {
1113 'id': 'lqQg6PlCWgI',
1114 'ext': 'mp4',
1115 'duration': 6085,
1116 'upload_date': '20150827',
1117 'uploader_id': 'olympic',
1118 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
1119 'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
1120 'uploader': 'Olympics',
1121 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
1122 },
1123 'params': {
1124 'skip_download': 'requires avconv',
1125 }
1126 },
1127 # Non-square pixels
1128 {
1129 'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
1130 'info_dict': {
1131 'id': '_b-2C3KPAM0',
1132 'ext': 'mp4',
1133 'stretched_ratio': 16 / 9.,
1134 'duration': 85,
1135 'upload_date': '20110310',
1136 'uploader_id': 'AllenMeow',
1137 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
1138 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
1139 'uploader': '孫ᄋᄅ',
1140 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
1141 },
1142 },
1143 # url_encoded_fmt_stream_map is empty string
1144 {
1145 'url': 'qEJwOuvDf7I',
1146 'info_dict': {
1147 'id': 'qEJwOuvDf7I',
1148 'ext': 'webm',
1149 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
1150 'description': '',
1151 'upload_date': '20150404',
1152 'uploader_id': 'spbelect',
1153 'uploader': 'Наблюдатели Петербурга',
1154 },
1155 'params': {
1156 'skip_download': 'requires avconv',
1157 },
1158 'skip': 'This live event has ended.',
1159 },
1160 # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
1161 {
1162 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
1163 'info_dict': {
1164 'id': 'FIl7x6_3R5Y',
1165 'ext': 'webm',
1166 'title': 'md5:7b81415841e02ecd4313668cde88737a',
1167 'description': 'md5:116377fd2963b81ec4ce64b542173306',
1168 'duration': 220,
1169 'upload_date': '20150625',
1170 'uploader_id': 'dorappi2000',
1171 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
1172 'uploader': 'dorappi2000',
1173 'formats': 'mincount:31',
1174 },
1175 'skip': 'not actual anymore',
1176 },
1177 # DASH manifest with segment_list
1178 {
1179 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
1180 'md5': '8ce563a1d667b599d21064e982ab9e31',
1181 'info_dict': {
1182 'id': 'CsmdDsKjzN8',
1183 'ext': 'mp4',
1184 'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510
1185 'uploader': 'Airtek',
1186 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
1187 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
1188 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
1189 },
1190 'params': {
1191 'youtube_include_dash_manifest': True,
1192 'format': '135', # bestvideo
1193 },
1194 'skip': 'This live event has ended.',
1195 },
1196 {
1197 # Multifeed videos (multiple cameras), URL is for Main Camera
1198 'url': 'https://www.youtube.com/watch?v=jvGDaLqkpTg',
1199 'info_dict': {
1200 'id': 'jvGDaLqkpTg',
1201 'title': 'Tom Clancy Free Weekend Rainbow Whatever',
1202 'description': 'md5:e03b909557865076822aa169218d6a5d',
1203 },
1204 'playlist': [{
1205 'info_dict': {
1206 'id': 'jvGDaLqkpTg',
1207 'ext': 'mp4',
1208 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Main Camera)',
1209 'description': 'md5:e03b909557865076822aa169218d6a5d',
1210 'duration': 10643,
1211 'upload_date': '20161111',
1212 'uploader': 'Team PGP',
1213 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1214 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
1215 },
1216 }, {
1217 'info_dict': {
1218 'id': '3AKt1R1aDnw',
1219 'ext': 'mp4',
1220 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 2)',
1221 'description': 'md5:e03b909557865076822aa169218d6a5d',
1222 'duration': 10991,
1223 'upload_date': '20161111',
1224 'uploader': 'Team PGP',
1225 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1226 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
1227 },
1228 }, {
1229 'info_dict': {
1230 'id': 'RtAMM00gpVc',
1231 'ext': 'mp4',
1232 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 3)',
1233 'description': 'md5:e03b909557865076822aa169218d6a5d',
1234 'duration': 10995,
1235 'upload_date': '20161111',
1236 'uploader': 'Team PGP',
1237 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1238 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
1239 },
1240 }, {
1241 'info_dict': {
1242 'id': '6N2fdlP3C5U',
1243 'ext': 'mp4',
1244 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 4)',
1245 'description': 'md5:e03b909557865076822aa169218d6a5d',
1246 'duration': 10990,
1247 'upload_date': '20161111',
1248 'uploader': 'Team PGP',
1249 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1250 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
1251 },
1252 }],
1253 'params': {
1254 'skip_download': True,
1255 },
1256 'skip': 'Not multifeed anymore',
1257 },
1258 {
1259 # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
1260 'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
1261 'info_dict': {
1262 'id': 'gVfLd0zydlo',
1263 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
1264 },
1265 'playlist_count': 2,
1266 'skip': 'Not multifeed anymore',
1267 },
1268 {
1269 'url': 'https://vid.plus/FlRa-iH7PGw',
1270 'only_matching': True,
1271 },
1272 {
1273 'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
1274 'only_matching': True,
1275 },
1276 {
1277 # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
1278 # Also tests cut-off URL expansion in video description (see
1279 # https://github.com/ytdl-org/youtube-dl/issues/1892,
1280 # https://github.com/ytdl-org/youtube-dl/issues/8164)
1281 'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
1282 'info_dict': {
1283 'id': 'lsguqyKfVQg',
1284 'ext': 'mp4',
1285 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
1286 'alt_title': 'Dark Walk',
1287 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
1288 'duration': 133,
1289 'upload_date': '20151119',
1290 'uploader_id': 'IronSoulElf',
1291 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
1292 'uploader': 'IronSoulElf',
1293 'creator': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',
1294 'track': 'Dark Walk',
1295 'artist': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',
1296 'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
1297 },
1298 'params': {
1299 'skip_download': True,
1300 },
1301 },
1302 {
1303 # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
1304 'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
1305 'only_matching': True,
1306 },
1307 {
1308 # Video with yt:stretch=17:0
1309 'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
1310 'info_dict': {
1311 'id': 'Q39EVAstoRM',
1312 'ext': 'mp4',
1313 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
1314 'description': 'md5:ee18a25c350637c8faff806845bddee9',
1315 'upload_date': '20151107',
1316 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
1317 'uploader': 'CH GAMER DROID',
1318 },
1319 'params': {
1320 'skip_download': True,
1321 },
1322 'skip': 'This video does not exist.',
1323 },
1324 {
1325 # Video with incomplete 'yt:stretch=16:'
1326 'url': 'https://www.youtube.com/watch?v=FRhJzUSJbGI',
1327 'only_matching': True,
1328 },
1329 {
1330 # Video licensed under Creative Commons
1331 'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
1332 'info_dict': {
1333 'id': 'M4gD1WSo5mA',
1334 'ext': 'mp4',
1335 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
1336 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
1337 'duration': 721,
1338 'upload_date': '20150127',
1339 'uploader_id': 'BerkmanCenter',
1340 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
1341 'uploader': 'The Berkman Klein Center for Internet & Society',
1342 'license': 'Creative Commons Attribution license (reuse allowed)',
1343 },
1344 'params': {
1345 'skip_download': True,
1346 },
1347 },
1348 {
1349 # Channel-like uploader_url
1350 'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
1351 'info_dict': {
1352 'id': 'eQcmzGIKrzg',
1353 'ext': 'mp4',
1354 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
1355 'description': 'md5:13a2503d7b5904ef4b223aa101628f39',
1356 'duration': 4060,
1357 'upload_date': '20151119',
1358 'uploader': 'Bernie Sanders',
1359 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
1360 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
1361 'license': 'Creative Commons Attribution license (reuse allowed)',
1362 },
1363 'params': {
1364 'skip_download': True,
1365 },
1366 },
1367 {
1368 'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
1369 'only_matching': True,
1370 },
1371 {
1372 # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
1373 'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
1374 'only_matching': True,
1375 },
1376 {
1377 # Rental video preview
1378 'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
1379 'info_dict': {
1380 'id': 'uGpuVWrhIzE',
1381 'ext': 'mp4',
1382 'title': 'Piku - Trailer',
1383 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
1384 'upload_date': '20150811',
1385 'uploader': 'FlixMatrix',
1386 'uploader_id': 'FlixMatrixKaravan',
1387 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
1388 'license': 'Standard YouTube License',
1389 },
1390 'params': {
1391 'skip_download': True,
1392 },
1393 'skip': 'This video is not available.',
1394 },
1395 {
1396 # YouTube Red video with episode data
1397 'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
1398 'info_dict': {
1399 'id': 'iqKdEhx-dD4',
1400 'ext': 'mp4',
1401 'title': 'Isolation - Mind Field (Ep 1)',
1402 'description': 'md5:f540112edec5d09fc8cc752d3d4ba3cd',
1403 'duration': 2085,
1404 'upload_date': '20170118',
1405 'uploader': 'Vsauce',
1406 'uploader_id': 'Vsauce',
1407 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
1408 'series': 'Mind Field',
1409 'season_number': 1,
1410 'episode_number': 1,
1411 },
1412 'params': {
1413 'skip_download': True,
1414 },
1415 'expected_warnings': [
1416 'Skipping DASH manifest',
1417 ],
1418 },
1419 {
1420 # The following content has been identified by the YouTube community
1421 # as inappropriate or offensive to some audiences.
1422 'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
1423 'info_dict': {
1424 'id': '6SJNVb0GnPI',
1425 'ext': 'mp4',
1426 'title': 'Race Differences in Intelligence',
1427 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
1428 'duration': 965,
1429 'upload_date': '20140124',
1430 'uploader': 'New Century Foundation',
1431 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
1432 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
1433 },
1434 'params': {
1435 'skip_download': True,
1436 },
1437 'skip': 'This video has been removed for violating YouTube\'s policy on hate speech.',
1438 },
1439 {
1440 # itag 212
1441 'url': '1t24XAntNCY',
1442 'only_matching': True,
1443 },
1444 {
1445 # geo restricted to JP
1446 'url': 'sJL6WA-aGkQ',
1447 'only_matching': True,
1448 },
1449 {
1450 'url': 'https://invidio.us/watch?v=BaW_jenozKc',
1451 'only_matching': True,
1452 },
1453 {
1454 'url': 'https://redirect.invidious.io/watch?v=BaW_jenozKc',
1455 'only_matching': True,
1456 },
1457 {
1458 # from https://nitter.pussthecat.org/YouTube/status/1360363141947944964#m
1459 'url': 'https://redirect.invidious.io/Yh0AhrY9GjA',
1460 'only_matching': True,
1461 },
1462 {
1463 # DRM protected
1464 'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
1465 'only_matching': True,
1466 },
1467 {
1468 # Video with unsupported adaptive stream type formats
1469 'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
1470 'info_dict': {
1471 'id': 'Z4Vy8R84T1U',
1472 'ext': 'mp4',
1473 'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
1474 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
1475 'duration': 433,
1476 'upload_date': '20130923',
1477 'uploader': 'Amelia Putri Harwita',
1478 'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',
1479 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',
1480 'formats': 'maxcount:10',
1481 },
1482 'params': {
1483 'skip_download': True,
1484 'youtube_include_dash_manifest': False,
1485 },
1486 'skip': 'not actual anymore',
1487 },
1488 {
1489 # Youtube Music Auto-generated description
1490 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1491 'info_dict': {
1492 'id': 'MgNrAu2pzNs',
1493 'ext': 'mp4',
1494 'title': 'Voyeur Girl',
1495 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
1496 'upload_date': '20190312',
1497 'uploader': 'Stephen - Topic',
1498 'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',
1499 'artist': 'Stephen',
1500 'track': 'Voyeur Girl',
1501 'album': 'it\'s too much love to know my dear',
1502 'release_date': '20190313',
1503 'release_year': 2019,
1504 },
1505 'params': {
1506 'skip_download': True,
1507 },
1508 },
1509 {
1510 'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',
1511 'only_matching': True,
1512 },
1513 {
1514 # invalid -> valid video id redirection
1515 'url': 'DJztXj2GPfl',
1516 'info_dict': {
1517 'id': 'DJztXj2GPfk',
1518 'ext': 'mp4',
1519 'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',
1520 'description': 'md5:bf577a41da97918e94fa9798d9228825',
1521 'upload_date': '20090125',
1522 'uploader': 'Prochorowka',
1523 'uploader_id': 'Prochorowka',
1524 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',
1525 'artist': 'Panjabi MC',
1526 'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',
1527 'album': 'Beware of the Boys (Mundian To Bach Ke)',
1528 },
1529 'params': {
1530 'skip_download': True,
1531 },
1532 'skip': 'Video unavailable',
1533 },
1534 {
1535 # empty description results in an empty string
1536 'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',
1537 'info_dict': {
1538 'id': 'x41yOUIvK2k',
1539 'ext': 'mp4',
1540 'title': 'IMG 3456',
1541 'description': '',
1542 'upload_date': '20170613',
1543 'uploader_id': 'ElevageOrVert',
1544 'uploader': 'ElevageOrVert',
1545 },
1546 'params': {
1547 'skip_download': True,
1548 },
1549 },
1550 {
1551 # with '};' inside yt initial data (see [1])
1552 # see [2] for an example with '};' inside ytInitialPlayerResponse
1553 # 1. https://github.com/ytdl-org/youtube-dl/issues/27093
1554 # 2. https://github.com/ytdl-org/youtube-dl/issues/27216
1555 'url': 'https://www.youtube.com/watch?v=CHqg6qOn4no',
1556 'info_dict': {
1557 'id': 'CHqg6qOn4no',
1558 'ext': 'mp4',
1559 'title': 'Part 77 Sort a list of simple types in c#',
1560 'description': 'md5:b8746fa52e10cdbf47997903f13b20dc',
1561 'upload_date': '20130831',
1562 'uploader_id': 'kudvenkat',
1563 'uploader': 'kudvenkat',
1564 },
1565 'params': {
1566 'skip_download': True,
1567 },
1568 },
1569 {
1570 # another example of '};' in ytInitialData
1571 'url': 'https://www.youtube.com/watch?v=gVfgbahppCY',
1572 'only_matching': True,
1573 },
1574 {
1575 'url': 'https://www.youtube.com/watch_popup?v=63RmMXCd_bQ',
1576 'only_matching': True,
1577 },
1578 {
1579 # https://github.com/ytdl-org/youtube-dl/pull/28094
1580 'url': 'OtqTfy26tG0',
1581 'info_dict': {
1582 'id': 'OtqTfy26tG0',
1583 'ext': 'mp4',
1584 'title': 'Burn Out',
1585 'description': 'md5:8d07b84dcbcbfb34bc12a56d968b6131',
1586 'upload_date': '20141120',
1587 'uploader': 'The Cinematic Orchestra - Topic',
1588 'uploader_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',
1589 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',
1590 'artist': 'The Cinematic Orchestra',
1591 'track': 'Burn Out',
1592 'album': 'Every Day',
1593 'release_date': None,
1594 'release_year': None,
1595 },
1596 'params': {
1597 'skip_download': True,
1598 },
1599 },
1600 {
1601 # controversial video, only works with bpctr when authenticated with cookies
1602 'url': 'https://www.youtube.com/watch?v=nGC3D_FkCmg',
1603 'only_matching': True,
1604 },
1605 {
1606 # controversial video, requires bpctr/contentCheckOk
1607 'url': 'https://www.youtube.com/watch?v=SZJvDhaSDnc',
1608 'info_dict': {
1609 'id': 'SZJvDhaSDnc',
1610 'ext': 'mp4',
1611 'title': 'San Diego teen commits suicide after bullying over embarrassing video',
1612 'channel_id': 'UC-SJ6nODDmufqBzPBwCvYvQ',
1613 'uploader': 'CBS This Morning',
1614 'uploader_id': 'CBSThisMorning',
1615 'upload_date': '20140716',
1616 'description': 'md5:acde3a73d3f133fc97e837a9f76b53b7'
1617 }
1618 },
1619 {
1620 # restricted location, https://github.com/ytdl-org/youtube-dl/issues/28685
1621 'url': 'cBvYw8_A0vQ',
1622 'info_dict': {
1623 'id': 'cBvYw8_A0vQ',
1624 'ext': 'mp4',
1625 'title': '4K Ueno Okachimachi Street Scenes 上野御徒町歩き',
1626 'description': 'md5:ea770e474b7cd6722b4c95b833c03630',
1627 'upload_date': '20201120',
1628 'uploader': 'Walk around Japan',
1629 'uploader_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',
1630 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',
1631 },
1632 'params': {
1633 'skip_download': True,
1634 },
1635 }, {
1636 # Has multiple audio streams
1637 'url': 'WaOKSUlf4TM',
1638 'only_matching': True
1639 }, {
1640 # Requires Premium: has format 141 when requested using YTM url
1641 'url': 'https://music.youtube.com/watch?v=XclachpHxis',
1642 'only_matching': True
1643 }, {
1644 # multiple subtitles with same lang_code
1645 'url': 'https://www.youtube.com/watch?v=wsQiKKfKxug',
1646 'only_matching': True,
1647 }, {
1648 # Force use android client fallback
1649 'url': 'https://www.youtube.com/watch?v=YOelRv7fMxY',
1650 'info_dict': {
1651 'id': 'YOelRv7fMxY',
1652 'title': 'DIGGING A SECRET TUNNEL Part 1',
1653 'ext': '3gp',
1654 'upload_date': '20210624',
1655 'channel_id': 'UCp68_FLety0O-n9QU6phsgw',
1656 'uploader': 'colinfurze',
1657 'uploader_id': 'colinfurze',
1658 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCp68_FLety0O-n9QU6phsgw',
1659 'description': 'md5:b5096f56af7ccd7a555c84db81738b22'
1660 },
1661 'params': {
1662 'format': '17', # 3gp format available on android
1663 'extractor_args': {'youtube': {'player_client': ['android']}},
1664 },
1665 },
1666 {
1667 # Skip download of additional client configs (remix client config in this case)
1668 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1669 'only_matching': True,
1670 'params': {
1671 'extractor_args': {'youtube': {'player_skip': ['configs']}},
1672 },
1673 }, {
1674 # shorts
1675 'url': 'https://www.youtube.com/shorts/BGQWPY4IigY',
1676 'only_matching': True,
1677 }, {
1678 'note': 'Storyboards',
1679 'url': 'https://www.youtube.com/watch?v=5KLPxDtMqe8',
1680 'info_dict': {
1681 'id': '5KLPxDtMqe8',
1682 'ext': 'mhtml',
1683 'format_id': 'sb0',
1684 'title': 'Your Brain is Plastic',
1685 'uploader_id': 'scishow',
1686 'description': 'md5:89cd86034bdb5466cd87c6ba206cd2bc',
1687 'upload_date': '20140324',
1688 'uploader': 'SciShow',
1689 }, 'params': {'format': 'mhtml', 'skip_download': True}
1690 }
1691 ]
1692
1693 @classmethod
1694 def suitable(cls, url):
1695 from ..utils import parse_qs
1696
1697 qs = parse_qs(url)
1698 if qs.get('list', [None])[0]:
1699 return False
1700 return super(YoutubeIE, cls).suitable(url)
1701
1702 def __init__(self, *args, **kwargs):
1703 super(YoutubeIE, self).__init__(*args, **kwargs)
1704 self._code_cache = {}
1705 self._player_cache = {}
1706
1707 def _extract_player_url(self, *ytcfgs, webpage=None):
1708 player_url = traverse_obj(
1709 ytcfgs, (..., 'PLAYER_JS_URL'), (..., 'WEB_PLAYER_CONTEXT_CONFIGS', ..., 'jsUrl'),
1710 get_all=False, expected_type=compat_str)
1711 if not player_url:
1712 return
1713 if player_url.startswith('//'):
1714 player_url = 'https:' + player_url
1715 elif not re.match(r'https?://', player_url):
1716 player_url = compat_urlparse.urljoin(
1717 'https://www.youtube.com', player_url)
1718 return player_url
1719
1720 def _download_player_url(self, video_id, fatal=False):
1721 res = self._download_webpage(
1722 'https://www.youtube.com/iframe_api',
1723 note='Downloading iframe API JS', video_id=video_id, fatal=fatal)
1724 if res:
1725 player_version = self._search_regex(
1726 r'player\\?/([0-9a-fA-F]{8})\\?/', res, 'player version', fatal=fatal)
1727 if player_version:
1728 return f'https://www.youtube.com/s/player/{player_version}/player_ias.vflset/en_US/base.js'
1729
1730 def _signature_cache_id(self, example_sig):
1731 """ Return a string representation of a signature """
1732 return '.'.join(compat_str(len(part)) for part in example_sig.split('.'))
1733
1734 @classmethod
1735 def _extract_player_info(cls, player_url):
1736 for player_re in cls._PLAYER_INFO_RE:
1737 id_m = re.search(player_re, player_url)
1738 if id_m:
1739 break
1740 else:
1741 raise ExtractorError('Cannot identify player %r' % player_url)
1742 return id_m.group('id')
1743
1744 def _load_player(self, video_id, player_url, fatal=True):
1745 player_id = self._extract_player_info(player_url)
1746 if player_id not in self._code_cache:
1747 code = self._download_webpage(
1748 player_url, video_id, fatal=fatal,
1749 note='Downloading player ' + player_id,
1750 errnote='Download of %s failed' % player_url)
1751 if code:
1752 self._code_cache[player_id] = code
1753 return self._code_cache.get(player_id)
1754
1755 def _extract_signature_function(self, video_id, player_url, example_sig):
1756 player_id = self._extract_player_info(player_url)
1757
1758 # Read from filesystem cache
1759 func_id = 'js_%s_%s' % (
1760 player_id, self._signature_cache_id(example_sig))
1761 assert os.path.basename(func_id) == func_id
1762
1763 cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
1764 if cache_spec is not None:
1765 return lambda s: ''.join(s[i] for i in cache_spec)
1766
1767 code = self._load_player(video_id, player_url)
1768 if code:
1769 res = self._parse_sig_js(code)
1770
1771 test_string = ''.join(map(compat_chr, range(len(example_sig))))
1772 cache_res = res(test_string)
1773 cache_spec = [ord(c) for c in cache_res]
1774
1775 self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec)
1776 return res
1777
1778 def _print_sig_code(self, func, example_sig):
1779 if not self.get_param('youtube_print_sig_code'):
1780 return
1781
1782 def gen_sig_code(idxs):
1783 def _genslice(start, end, step):
1784 starts = '' if start == 0 else str(start)
1785 ends = (':%d' % (end + step)) if end + step >= 0 else ':'
1786 steps = '' if step == 1 else (':%d' % step)
1787 return 's[%s%s%s]' % (starts, ends, steps)
1788
1789 step = None
1790 # Quell pyflakes warnings - start will be set when step is set
1791 start = '(Never used)'
1792 for i, prev in zip(idxs[1:], idxs[:-1]):
1793 if step is not None:
1794 if i - prev == step:
1795 continue
1796 yield _genslice(start, prev, step)
1797 step = None
1798 continue
1799 if i - prev in [-1, 1]:
1800 step = i - prev
1801 start = prev
1802 continue
1803 else:
1804 yield 's[%d]' % prev
1805 if step is None:
1806 yield 's[%d]' % i
1807 else:
1808 yield _genslice(start, i, step)
1809
1810 test_string = ''.join(map(compat_chr, range(len(example_sig))))
1811 cache_res = func(test_string)
1812 cache_spec = [ord(c) for c in cache_res]
1813 expr_code = ' + '.join(gen_sig_code(cache_spec))
1814 signature_id_tuple = '(%s)' % (
1815 ', '.join(compat_str(len(p)) for p in example_sig.split('.')))
1816 code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
1817 ' return %s\n') % (signature_id_tuple, expr_code)
1818 self.to_screen('Extracted signature function:\n' + code)
1819
1820 def _parse_sig_js(self, jscode):
1821 funcname = self._search_regex(
1822 (r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1823 r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1824 r'\bm=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(h\.s\)\)',
1825 r'\bc&&\(c=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(c\)\)',
1826 r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\);[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\)',
1827 r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
1828 r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
1829 # Obsolete patterns
1830 r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1831 r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
1832 r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1833 r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1834 r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1835 r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1836 r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1837 r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\('),
1838 jscode, 'Initial JS player signature function name', group='sig')
1839
1840 jsi = JSInterpreter(jscode)
1841 initial_function = jsi.extract_function(funcname)
1842 return lambda s: initial_function([s])
1843
1844 def _decrypt_signature(self, s, video_id, player_url):
1845 """Turn the encrypted s field into a working signature"""
1846
1847 if player_url is None:
1848 raise ExtractorError('Cannot decrypt signature without player_url')
1849
1850 try:
1851 player_id = (player_url, self._signature_cache_id(s))
1852 if player_id not in self._player_cache:
1853 func = self._extract_signature_function(
1854 video_id, player_url, s
1855 )
1856 self._player_cache[player_id] = func
1857 func = self._player_cache[player_id]
1858 self._print_sig_code(func, s)
1859 return func(s)
1860 except Exception as e:
1861 raise ExtractorError('Signature extraction failed: ' + traceback.format_exc(), cause=e)
1862
1863 def _decrypt_nsig(self, s, video_id, player_url):
1864 """Turn the encrypted n field into a working signature"""
1865 if player_url is None:
1866 raise ExtractorError('Cannot decrypt nsig without player_url')
1867 if player_url.startswith('//'):
1868 player_url = 'https:' + player_url
1869 elif not re.match(r'https?://', player_url):
1870 player_url = compat_urlparse.urljoin(
1871 'https://www.youtube.com', player_url)
1872
1873 sig_id = ('nsig_value', s)
1874 if sig_id in self._player_cache:
1875 return self._player_cache[sig_id]
1876
1877 try:
1878 player_id = ('nsig', player_url)
1879 if player_id not in self._player_cache:
1880 self._player_cache[player_id] = self._extract_n_function(video_id, player_url)
1881 func = self._player_cache[player_id]
1882 self._player_cache[sig_id] = func(s)
1883 self.write_debug(f'Decrypted nsig {s} => {self._player_cache[sig_id]}')
1884 return self._player_cache[sig_id]
1885 except Exception as e:
1886 raise ExtractorError(traceback.format_exc(), cause=e, video_id=video_id)
1887
1888 def _extract_n_function_name(self, jscode):
1889 return self._search_regex(
1890 (r'\.get\("n"\)\)&&\(b=(?P<nfunc>[a-zA-Z0-9$]{3})\([a-zA-Z0-9]\)',),
1891 jscode, 'Initial JS player n function name', group='nfunc')
1892
1893 def _extract_n_function(self, video_id, player_url):
1894 player_id = self._extract_player_info(player_url)
1895 func_code = self._downloader.cache.load('youtube-nsig', player_id)
1896
1897 if func_code:
1898 jsi = JSInterpreter(func_code)
1899 else:
1900 jscode = self._load_player(video_id, player_url)
1901 funcname = self._extract_n_function_name(jscode)
1902 jsi = JSInterpreter(jscode)
1903 func_code = jsi.extract_function_code(funcname)
1904 self._downloader.cache.store('youtube-nsig', player_id, func_code)
1905
1906 if self.get_param('youtube_print_sig_code'):
1907 self.to_screen(f'Extracted nsig function from {player_id}:\n{func_code[1]}\n')
1908
1909 return lambda s: jsi.extract_function_from_code(*func_code)([s])
1910
1911 def _extract_signature_timestamp(self, video_id, player_url, ytcfg=None, fatal=False):
1912 """
1913 Extract signatureTimestamp (sts)
1914 Required to tell API what sig/player version is in use.
1915 """
1916 sts = None
1917 if isinstance(ytcfg, dict):
1918 sts = int_or_none(ytcfg.get('STS'))
1919
1920 if not sts:
1921 # Attempt to extract from player
1922 if player_url is None:
1923 error_msg = 'Cannot extract signature timestamp without player_url.'
1924 if fatal:
1925 raise ExtractorError(error_msg)
1926 self.report_warning(error_msg)
1927 return
1928 code = self._load_player(video_id, player_url, fatal=fatal)
1929 if code:
1930 sts = int_or_none(self._search_regex(
1931 r'(?:signatureTimestamp|sts)\s*:\s*(?P<sts>[0-9]{5})', code,
1932 'JS player signature timestamp', group='sts', fatal=fatal))
1933 return sts
1934
1935 def _mark_watched(self, video_id, player_responses):
1936 playback_url = get_first(
1937 player_responses, ('playbackTracking', 'videostatsPlaybackUrl', 'baseUrl'),
1938 expected_type=url_or_none)
1939 if not playback_url:
1940 self.report_warning('Unable to mark watched')
1941 return
1942 parsed_playback_url = compat_urlparse.urlparse(playback_url)
1943 qs = compat_urlparse.parse_qs(parsed_playback_url.query)
1944
1945 # cpn generation algorithm is reverse engineered from base.js.
1946 # In fact it works even with dummy cpn.
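# Sketch of the resulting value (example string is hypothetical): each of the
# 16 characters is picked from the 64-character CPN_ALPHABET below, giving
# something like 'wzN3qPl0-_bXaZ9c'; per the note above, even a dummy value works.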
1947 CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
1948 cpn = ''.join((CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(0, 16)))
1949
1950 qs.update({
1951 'ver': ['2'],
1952 'cpn': [cpn],
1953 })
1954 playback_url = compat_urlparse.urlunparse(
1955 parsed_playback_url._replace(query=compat_urllib_parse_urlencode(qs, True)))
1956
1957 self._download_webpage(
1958 playback_url, video_id, 'Marking watched',
1959 'Unable to mark watched', fatal=False)
1960
1961 @staticmethod
1962 def _extract_urls(webpage):
1963 # Embedded YouTube player
1964 entries = [
1965 unescapeHTML(mobj.group('url'))
1966 for mobj in re.finditer(r'''(?x)
1967 (?:
1968 <iframe[^>]+?src=|
1969 data-video-url=|
1970 <embed[^>]+?src=|
1971 embedSWF\(?:\s*|
1972 <object[^>]+data=|
1973 new\s+SWFObject\(
1974 )
1975 (["\'])
1976 (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
1977 (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
1978 \1''', webpage)]
1979
1980 # lazyYT YouTube embed
1981 entries.extend(list(map(
1982 unescapeHTML,
1983 re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage))))
1984
1985 # Wordpress "YouTube Video Importer" plugin
1986 matches = re.findall(r'''(?x)<div[^>]+
1987 class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
1988 data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
1989 entries.extend(m[-1] for m in matches)
1990
1991 return entries
1992
1993 @staticmethod
1994 def _extract_url(webpage):
1995 urls = YoutubeIE._extract_urls(webpage)
1996 return urls[0] if urls else None
1997
1998 @classmethod
1999 def extract_id(cls, url):
2000 mobj = re.match(cls._VALID_URL, url, re.VERBOSE)
2001 if mobj is None:
2002 raise ExtractorError('Invalid URL: %s' % url)
2003 return mobj.group('id')
2004
2005 def _extract_chapters_from_json(self, data, duration):
2006 chapter_list = traverse_obj(
2007 data, (
2008 'playerOverlays', 'playerOverlayRenderer', 'decoratedPlayerBarRenderer',
2009 'decoratedPlayerBarRenderer', 'playerBar', 'chapteredPlayerBarRenderer', 'chapters'
2010 ), expected_type=list)
2011
2012 return self._extract_chapters(
2013 chapter_list,
2014 chapter_time=lambda chapter: float_or_none(
2015 traverse_obj(chapter, ('chapterRenderer', 'timeRangeStartMillis')), scale=1000),
2016 chapter_title=lambda chapter: traverse_obj(
2017 chapter, ('chapterRenderer', 'title', 'simpleText'), expected_type=str),
2018 duration=duration)
2019
2020 def _extract_chapters_from_engagement_panel(self, data, duration):
2021 content_list = traverse_obj(
2022 data,
2023 ('engagementPanels', ..., 'engagementPanelSectionListRenderer', 'content', 'macroMarkersListRenderer', 'contents'),
2024 expected_type=list, default=[])
2025 chapter_time = lambda chapter: parse_duration(self._get_text(chapter, 'timeDescription'))
2026 chapter_title = lambda chapter: self._get_text(chapter, 'title')
2027
2028 return next((
2029 filter(None, (
2030 self._extract_chapters(
2031 traverse_obj(contents, (..., 'macroMarkersListItemRenderer')),
2032 chapter_time, chapter_title, duration)
2033 for contents in content_list
2034 ))), [])
2035
2036 def _extract_chapters(self, chapter_list, chapter_time, chapter_title, duration):
2037 chapters = []
2038 last_chapter = {'start_time': 0}
2039 for idx, chapter in enumerate(chapter_list or []):
2040 title = chapter_title(chapter)
2041 start_time = chapter_time(chapter)
2042 if start_time is None:
2043 continue
2044 last_chapter['end_time'] = start_time
2045 if start_time < last_chapter['start_time']:
2046 if idx == 1:
2047 chapters.pop()
2048 self.report_warning('Invalid start time for chapter "%s"' % last_chapter['title'])
2049 else:
2050 self.report_warning(f'Invalid start time for chapter "{title}"')
2051 continue
2052 last_chapter = {'start_time': start_time, 'title': title}
2053 chapters.append(last_chapter)
2054 last_chapter['end_time'] = duration
2055 return chapters
2056
2057 def _extract_yt_initial_variable(self, webpage, regex, video_id, name):
2058 return self._parse_json(self._search_regex(
2059 (r'%s\s*%s' % (regex, self._YT_INITIAL_BOUNDARY_RE),
2060 regex), webpage, name, default='{}'), video_id, fatal=False)
2061
2062 @staticmethod
2063 def parse_time_text(time_text):
2064 """
2065 Parse the comment time text
2066 time_text is in the format 'X units ago (edited)'
2067 """
2068 time_text_split = time_text.split(' ')
2069 if len(time_text_split) >= 3:
2070 try:
2071 return datetime_from_str('now-%s%s' % (time_text_split[0], time_text_split[1]), precision='auto')
2072 except ValueError:
2073 return None
2074
2075 def _extract_comment(self, comment_renderer, parent=None):
2076 comment_id = comment_renderer.get('commentId')
2077 if not comment_id:
2078 return
2079
2080 text = self._get_text(comment_renderer, 'contentText')
2081
2082 # note: timestamp is an estimate calculated from the current time and time_text
2083 time_text = self._get_text(comment_renderer, 'publishedTimeText') or ''
2084 time_text_dt = self.parse_time_text(time_text)
2085 timestamp = (calendar.timegm(time_text_dt.timetuple())
2086     if isinstance(time_text_dt, datetime.datetime) else None)
2087 author = self._get_text(comment_renderer, 'authorText')
2088 author_id = try_get(comment_renderer,
2089 lambda x: x['authorEndpoint']['browseEndpoint']['browseId'], compat_str)
2090
2091 votes = parse_count(try_get(comment_renderer, (lambda x: x['voteCount']['simpleText'],
2092 lambda x: x['likeCount']), compat_str)) or 0
2093 author_thumbnail = try_get(comment_renderer,
2094 lambda x: x['authorThumbnail']['thumbnails'][-1]['url'], compat_str)
2095
2096 author_is_uploader = try_get(comment_renderer, lambda x: x['authorIsChannelOwner'], bool)
2097 is_favorited = 'creatorHeart' in (try_get(
2098 comment_renderer, lambda x: x['actionButtons']['commentActionButtonsRenderer'], dict) or {})
2099 return {
2100 'id': comment_id,
2101 'text': text,
2102 'timestamp': timestamp,
2103 'time_text': time_text,
2104 'like_count': votes,
2105 'is_favorited': is_favorited,
2106 'author': author,
2107 'author_id': author_id,
2108 'author_thumbnail': author_thumbnail,
2109 'author_is_uploader': author_is_uploader,
2110 'parent': parent or 'root'
2111 }
2112
2113 def _comment_entries(self, root_continuation_data, ytcfg, video_id, parent=None, comment_counts=None):
2114
2115 def extract_header(contents):
2116 _continuation = None
2117 for content in contents:
2118 comments_header_renderer = try_get(content, lambda x: x['commentsHeaderRenderer'])
2119 expected_comment_count = parse_count(self._get_text(
2120 comments_header_renderer, 'countText', 'commentsCount', max_runs=1))
2121
2122 if expected_comment_count:
2123 comment_counts[1] = expected_comment_count
2124 self.to_screen('Downloading ~%d comments' % expected_comment_count)
2125 sort_mode_str = self._configuration_arg('comment_sort', [''])[0]
2126 comment_sort_index = int(sort_mode_str != 'top') # 1 = new, 0 = top
2127
2128 sort_menu_item = try_get(
2129 comments_header_renderer,
2130 lambda x: x['sortMenu']['sortFilterSubMenuRenderer']['subMenuItems'][comment_sort_index], dict) or {}
2131 sort_continuation_ep = sort_menu_item.get('serviceEndpoint') or {}
2132
2133 _continuation = self._extract_continuation_ep_data(sort_continuation_ep) or self._extract_continuation(sort_menu_item)
2134 if not _continuation:
2135 continue
2136
2137 sort_text = sort_menu_item.get('title')
2138 if isinstance(sort_text, compat_str):
2139 sort_text = sort_text.lower()
2140 else:
2141 sort_text = 'top comments' if comment_sort_index == 0 else 'newest first'
2142 self.to_screen('Sorting comments by %s' % sort_text)
2143 break
2144 return _continuation
2145
2146 def extract_thread(contents):
2147 if not parent:
2148 comment_counts[2] = 0
2149 for content in contents:
2150 comment_thread_renderer = try_get(content, lambda x: x['commentThreadRenderer'])
2151 comment_renderer = try_get(
2152 comment_thread_renderer, (lambda x: x['comment']['commentRenderer'], dict)) or try_get(
2153 content, (lambda x: x['commentRenderer'], dict))
2154
2155 if not comment_renderer:
2156 continue
2157 comment = self._extract_comment(comment_renderer, parent)
2158 if not comment:
2159 continue
2160 comment_counts[0] += 1
2161 yield comment
2162 # Attempt to get the replies
2163 comment_replies_renderer = try_get(
2164 comment_thread_renderer, lambda x: x['replies']['commentRepliesRenderer'], dict)
2165
2166 if comment_replies_renderer:
2167 comment_counts[2] += 1
2168 comment_entries_iter = self._comment_entries(
2169 comment_replies_renderer, ytcfg, video_id,
2170 parent=comment.get('id'), comment_counts=comment_counts)
2171
2172 for reply_comment in comment_entries_iter:
2173 yield reply_comment
2174
2175 # YouTube comments have a max depth of 2
2176 max_depth = int_or_none(self._configuration_arg('max_comment_depth', [''])[0]) or float('inf')
2177 if max_depth == 1 and parent:
2178 return
2179 if not comment_counts:
2180 # comments so far, est. total comments, current comment thread #
2181 comment_counts = [0, 0, 0]
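# Illustrative (numbers are hypothetical): after yielding 150 of an estimated
# 2000 comments and encountering the 12th reply thread, comment_counts == [150, 2000, 12].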
2182
2183 continuation = self._extract_continuation(root_continuation_data)
2184 if continuation and len(continuation['continuation']) < 27:
2185 self.write_debug('Detected old API continuation token. Generating new API compatible token.')
2186 continuation_token = self._generate_comment_continuation(video_id)
2187 continuation = self._build_api_continuation_query(continuation_token, None)
2188
2189 message = self._get_text(root_continuation_data, ('contents', ..., 'messageRenderer', 'text'), max_runs=1)
2190 if message and not parent:
2191 self.report_warning(message, video_id=video_id)
2192
2193 visitor_data = None
2194 is_first_continuation = parent is None
2195
2196 for page_num in itertools.count(0):
2197 if not continuation:
2198 break
2199 headers = self.generate_api_headers(ytcfg=ytcfg, visitor_data=visitor_data)
2200 comment_prog_str = '(%d/%d)' % (comment_counts[0], comment_counts[1])
2201 if page_num == 0:
2202 if is_first_continuation:
2203 note_prefix = 'Downloading comment section API JSON'
2204 else:
2205 note_prefix = ' Downloading comment API JSON reply thread %d %s' % (
2206 comment_counts[2], comment_prog_str)
2207 else:
2208 note_prefix = '%sDownloading comment%s API JSON page %d %s' % (
2209 ' ' if parent else '', ' replies' if parent else '',
2210 page_num, comment_prog_str)
2211
2212 response = self._extract_response(
2213 item_id=None, query=continuation,
2214 ep='next', ytcfg=ytcfg, headers=headers, note=note_prefix,
2215 check_get_keys=('onResponseReceivedEndpoints', 'continuationContents'))
2216 if not response:
2217 break
2218 visitor_data = try_get(
2219 response,
2220 lambda x: x['responseContext']['webResponseContextExtensionData']['ytConfigData']['visitorData'],
2221 compat_str) or visitor_data
2222
2223 continuation_contents = dict_get(response, ('onResponseReceivedEndpoints', 'continuationContents'))
2224
2225 continuation = None
2226 if isinstance(continuation_contents, list):
2227 for continuation_section in continuation_contents:
2228 if not isinstance(continuation_section, dict):
2229 continue
2230 continuation_items = try_get(
2231 continuation_section,
2232 (lambda x: x['reloadContinuationItemsCommand']['continuationItems'],
2233 lambda x: x['appendContinuationItemsAction']['continuationItems']),
2234 list) or []
2235 if is_first_continuation:
2236 continuation = extract_header(continuation_items)
2237 is_first_continuation = False
2238 if continuation:
2239 break
2240 continue
2241 count = 0
2242 for count, entry in enumerate(extract_thread(continuation_items)):
2243 yield entry
2244 continuation = self._extract_continuation({'contents': continuation_items})
2245 if continuation:
2246 # Sometimes YouTube provides a continuation without any comments
2247 # In most cases we end up just downloading these with very few comments left to come.
2248 if count == 0:
2249 if not parent:
2250 self.report_warning('No comments received - assuming end of comments')
2251 continuation = None
2252 break
2253
2254 # Deprecated response structure
2255 elif isinstance(continuation_contents, dict):
2256 known_continuation_renderers = ('itemSectionContinuation', 'commentRepliesContinuation')
2257 for key, continuation_renderer in continuation_contents.items():
2258 if key not in known_continuation_renderers:
2259 continue
2260 if not isinstance(continuation_renderer, dict):
2261 continue
2262 if is_first_continuation:
2263 header_continuation_items = [continuation_renderer.get('header') or {}]
2264 continuation = extract_header(header_continuation_items)
2265 is_first_continuation = False
2266 if continuation:
2267 break
2268
2269 # Sometimes YouTube provides a continuation without any comments
2270 # In most cases we end up just downloading these with very few comments left to come.
2271 count = 0
2272 for count, entry in enumerate(extract_thread(continuation_renderer.get('contents') or {})):
2273 yield entry
2274 continuation = self._extract_continuation(continuation_renderer)
2275 if count == 0:
2276 if not parent:
2277 self.report_warning('No comments received - assuming end of comments')
2278 continuation = None
2279 break
2280
2281 @staticmethod
2282 def _generate_comment_continuation(video_id):
2283 """
2284 Generates initial comment section continuation token from given video id
2285 """
2286 b64_vid_id = base64.b64encode(bytes(video_id.encode('utf-8')))
2287 parts = ('Eg0SCw==', b64_vid_id, 'GAYyJyIRIgs=', b64_vid_id, 'MAB4AjAAQhBjb21tZW50cy1zZWN0aW9u')
2288 new_continuation_intlist = list(itertools.chain.from_iterable(
2289 [bytes_to_intlist(base64.b64decode(part)) for part in parts]))
2290 return base64.b64encode(intlist_to_bytes(new_continuation_intlist)).decode('utf-8')
2291
2292 def _get_comments(self, ytcfg, video_id, contents, webpage):
2293 """Entry for comment extraction"""
2294 def _real_comment_extract(contents):
2295 renderer = next((
2296 item for item in traverse_obj(contents, (..., 'itemSectionRenderer'), default={})
2297 if item.get('sectionIdentifier') == 'comment-item-section'), None)
2298 yield from self._comment_entries(renderer, ytcfg, video_id)
2299
2300 max_comments = int_or_none(self._configuration_arg('max_comments', [''])[0])
2301 # Force English regardless of account setting to prevent parsing issues
2302 # See: https://github.com/yt-dlp/yt-dlp/issues/532
2303 ytcfg = copy.deepcopy(ytcfg)
2304 traverse_obj(
2305 ytcfg, ('INNERTUBE_CONTEXT', 'client'), expected_type=dict, default={})['hl'] = 'en'
2306 return itertools.islice(_real_comment_extract(contents), 0, max_comments)
2307
2308 @staticmethod
2309 def _get_checkok_params():
2310 return {'contentCheckOk': True, 'racyCheckOk': True}
2311
2312 @classmethod
2313 def _generate_player_context(cls, sts=None):
2314 context = {
2315 'html5Preference': 'HTML5_PREF_WANTS',
2316 }
2317 if sts is not None:
2318 context['signatureTimestamp'] = sts
2319 return {
2320 'playbackContext': {
2321 'contentPlaybackContext': context
2322 },
2323 **cls._get_checkok_params()
2324 }
2325
2326 @staticmethod
2327 def _is_agegated(player_response):
2328 if traverse_obj(player_response, ('playabilityStatus', 'desktopLegacyAgeGateReason')):
2329 return True
2330
2331 reasons = traverse_obj(player_response, ('playabilityStatus', ('status', 'reason')), default=[])
2332 AGE_GATE_REASONS = (
2333 'confirm your age', 'age-restricted', 'inappropriate', # reason
2334 'age_verification_required', 'age_check_required', # status
2335 )
2336 return any(expected in reason for expected in AGE_GATE_REASONS for reason in reasons)
2337
2338 @staticmethod
2339 def _is_unplayable(player_response):
2340 return traverse_obj(player_response, ('playabilityStatus', 'status')) == 'UNPLAYABLE'
2341
2342 def _extract_player_response(self, client, video_id, master_ytcfg, player_ytcfg, player_url, initial_pr):
2343
2344 session_index = self._extract_session_index(player_ytcfg, master_ytcfg)
2345 syncid = self._extract_account_syncid(player_ytcfg, master_ytcfg, initial_pr)
2346 sts = self._extract_signature_timestamp(video_id, player_url, master_ytcfg, fatal=False) if player_url else None
2347 headers = self.generate_api_headers(
2348 ytcfg=player_ytcfg, account_syncid=syncid, session_index=session_index, default_client=client)
2349
2350 yt_query = {'videoId': video_id}
2351 yt_query.update(self._generate_player_context(sts))
2352 return self._extract_response(
2353 item_id=video_id, ep='player', query=yt_query,
2354 ytcfg=player_ytcfg, headers=headers, fatal=True,
2355 default_client=client,
2356 note='Downloading %s player API JSON' % client.replace('_', ' ').strip()
2357 ) or None
2358
2359 def _get_requested_clients(self, url, smuggled_data):
2360 requested_clients = []
2361 default = ['android', 'web']
2362 allowed_clients = sorted(
2363 [client for client in INNERTUBE_CLIENTS.keys() if client[:1] != '_'],
2364 key=lambda client: INNERTUBE_CLIENTS[client]['priority'], reverse=True)
2365 for client in self._configuration_arg('player_client'):
2366 if client in allowed_clients:
2367 requested_clients.append(client)
2368 elif client == 'default':
2369 requested_clients.extend(default)
2370 elif client == 'all':
2371 requested_clients.extend(allowed_clients)
2372 else:
2373 self.report_warning(f'Skipping unsupported client {client}')
2374 if not requested_clients:
2375 requested_clients = default
2376
2377 if smuggled_data.get('is_music_url') or self.is_music_url(url):
2378 requested_clients.extend(
2379 f'{client}_music' for client in requested_clients if f'{client}_music' in INNERTUBE_CLIENTS)
2380
2381 return orderedSet(requested_clients)
2382
2383 def _extract_player_ytcfg(self, client, video_id):
2384 url = {
2385 'web_music': 'https://music.youtube.com',
2386 'web_embedded': f'https://www.youtube.com/embed/{video_id}?html5=1'
2387 }.get(client)
2388 if not url:
2389 return {}
2390 webpage = self._download_webpage(url, video_id, fatal=False, note=f'Downloading {client} config')
2391 return self.extract_ytcfg(video_id, webpage) or {}
2392
2393 def _extract_player_responses(self, clients, video_id, webpage, master_ytcfg):
2394 initial_pr = None
2395 if webpage:
2396 initial_pr = self._extract_yt_initial_variable(
2397 webpage, self._YT_INITIAL_PLAYER_RESPONSE_RE,
2398 video_id, 'initial player response')
2399
2400 original_clients = clients
2401 clients = clients[::-1]
2402 prs = []
2403
2404 def append_client(client_name):
2405 if client_name in INNERTUBE_CLIENTS and client_name not in original_clients:
2406 clients.append(client_name)
2407
2408 # Android player_response does not have microFormats which are needed for
2409 # extraction of some data. So we return the initial_pr with formats
2410 # stripped out even if not requested by the user
2411 # See: https://github.com/yt-dlp/yt-dlp/issues/501
2412 if initial_pr:
2413 pr = dict(initial_pr)
2414 pr['streamingData'] = None
2415 prs.append(pr)
2416
2417 last_error = None
2418 tried_iframe_fallback = False
2419 player_url = None
2420 while clients:
2421 client = clients.pop()
2422 player_ytcfg = master_ytcfg if client == 'web' else {}
2423 if 'configs' not in self._configuration_arg('player_skip'):
2424 player_ytcfg = self._extract_player_ytcfg(client, video_id) or player_ytcfg
2425
2426 player_url = player_url or self._extract_player_url(master_ytcfg, player_ytcfg, webpage=webpage)
2427 require_js_player = self._get_default_ytcfg(client).get('REQUIRE_JS_PLAYER')
2428 if 'js' in self._configuration_arg('player_skip'):
2429 require_js_player = False
2430 player_url = None
2431
2432 if not player_url and not tried_iframe_fallback and require_js_player:
2433 player_url = self._download_player_url(video_id)
2434 tried_iframe_fallback = True
2435
2436 try:
2437 pr = initial_pr if client == 'web' and initial_pr else self._extract_player_response(
2438 client, video_id, player_ytcfg or master_ytcfg, player_ytcfg, player_url if require_js_player else None, initial_pr)
2439 except ExtractorError as e:
2440 if last_error:
2441 self.report_warning(last_error)
2442 last_error = e
2443 continue
2444
2445 if pr:
2446 prs.append(pr)
2447
2448 # creator clients can bypass AGE_VERIFICATION_REQUIRED if logged in
2449 if client.endswith('_agegate') and self._is_unplayable(pr) and self.is_authenticated:
2450 append_client(client.replace('_agegate', '_creator'))
2451 elif self._is_agegated(pr):
2452 append_client(f'{client}_agegate')
2453
2454 if last_error:
2455 if not len(prs):
2456 raise last_error
2457 self.report_warning(last_error)
2458 return prs, player_url
2459
2460 def _extract_formats(self, streaming_data, video_id, player_url, is_live):
2461 itags, stream_ids = {}, []
2462 itag_qualities, res_qualities = {}, {}
2463 q = qualities([
2464 # Normally tiny is the smallest video-only format. But
2465 # audio-only formats with unknown quality may get tagged as tiny
2466 'tiny',
2467 'audio_quality_ultralow', 'audio_quality_low', 'audio_quality_medium', 'audio_quality_high', # Audio only formats
2468 'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres'
2469 ])
2470 streaming_formats = traverse_obj(streaming_data, (..., ('formats', 'adaptiveFormats'), ...), default=[])
2471
2472 for fmt in streaming_formats:
2473 if fmt.get('targetDurationSec') or fmt.get('drmFamilies'):
2474 continue
2475
2476 itag = str_or_none(fmt.get('itag'))
2477 audio_track = fmt.get('audioTrack') or {}
2478 stream_id = '%s.%s' % (itag or '', audio_track.get('id', ''))
2479 if stream_id in stream_ids:
2480 continue
2481
2482 quality = fmt.get('quality')
2483 height = int_or_none(fmt.get('height'))
2484 if quality == 'tiny' or not quality:
2485 quality = fmt.get('audioQuality', '').lower() or quality
2486 # The 3gp format (17) in android client has a quality of "small",
2487 # but is actually worse than other formats
2488 if itag == '17':
2489 quality = 'tiny'
2490 if quality:
2491 if itag:
2492 itag_qualities[itag] = quality
2493 if height:
2494 res_qualities[height] = quality
2495 # FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment
2496 # (adding `&sq=0` to the URL) and parsing the emsg box to determine the
2497 # number of fragments that would subsequently be requested with (`&sq=N`)
2498 if fmt.get('type') == 'FORMAT_STREAM_TYPE_OTF':
2499 continue
2500
2501 fmt_url = fmt.get('url')
2502 if not fmt_url:
2503 sc = compat_parse_qs(fmt.get('signatureCipher'))
2504 fmt_url = url_or_none(try_get(sc, lambda x: x['url'][0]))
2505 encrypted_sig = try_get(sc, lambda x: x['s'][0])
2506 if not (sc and fmt_url and encrypted_sig):
2507 continue
2508 if not player_url:
2509 continue
2510 signature = self._decrypt_signature(sc['s'][0], video_id, player_url)
2511 sp = try_get(sc, lambda x: x['sp'][0]) or 'signature'
2512 fmt_url += '&' + sp + '=' + signature
2513
2514 query = parse_qs(fmt_url)
2515 throttled = False
2516 if query.get('ratebypass') != ['yes'] and query.get('n'):
2517 try:
2518 fmt_url = update_url_query(fmt_url, {
2519 'n': self._decrypt_nsig(query['n'][0], video_id, player_url)})
2520 except ExtractorError as e:
2521 self.report_warning(
2522 f'nsig extraction failed: You may experience throttling for some formats\n'
2523 f'n = {query["n"][0]} ; player = {player_url}\n{e}', only_once=True)
2524 throttled = True
2525
2526 if itag:
2527 itags[itag] = 'https'
2528 stream_ids.append(stream_id)
2529
2530 tbr = float_or_none(
2531 fmt.get('averageBitrate') or fmt.get('bitrate'), 1000)
2532 dct = {
2533 'asr': int_or_none(fmt.get('audioSampleRate')),
2534 'filesize': int_or_none(fmt.get('contentLength')),
2535 'format_id': itag,
2536 'format_note': join_nonempty(
2537 '%s%s' % (audio_track.get('displayName') or '',
2538 ' (default)' if audio_track.get('audioIsDefault') else ''),
2539 fmt.get('qualityLabel') or quality.replace('audio_quality_', ''),
2540 throttled and 'THROTTLED', delim=', '),
2541 'source_preference': -10 if throttled else -1,
2542 'fps': int_or_none(fmt.get('fps')) or None,
2543 'height': height,
2544 'quality': q(quality),
2545 'tbr': tbr,
2546 'url': fmt_url,
2547 'width': int_or_none(fmt.get('width')),
2548 'language': audio_track.get('id', '').split('.')[0],
2549 'language_preference': 1 if audio_track.get('audioIsDefault') else -1,
2550 }
2551 mime_mobj = re.match(
2552 r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', fmt.get('mimeType') or '')
2553 if mime_mobj:
2554 dct['ext'] = mimetype2ext(mime_mobj.group(1))
2555 dct.update(parse_codecs(mime_mobj.group(2)))
2556 no_audio = dct.get('acodec') == 'none'
2557 no_video = dct.get('vcodec') == 'none'
2558 if no_audio:
2559 dct['vbr'] = tbr
2560 if no_video:
2561 dct['abr'] = tbr
2562 if no_audio or no_video:
2563 dct['downloader_options'] = {
2564 # Youtube throttles chunks >~10M
2565 'http_chunk_size': 10485760,
2566 }
2567 if dct.get('ext'):
2568 dct['container'] = dct['ext'] + '_dash'
2569 yield dct
2570
2571 skip_manifests = self._configuration_arg('skip')
2572 get_dash = (
2573 (not is_live or self._configuration_arg('include_live_dash'))
2574 and 'dash' not in skip_manifests and self.get_param('youtube_include_dash_manifest', True))
2575 get_hls = 'hls' not in skip_manifests and self.get_param('youtube_include_hls_manifest', True)
2576
2577 def process_manifest_format(f, proto, itag):
2578 if itag in itags:
2579 if itags[itag] == proto or f'{itag}-{proto}' in itags:
2580 return False
2581 itag = f'{itag}-{proto}'
2582 if itag:
2583 f['format_id'] = itag
2584 itags[itag] = proto
2585
2586 f['quality'] = next((
2587 q(qdict[val])
2588 for val, qdict in ((f.get('format_id', '').split('-')[0], itag_qualities), (f.get('height'), res_qualities))
2589 if val in qdict), -1)
2590 return True
2591
2592 for sd in streaming_data:
2593 hls_manifest_url = get_hls and sd.get('hlsManifestUrl')
2594 if hls_manifest_url:
2595 for f in self._extract_m3u8_formats(hls_manifest_url, video_id, 'mp4', fatal=False):
2596 if process_manifest_format(f, 'hls', self._search_regex(
2597 r'/itag/(\d+)', f['url'], 'itag', default=None)):
2598 yield f
2599
2600 dash_manifest_url = get_dash and sd.get('dashManifestUrl')
2601 if dash_manifest_url:
2602 for f in self._extract_mpd_formats(dash_manifest_url, video_id, fatal=False):
2603 if process_manifest_format(f, 'dash', f['format_id']):
2604 f['filesize'] = int_or_none(self._search_regex(
2605 r'/clen/(\d+)', f.get('fragment_base_url') or f['url'], 'file size', default=None))
2606 yield f
2607
2608 def _extract_storyboard(self, player_responses, duration):
2609 spec = get_first(
2610 player_responses, ('storyboards', 'playerStoryboardSpecRenderer', 'spec'), default='').split('|')[::-1]
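# The storyboard spec layout (an assumption inferred from the parsing below) is a
# '|'-separated string: a templated base URL followed by one '#'-separated
# descriptor per storyboard level, where fields 1-5 are width, height, frame
# count, columns and rows, field 6 is unused here, field 7 fills the $N
# placeholder of the URL template and field 8 is appended as the sigh parameter.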
2611 if not spec:
2612 return
2613 base_url = spec.pop()
2614 L = len(spec) - 1
2615 for i, args in enumerate(spec):
2616 args = args.split('#')
2617 counts = list(map(int_or_none, args[:5]))
2618 if len(args) != 8 or not all(counts):
2619 self.report_warning(f'Malformed storyboard {i}: {"#".join(args)}{bug_reports_message()}')
2620 continue
2621 width, height, frame_count, cols, rows = counts
2622 N, sigh = args[6:]
2623
2624 url = base_url.replace('$L', str(L - i)).replace('$N', N) + f'&sigh={sigh}'
2625 fragment_count = frame_count / (cols * rows)
2626 fragment_duration = duration / fragment_count
2627 yield {
2628 'format_id': f'sb{i}',
2629 'format_note': 'storyboard',
2630 'ext': 'mhtml',
2631 'protocol': 'mhtml',
2632 'acodec': 'none',
2633 'vcodec': 'none',
2634 'url': url,
2635 'width': width,
2636 'height': height,
2637 'fragments': [{
2638 'path': url.replace('$M', str(j)),
2639 'duration': min(fragment_duration, duration - (j * fragment_duration)),
2640 } for j in range(math.ceil(fragment_count))],
2641 }
2642
2643 def _real_extract(self, url):
2644 url, smuggled_data = unsmuggle_url(url, {})
2645 video_id = self._match_id(url)
2646
2647 base_url = self.http_scheme() + '//www.youtube.com/'
2648 webpage_url = base_url + 'watch?v=' + video_id
2649 webpage = None
2650 if 'webpage' not in self._configuration_arg('player_skip'):
2651 webpage = self._download_webpage(
2652 webpage_url + '&bpctr=9999999999&has_verified=1', video_id, fatal=False)
2653
2654 master_ytcfg = self.extract_ytcfg(video_id, webpage) or self._get_default_ytcfg()
2655
2656 player_responses, player_url = self._extract_player_responses(
2657 self._get_requested_clients(url, smuggled_data),
2658 video_id, webpage, master_ytcfg)
2659
2660 playability_statuses = traverse_obj(
2661 player_responses, (..., 'playabilityStatus'), expected_type=dict, default=[])
2662
2663 trailer_video_id = get_first(
2664 playability_statuses,
2665 ('errorScreen', 'playerLegacyDesktopYpcTrailerRenderer', 'trailerVideoId'),
2666 expected_type=str)
2667 if trailer_video_id:
2668 return self.url_result(
2669 trailer_video_id, self.ie_key(), trailer_video_id)
2670
2671 search_meta = ((lambda x: self._html_search_meta(x, webpage, default=None))
2672 if webpage else (lambda x: None))
2673
2674 video_details = traverse_obj(
2675 player_responses, (..., 'videoDetails'), expected_type=dict, default=[])
2676 microformats = traverse_obj(
2677 player_responses, (..., 'microformat', 'playerMicroformatRenderer'),
2678 expected_type=dict, default=[])
2679 video_title = (
2680 get_first(video_details, 'title')
2681 or self._get_text(microformats, (..., 'title'))
2682 or search_meta(['og:title', 'twitter:title', 'title']))
2683 video_description = get_first(video_details, 'shortDescription')
2684
2685 multifeed_metadata_list = get_first(
2686 player_responses,
2687 ('multicamera', 'playerLegacyMulticameraRenderer', 'metadataList'),
2688 expected_type=str)
2689 if multifeed_metadata_list and not smuggled_data.get('force_singlefeed'):
2690 if self.get_param('noplaylist'):
2691 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
2692 else:
2693 entries = []
2694 feed_ids = []
2695 for feed in multifeed_metadata_list.split(','):
2696 # Unquote should take place before split on comma (,) since textual
2697 # fields may contain comma as well (see
2698 # https://github.com/ytdl-org/youtube-dl/issues/8536)
2699 feed_data = compat_parse_qs(
2700 compat_urllib_parse_unquote_plus(feed))
2701
2702 def feed_entry(name):
2703 return try_get(
2704 feed_data, lambda x: x[name][0], compat_str)
2705
2706 feed_id = feed_entry('id')
2707 if not feed_id:
2708 continue
2709 feed_title = feed_entry('title')
2710 title = video_title
2711 if feed_title:
2712 title += ' (%s)' % feed_title
2713 entries.append({
2714 '_type': 'url_transparent',
2715 'ie_key': 'Youtube',
2716 'url': smuggle_url(
2717 '%swatch?v=%s' % (base_url, feed_data['id'][0]),
2718 {'force_singlefeed': True}),
2719 'title': title,
2720 })
2721 feed_ids.append(feed_id)
2722 self.to_screen(
2723 'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
2724 % (', '.join(feed_ids), video_id))
2725 return self.playlist_result(
2726 entries, video_id, video_title, video_description)
2727
2728 live_broadcast_details = traverse_obj(microformats, (..., 'liveBroadcastDetails'))
2729 is_live = get_first(video_details, 'isLive')
2730 if is_live is None:
2731 is_live = get_first(live_broadcast_details, 'isLiveNow')
2732
2733 streaming_data = traverse_obj(player_responses, (..., 'streamingData'), default=[])
2734 formats = list(self._extract_formats(streaming_data, video_id, player_url, is_live))
2735
2736 if not formats:
2737 if not self.get_param('allow_unplayable_formats') and traverse_obj(streaming_data, (..., 'licenseInfos')):
2738 self.report_drm(video_id)
2739 pemr = get_first(
2740 playability_statuses,
2741 ('errorScreen', 'playerErrorMessageRenderer'), expected_type=dict) or {}
2742 reason = self._get_text(pemr, 'reason') or get_first(playability_statuses, 'reason')
2743 subreason = clean_html(self._get_text(pemr, 'subreason') or '')
2744 if subreason:
2745 if subreason == 'The uploader has not made this video available in your country.':
2746 countries = get_first(microformats, 'availableCountries')
2747 if not countries:
2748 regions_allowed = search_meta('regionsAllowed')
2749 countries = regions_allowed.split(',') if regions_allowed else None
2750 self.raise_geo_restricted(subreason, countries, metadata_available=True)
2751 reason += f'. {subreason}'
2752 if reason:
2753 self.raise_no_formats(reason, expected=True)
2754
2755 keywords = get_first(video_details, 'keywords', expected_type=list) or []
2756 if not keywords and webpage:
2757 keywords = [
2758 unescapeHTML(m.group('content'))
2759 for m in re.finditer(self._meta_regex('og:video:tag'), webpage)]
2760 for keyword in keywords:
2761 if keyword.startswith('yt:stretch='):
2762 mobj = re.search(r'(\d+)\s*:\s*(\d+)', keyword)
2763 if mobj:
2764 # NB: float is intentional for forcing float division
2765 w, h = (float(v) for v in mobj.groups())
2766 if w > 0 and h > 0:
2767 ratio = w / h
2768 for f in formats:
2769 if f.get('vcodec') != 'none':
2770 f['stretched_ratio'] = ratio
2771 break
2772
2773 thumbnails = []
2774 thumbnail_dicts = traverse_obj(
2775 (video_details, microformats), (..., ..., 'thumbnail', 'thumbnails', ...),
2776 expected_type=dict, default=[])
2777 for thumbnail in thumbnail_dicts:
2778 thumbnail_url = thumbnail.get('url')
2779 if not thumbnail_url:
2780 continue
2781 # Sometimes youtube gives a wrong thumbnail URL. See:
2782 # https://github.com/yt-dlp/yt-dlp/issues/233
2783 # https://github.com/ytdl-org/youtube-dl/issues/28023
2784 if 'maxresdefault' in thumbnail_url:
2785 thumbnail_url = thumbnail_url.split('?')[0]
2786 thumbnails.append({
2787 'url': thumbnail_url,
2788 'height': int_or_none(thumbnail.get('height')),
2789 'width': int_or_none(thumbnail.get('width')),
2790 })
2791 thumbnail_url = search_meta(['og:image', 'twitter:image'])
2792 if thumbnail_url:
2793 thumbnails.append({
2794 'url': thumbnail_url,
2795 })
2796 original_thumbnails = thumbnails.copy()
2797
2798 # The best resolution thumbnail sometimes does not appear in the webpage
2799 # See: https://github.com/ytdl-org/youtube-dl/issues/29049, https://github.com/yt-dlp/yt-dlp/issues/340
2800 # List of possible thumbnails - Ref: <https://stackoverflow.com/a/20542029>
2801 thumbnail_names = [
2802 'maxresdefault', 'hq720', 'sddefault', 'sd1', 'sd2', 'sd3',
2803 'hqdefault', 'hq1', 'hq2', 'hq3', '0',
2804 'mqdefault', 'mq1', 'mq2', 'mq3',
2805 'default', '1', '2', '3'
2806 ]
2807 n_thumbnail_names = len(thumbnail_names)
2808 thumbnails.extend({
2809 'url': 'https://i.ytimg.com/vi{webp}/{video_id}/{name}{live}.{ext}'.format(
2810 video_id=video_id, name=name, ext=ext,
2811 webp='_webp' if ext == 'webp' else '', live='_live' if is_live else ''),
2812 } for name in thumbnail_names for ext in ('webp', 'jpg'))
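# e.g. for a non-live video the candidates generated above look like
# https://i.ytimg.com/vi_webp/<video_id>/maxresdefault.webp and
# https://i.ytimg.com/vi/<video_id>/maxresdefault.jpg - an illustration of the
# template only, since not all of these URLs exist (hence the ranking below).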
2813 for thumb in thumbnails:
2814 i = next((i for i, t in enumerate(thumbnail_names) if f'/{video_id}/{t}' in thumb['url']), n_thumbnail_names)
2815 thumb['preference'] = (0 if '.webp' in thumb['url'] else -1) - (2 * i)
2816 self._remove_duplicate_formats(thumbnails)
2817 self._downloader._sort_thumbnails(original_thumbnails)
2818
2819 category = get_first(microformats, 'category') or search_meta('genre')
2820 channel_id = str_or_none(
2821 get_first(video_details, 'channelId')
2822 or get_first(microformats, 'externalChannelId')
2823 or search_meta('channelId'))
2824 duration = int_or_none(
2825 get_first(video_details, 'lengthSeconds')
2826 or get_first(microformats, 'lengthSeconds')
2827 or parse_duration(search_meta('duration'))) or None
2828 owner_profile_url = get_first(microformats, 'ownerProfileUrl')
2829
2830 live_content = get_first(video_details, 'isLiveContent')
2831 is_upcoming = get_first(video_details, 'isUpcoming')
2832 if is_live is None:
2833 if is_upcoming or live_content is False:
2834 is_live = False
2835 if is_upcoming is None and (live_content or is_live):
2836 is_upcoming = False
2837 live_starttime = parse_iso8601(get_first(live_broadcast_details, 'startTimestamp'))
2838 live_endtime = parse_iso8601(get_first(live_broadcast_details, 'endTimestamp'))
2839 if not duration and live_endtime and live_starttime:
2840 duration = live_endtime - live_starttime
2841
2842 formats.extend(self._extract_storyboard(player_responses, duration))
2843
2844 # Source is given priority since formats that throttle are given lower source_preference
2845 # When throttling issue is fully fixed, remove this
2846 self._sort_formats(formats, ('quality', 'res', 'fps', 'hdr:12', 'source', 'codec:vp9.2', 'lang', 'proto'))
2847
2848 info = {
2849 'id': video_id,
2850 'title': self._live_title(video_title) if is_live else video_title,
2851 'formats': formats,
2852 'thumbnails': thumbnails,
2853 # The best thumbnail that we are sure exists. Prevents unnecessary
2854 # URL checking if the user doesn't care about getting the best possible thumbnail
2855 'thumbnail': traverse_obj(original_thumbnails, (-1, 'url')),
2856 'description': video_description,
2857 'upload_date': unified_strdate(
2858 get_first(microformats, 'uploadDate')
2859 or search_meta('uploadDate')),
2860 'uploader': get_first(video_details, 'author'),
2861 'uploader_id': self._search_regex(r'/(?:channel|user)/([^/?&#]+)', owner_profile_url, 'uploader id') if owner_profile_url else None,
2862 'uploader_url': owner_profile_url,
2863 'channel_id': channel_id,
2864 'channel_url': f'https://www.youtube.com/channel/{channel_id}' if channel_id else None,
2865 'duration': duration,
2866 'view_count': int_or_none(
2867 get_first((video_details, microformats), (..., 'viewCount'))
2868 or search_meta('interactionCount')),
2869 'average_rating': float_or_none(get_first(video_details, 'averageRating')),
2870 'age_limit': 18 if (
2871 get_first(microformats, 'isFamilySafe') is False
2872 or search_meta('isFamilyFriendly') == 'false'
2873 or search_meta('og:restrictions:age') == '18+') else 0,
2874 'webpage_url': webpage_url,
2875 'categories': [category] if category else None,
2876 'tags': keywords,
2877 'playable_in_embed': get_first(playability_statuses, 'playableInEmbed'),
2878 'is_live': is_live,
2879 'was_live': (False if is_live or is_upcoming or live_content is False
2880 else None if is_live is None or is_upcoming is None
2881 else live_content),
2882 'live_status': 'is_upcoming' if is_upcoming else None, # rest will be set by YoutubeDL
2883 'release_timestamp': live_starttime,
2884 }
2885
2886 pctr = traverse_obj(player_responses, (..., 'captions', 'playerCaptionsTracklistRenderer'), expected_type=dict)
2887 if pctr:
2888 def get_lang_code(track):
2889 return (remove_start(track.get('vssId') or '', '.').replace('.', '-')
2890 or track.get('languageCode'))
2891
2892 # Converted into dicts to remove duplicates
2893 captions = {
2894 get_lang_code(sub): sub
2895 for sub in traverse_obj(pctr, (..., 'captionTracks', ...), default=[])}
2896 translation_languages = {
2897 lang.get('languageCode'): self._get_text(lang.get('languageName'), max_runs=1)
2898 for lang in traverse_obj(pctr, (..., 'translationLanguages', ...), default=[])}
2899
2900 def process_language(container, base_url, lang_code, sub_name, query):
2901 lang_subs = container.setdefault(lang_code, [])
2902 for fmt in self._SUBTITLE_FORMATS:
2903 query.update({
2904 'fmt': fmt,
2905 })
2906 lang_subs.append({
2907 'ext': fmt,
2908 'url': update_url_query(base_url, query),
2909 'name': sub_name,
2910 })
2911
2912 subtitles, automatic_captions = {}, {}
2913 for lang_code, caption_track in captions.items():
2914 base_url = caption_track.get('baseUrl')
2915 if not base_url:
2916 continue
2917 lang_name = self._get_text(caption_track, 'name', max_runs=1)
2918 if caption_track.get('kind') != 'asr':
2919 if not lang_code:
2920 continue
2921 process_language(
2922 subtitles, base_url, lang_code, lang_name, {})
2923 if not caption_track.get('isTranslatable'):
2924 continue
2925 for trans_code, trans_name in translation_languages.items():
2926 if not trans_code:
2927 continue
2928 if caption_track.get('kind') != 'asr':
2929 trans_code += f'-{lang_code}'
2930 trans_name += format_field(lang_name, template=' from %s')
2931 process_language(
2932 automatic_captions, base_url, trans_code, trans_name, {'tlang': trans_code})
2933 info['automatic_captions'] = automatic_captions
2934 info['subtitles'] = subtitles
2935
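# Pick up start/end times from the URL query or fragment
# ('t' and 'start' map to start_time, 'end' to end_time)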
2936 parsed_url = compat_urllib_parse_urlparse(url)
2937 for component in [parsed_url.fragment, parsed_url.query]:
2938 query = compat_parse_qs(component)
2939 for k, v in query.items():
2940 for d_k, s_ks in [('start', ('start', 't')), ('end', ('end',))]:
2941 d_k += '_time'
2942 if d_k not in info and k in s_ks:
2943 info[d_k] = parse_duration(query[k][0])
2944
2945 # Youtube Music Auto-generated description
2946 if video_description:
2947 mobj = re.search(r'(?s)(?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?.+\nAuto-generated by YouTube\.\s*$', video_description)
2948 if mobj:
2949 release_year = mobj.group('release_year')
2950 release_date = mobj.group('release_date')
2951 if release_date:
2952 release_date = release_date.replace('-', '')
2953 if not release_year:
2954 release_year = release_date[:4]
2955 info.update({
2956 'album': mobj.group('album').strip(),
2957 'artist': mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·')),
2958 'track': mobj.group('track').strip(),
2959 'release_date': release_date,
2960 'release_year': int_or_none(release_year),
2961 })
2962
2963 initial_data = None
2964 if webpage:
2965 initial_data = self._extract_yt_initial_variable(
2966 webpage, self._YT_INITIAL_DATA_RE, video_id,
2967 'yt initial data')
2968 if not initial_data:
2969 query = {'videoId': video_id}
2970 query.update(self._get_checkok_params())
2971 initial_data = self._extract_response(
2972 item_id=video_id, ep='next', fatal=False,
2973 ytcfg=master_ytcfg, query=query,
2974 headers=self.generate_api_headers(ytcfg=master_ytcfg),
2975 note='Downloading initial data API JSON')
2976
2977 try:
2978 # This will error if there is no livechat
2979 initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
2980 info.setdefault('subtitles', {})['live_chat'] = [{
2981 'url': 'https://www.youtube.com/watch?v=%s' % video_id, # url is needed to set cookies
2982 'video_id': video_id,
2983 'ext': 'json',
2984 'protocol': 'youtube_live_chat' if is_live or is_upcoming else 'youtube_live_chat_replay',
2985 }]
2986 except (KeyError, IndexError, TypeError):
2987 pass
2988
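# Chapters are taken from the initial data, falling back to the engagement panel markers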
2989 if initial_data:
2990 info['chapters'] = (
2991 self._extract_chapters_from_json(initial_data, duration)
2992 or self._extract_chapters_from_engagement_panel(initial_data, duration)
2993 or None)
2994
2995 contents = try_get(
2996 initial_data,
2997 lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'],
2998 list) or []
2999 for content in contents:
3000 vpir = content.get('videoPrimaryInfoRenderer')
3001 if vpir:
3002 stl = vpir.get('superTitleLink')
3003 if stl:
3004 stl = self._get_text(stl)
3005 if try_get(
3006 vpir,
3007 lambda x: x['superTitleIcon']['iconType']) == 'LOCATION_PIN':
3008 info['location'] = stl
3009 else:
3010 mobj = re.search(r'(.+?)\s*S(\d+)\s*•\s*E(\d+)', stl)
3011 if mobj:
3012 info.update({
3013 'series': mobj.group(1),
3014 'season_number': int(mobj.group(2)),
3015 'episode_number': int(mobj.group(3)),
3016 })
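# Like/dislike counts come from the accessibility labels of the top-level
# toggle buttons, with the sentiment bar tooltip as a fallback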
3017 for tlb in (try_get(
3018 vpir,
3019 lambda x: x['videoActions']['menuRenderer']['topLevelButtons'],
3020 list) or []):
3021 tbr = tlb.get('toggleButtonRenderer') or {}
3022 for getter, regex in [(
3023 lambda x: x['defaultText']['accessibility']['accessibilityData'],
3024 r'(?P<count>[\d,]+)\s*(?P<type>(?:dis)?like)'), ([
3025 lambda x: x['accessibility'],
3026 lambda x: x['accessibilityData']['accessibilityData'],
3027 ], r'(?P<type>(?:dis)?like) this video along with (?P<count>[\d,]+) other people')]:
3028 label = (try_get(tbr, getter, dict) or {}).get('label')
3029 if label:
3030 mobj = re.match(regex, label)
3031 if mobj:
3032 info[mobj.group('type') + '_count'] = str_to_int(mobj.group('count'))
3033 break
3034 sbr_tooltip = try_get(
3035 vpir, lambda x: x['sentimentBar']['sentimentBarRenderer']['tooltip'])
3036 if sbr_tooltip:
3037 like_count, dislike_count = sbr_tooltip.split(' / ')
3038 info.update({
3039 'like_count': str_to_int(like_count),
3040 'dislike_count': str_to_int(dislike_count),
3041 })
3042 vsir = content.get('videoSecondaryInfoRenderer')
3043 if vsir:
3044 info['channel'] = self._get_text(vsir, ('owner', 'videoOwnerRenderer', 'title'))
3045 rows = try_get(
3046 vsir,
3047 lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows'],
3048 list) or []
3049 multiple_songs = False
3050 for row in rows:
3051 if try_get(row, lambda x: x['metadataRowRenderer']['hasDividerLine']) is True:
3052 multiple_songs = True
3053 break
3054 for row in rows:
3055 mrr = row.get('metadataRowRenderer') or {}
3056 mrr_title = mrr.get('title')
3057 if not mrr_title:
3058 continue
3059 mrr_title = self._get_text(mrr, 'title')
3060 mrr_contents_text = self._get_text(mrr, ('contents', 0))
3061 if mrr_title == 'License':
3062 info['license'] = mrr_contents_text
3063 elif not multiple_songs:
3064 if mrr_title == 'Album':
3065 info['album'] = mrr_contents_text
3066 elif mrr_title == 'Artist':
3067 info['artist'] = mrr_contents_text
3068 elif mrr_title == 'Song':
3069 info['track'] = mrr_contents_text
3070
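# Fill missing channel fields from the corresponding uploader fields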
3071 fallbacks = {
3072 'channel': 'uploader',
3073 'channel_id': 'uploader_id',
3074 'channel_url': 'uploader_url',
3075 }
3076 for to, frm in fallbacks.items():
3077 if not info.get(to):
3078 info[to] = info.get(frm)
3079
3080 for s_k, d_k in [('artist', 'creator'), ('track', 'alt_title')]:
3081 v = info.get(s_k)
3082 if v:
3083 info[d_k] = v
3084
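# Derive availability from the private/unlisted flags, the badge labels
# (members only / premium / unlisted) and the age limit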
3085 is_private = get_first(video_details, 'isPrivate', expected_type=bool)
3086 is_unlisted = get_first(microformats, 'isUnlisted', expected_type=bool)
3087 is_membersonly = None
3088 is_premium = None
3089 if initial_data and is_private is not None:
3090 is_membersonly = False
3091 is_premium = False
3092 contents = try_get(initial_data, lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'], list) or []
3093 badge_labels = set()
3094 for content in contents:
3095 if not isinstance(content, dict):
3096 continue
3097 badge_labels.update(self._extract_badges(content.get('videoPrimaryInfoRenderer')))
3098 for badge_label in badge_labels:
3099 if badge_label.lower() == 'members only':
3100 is_membersonly = True
3101 elif badge_label.lower() == 'premium':
3102 is_premium = True
3103 elif badge_label.lower() == 'unlisted':
3104 is_unlisted = True
3105
3106 info['availability'] = self._availability(
3107 is_private=is_private,
3108 needs_premium=is_premium,
3109 needs_subscription=is_membersonly,
3110 needs_auth=info['age_limit'] >= 18,
3111 is_unlisted=None if is_private is None else is_unlisted)
3112
3113 info['__post_extractor'] = self.extract_comments(master_ytcfg, video_id, contents, webpage)
3114
3115 self.mark_watched(video_id, player_responses)
3116
3117 return info
3118
3119
3120 class YoutubeTabIE(YoutubeBaseInfoExtractor):
3121 IE_DESC = 'YouTube Tabs'
3122 _VALID_URL = r'''(?x)
3123 https?://
3124 (?:\w+\.)?
3125 (?:
3126 youtube(?:kids)?\.com|
3127 %(invidious)s
3128 )/
3129 (?:
3130 (?P<channel_type>channel|c|user|browse)/|
3131 (?P<not_channel>
3132 feed/|hashtag/|
3133 (?:playlist|watch)\?.*?\blist=
3134 )|
3135 (?!(?:%(reserved_names)s)\b) # Direct URLs
3136 )
3137 (?P<id>[^/?\#&]+)
3138 ''' % {
3139 'reserved_names': YoutubeBaseInfoExtractor._RESERVED_NAMES,
3140 'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
3141 }
3142 IE_NAME = 'youtube:tab'
3143
3144 _TESTS = [{
3145 'note': 'playlists, multipage',
3146 'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid',
3147 'playlist_mincount': 94,
3148 'info_dict': {
3149 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
3150 'title': 'Игорь Клейнер - Playlists',
3151 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
3152 'uploader': 'Игорь Клейнер',
3153 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
3154 },
3155 }, {
3156 'note': 'playlists, multipage, different order',
3157 'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
3158 'playlist_mincount': 94,
3159 'info_dict': {
3160 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
3161 'title': 'Игорь Клейнер - Playlists',
3162 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
3163 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
3164 'uploader': 'Игорь Клейнер',
3165 },
3166 }, {
3167 'note': 'playlists, series',
3168 'url': 'https://www.youtube.com/c/3blue1brown/playlists?view=50&sort=dd&shelf_id=3',
3169 'playlist_mincount': 5,
3170 'info_dict': {
3171 'id': 'UCYO_jab_esuFRV4b17AJtAw',
3172 'title': '3Blue1Brown - Playlists',
3173 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
3174 'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
3175 'uploader': '3Blue1Brown',
3176 },
3177 }, {
3178 'note': 'playlists, singlepage',
3179 'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
3180 'playlist_mincount': 4,
3181 'info_dict': {
3182 'id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
3183 'title': 'ThirstForScience - Playlists',
3184 'description': 'md5:609399d937ea957b0f53cbffb747a14c',
3185 'uploader': 'ThirstForScience',
3186 'uploader_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
3187 }
3188 }, {
3189 'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',
3190 'only_matching': True,
3191 }, {
3192 'note': 'basic, single video playlist',
3193 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
3194 'info_dict': {
3195 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
3196 'uploader': 'Sergey M.',
3197 'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
3198 'title': 'youtube-dl public playlist',
3199 },
3200 'playlist_count': 1,
3201 }, {
3202 'note': 'empty playlist',
3203 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
3204 'info_dict': {
3205 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
3206 'uploader': 'Sergey M.',
3207 'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
3208 'title': 'youtube-dl empty playlist',
3209 },
3210 'playlist_count': 0,
3211 }, {
3212 'note': 'Home tab',
3213 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/featured',
3214 'info_dict': {
3215 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3216 'title': 'lex will - Home',
3217 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3218 'uploader': 'lex will',
3219 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3220 },
3221 'playlist_mincount': 2,
3222 }, {
3223 'note': 'Videos tab',
3224 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos',
3225 'info_dict': {
3226 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3227 'title': 'lex will - Videos',
3228 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3229 'uploader': 'lex will',
3230 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3231 },
3232 'playlist_mincount': 975,
3233 }, {
3234 'note': 'Videos tab, sorted by popular',
3235 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos?view=0&sort=p&flow=grid',
3236 'info_dict': {
3237 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3238 'title': 'lex will - Videos',
3239 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3240 'uploader': 'lex will',
3241 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3242 },
3243 'playlist_mincount': 199,
3244 }, {
3245 'note': 'Playlists tab',
3246 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/playlists',
3247 'info_dict': {
3248 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3249 'title': 'lex will - Playlists',
3250 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3251 'uploader': 'lex will',
3252 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3253 },
3254 'playlist_mincount': 17,
3255 }, {
3256 'note': 'Community tab',
3257 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community',
3258 'info_dict': {
3259 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3260 'title': 'lex will - Community',
3261 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3262 'uploader': 'lex will',
3263 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3264 },
3265 'playlist_mincount': 18,
3266 }, {
3267 'note': 'Channels tab',
3268 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels',
3269 'info_dict': {
3270 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3271 'title': 'lex will - Channels',
3272 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3273 'uploader': 'lex will',
3274 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3275 },
3276 'playlist_mincount': 12,
3277 }, {
3278 'note': 'Search tab',
3279 'url': 'https://www.youtube.com/c/3blue1brown/search?query=linear%20algebra',
3280 'playlist_mincount': 40,
3281 'info_dict': {
3282 'id': 'UCYO_jab_esuFRV4b17AJtAw',
3283 'title': '3Blue1Brown - Search - linear algebra',
3284 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
3285 'uploader': '3Blue1Brown',
3286 'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
3287 },
3288 }, {
3289 'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',
3290 'only_matching': True,
3291 }, {
3292 'url': 'https://www.youtubekids.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
3293 'only_matching': True,
3294 }, {
3295 'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
3296 'only_matching': True,
3297 }, {
3298 'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 also appears twice in this list.',
3299 'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
3300 'info_dict': {
3301 'title': '29C3: Not my department',
3302 'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
3303 'uploader': 'Christiaan008',
3304 'uploader_id': 'UCEPzS1rYsrkqzSLNp76nrcg',
3305 'description': 'md5:a14dc1a8ef8307a9807fe136a0660268',
3306 },
3307 'playlist_count': 96,
3308 }, {
3309 'note': 'Large playlist',
3310 'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
3311 'info_dict': {
3312 'title': 'Uploads from Cauchemar',
3313 'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
3314 'uploader': 'Cauchemar',
3315 'uploader_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',
3316 },
3317 'playlist_mincount': 1123,
3318 }, {
3319 'note': 'even larger playlist, 8832 videos',
3320 'url': 'http://www.youtube.com/user/NASAgovVideo/videos',
3321 'only_matching': True,
3322 }, {
3323 'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
3324 'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
3325 'info_dict': {
3326 'title': 'Uploads from Interstellar Movie',
3327 'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
3328 'uploader': 'Interstellar Movie',
3329 'uploader_id': 'UCXw-G3eDE9trcvY2sBMM_aA',
3330 },
3331 'playlist_mincount': 21,
3332 }, {
3333 'note': 'Playlist with "show unavailable videos" button',
3334 'url': 'https://www.youtube.com/playlist?list=UUTYLiWFZy8xtPwxFwX9rV7Q',
3335 'info_dict': {
3336 'title': 'Uploads from Phim Siêu Nhân Nhật Bản',
3337 'id': 'UUTYLiWFZy8xtPwxFwX9rV7Q',
3338 'uploader': 'Phim Siêu Nhân Nhật Bản',
3339 'uploader_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',
3340 },
3341 'playlist_mincount': 200,
3342 }, {
3343 'note': 'Playlist with unavailable videos in page 7',
3344 'url': 'https://www.youtube.com/playlist?list=UU8l9frL61Yl5KFOl87nIm2w',
3345 'info_dict': {
3346 'title': 'Uploads from BlankTV',
3347 'id': 'UU8l9frL61Yl5KFOl87nIm2w',
3348 'uploader': 'BlankTV',
3349 'uploader_id': 'UC8l9frL61Yl5KFOl87nIm2w',
3350 },
3351 'playlist_mincount': 1000,
3352 }, {
3353 'note': 'https://github.com/ytdl-org/youtube-dl/issues/21844',
3354 'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
3355 'info_dict': {
3356 'title': 'Data Analysis with Dr Mike Pound',
3357 'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
3358 'uploader_id': 'UC9-y-6csu5WGm29I7JiwpnA',
3359 'uploader': 'Computerphile',
3360 'description': 'md5:7f567c574d13d3f8c0954d9ffee4e487',
3361 },
3362 'playlist_mincount': 11,
3363 }, {
3364 'url': 'https://invidio.us/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
3365 'only_matching': True,
3366 }, {
3367 'note': 'Playlist URL that does not actually serve a playlist',
3368 'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
3369 'info_dict': {
3370 'id': 'FqZTN594JQw',
3371 'ext': 'webm',
3372 'title': "Smiley's People 01 detective, Adventure Series, Action",
3373 'uploader': 'STREEM',
3374 'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
3375 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
3376 'upload_date': '20150526',
3377 'license': 'Standard YouTube License',
3378 'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
3379 'categories': ['People & Blogs'],
3380 'tags': list,
3381 'view_count': int,
3382 'like_count': int,
3383 'dislike_count': int,
3384 },
3385 'params': {
3386 'skip_download': True,
3387 },
3388 'skip': 'This video is not available.',
3389 'add_ie': [YoutubeIE.ie_key()],
3390 }, {
3391 'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',
3392 'only_matching': True,
3393 }, {
3394 'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
3395 'only_matching': True,
3396 }, {
3397 'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',
3398 'info_dict': {
3399 'id': '3yImotZU3tw', # This will keep changing
3400 'ext': 'mp4',
3401 'title': compat_str,
3402 'uploader': 'Sky News',
3403 'uploader_id': 'skynews',
3404 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/skynews',
3405 'upload_date': r're:\d{8}',
3406 'description': compat_str,
3407 'categories': ['News & Politics'],
3408 'tags': list,
3409 'like_count': int,
3410 'dislike_count': int,
3411 },
3412 'params': {
3413 'skip_download': True,
3414 },
3415 'expected_warnings': ['Downloading just video ', 'Ignoring subtitle tracks found in '],
3416 }, {
3417 'url': 'https://www.youtube.com/user/TheYoungTurks/live',
3418 'info_dict': {
3419 'id': 'a48o2S1cPoo',
3420 'ext': 'mp4',
3421 'title': 'The Young Turks - Live Main Show',
3422 'uploader': 'The Young Turks',
3423 'uploader_id': 'TheYoungTurks',
3424 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
3425 'upload_date': '20150715',
3426 'license': 'Standard YouTube License',
3427 'description': 'md5:438179573adcdff3c97ebb1ee632b891',
3428 'categories': ['News & Politics'],
3429 'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
3430 'like_count': int,
3431 'dislike_count': int,
3432 },
3433 'params': {
3434 'skip_download': True,
3435 },
3436 'only_matching': True,
3437 }, {
3438 'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
3439 'only_matching': True,
3440 }, {
3441 'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
3442 'only_matching': True,
3443 }, {
3444 'note': 'A channel that is not live. Should raise error',
3445 'url': 'https://www.youtube.com/user/numberphile/live',
3446 'only_matching': True,
3447 }, {
3448 'url': 'https://www.youtube.com/feed/trending',
3449 'only_matching': True,
3450 }, {
3451 'url': 'https://www.youtube.com/feed/library',
3452 'only_matching': True,
3453 }, {
3454 'url': 'https://www.youtube.com/feed/history',
3455 'only_matching': True,
3456 }, {
3457 'url': 'https://www.youtube.com/feed/subscriptions',
3458 'only_matching': True,
3459 }, {
3460 'url': 'https://www.youtube.com/feed/watch_later',
3461 'only_matching': True,
3462 }, {
3463 'note': 'Recommended - redirects to home page.',
3464 'url': 'https://www.youtube.com/feed/recommended',
3465 'only_matching': True,
3466 }, {
3467 'note': 'inline playlist with not always working continuations',
3468 'url': 'https://www.youtube.com/watch?v=UC6u0Tct-Fo&list=PL36D642111D65BE7C',
3469 'only_matching': True,
3470 }, {
3471 'url': 'https://www.youtube.com/course',
3472 'only_matching': True,
3473 }, {
3474 'url': 'https://www.youtube.com/zsecurity',
3475 'only_matching': True,
3476 }, {
3477 'url': 'http://www.youtube.com/NASAgovVideo/videos',
3478 'only_matching': True,
3479 }, {
3480 'url': 'https://www.youtube.com/TheYoungTurks/live',
3481 'only_matching': True,
3482 }, {
3483 'url': 'https://www.youtube.com/hashtag/cctv9',
3484 'info_dict': {
3485 'id': 'cctv9',
3486 'title': '#cctv9',
3487 },
3488 'playlist_mincount': 350,
3489 }, {
3490 'url': 'https://www.youtube.com/watch?list=PLW4dVinRY435CBE_JD3t-0SRXKfnZHS1P&feature=youtu.be&v=M9cJMXmQ_ZU',
3491 'only_matching': True,
3492 }, {
3493 'note': 'Requires Premium: should request additional YTM-info webpage (and have format 141) for videos in playlist',
3494 'url': 'https://music.youtube.com/playlist?list=PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
3495 'only_matching': True
3496 }, {
3497 'note': '/browse/ should redirect to /channel/',
3498 'url': 'https://music.youtube.com/browse/UC1a8OFewdjuLq6KlF8M_8Ng',
3499 'only_matching': True
3500 }, {
3501 'note': 'VLPL, should redirect to playlist?list=PL...',
3502 'url': 'https://music.youtube.com/browse/VLPLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
3503 'info_dict': {
3504 'id': 'PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
3505 'uploader': 'NoCopyrightSounds',
3506 'description': 'Providing you with copyright free / safe music for gaming, live streaming, studying and more!',
3507 'uploader_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',
3508 'title': 'NCS Releases',
3509 },
3510 'playlist_mincount': 166,
3511 }, {
3512 'note': 'Topic, should redirect to playlist?list=UU...',
3513 'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',
3514 'info_dict': {
3515 'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',
3516 'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
3517 'title': 'Uploads from Royalty Free Music - Topic',
3518 'uploader': 'Royalty Free Music - Topic',
3519 },
3520 'expected_warnings': [
3521 'A channel/user page was given',
3522 'The URL does not have a videos tab',
3523 ],
3524 'playlist_mincount': 101,
3525 }, {
3526 'note': 'Topic without a UU playlist',
3527 'url': 'https://www.youtube.com/channel/UCtFRv9O2AHqOZjjynzrv-xg',
3528 'info_dict': {
3529 'id': 'UCtFRv9O2AHqOZjjynzrv-xg',
3530 'title': 'UCtFRv9O2AHqOZjjynzrv-xg',
3531 },
3532 'expected_warnings': [
3533 'A channel/user page was given',
3534 'The URL does not have a videos tab',
3535 'Falling back to channel URL',
3536 ],
3537 'playlist_mincount': 9,
3538 }, {
3539 'note': 'Youtube music Album',
3540 'url': 'https://music.youtube.com/browse/MPREb_gTAcphH99wE',
3541 'info_dict': {
3542 'id': 'OLAK5uy_l1m0thk3g31NmIIz_vMIbWtyv7eZixlH0',
3543 'title': 'Album - Royalty Free Music Library V2 (50 Songs)',
3544 },
3545 'playlist_count': 50,
3546 }, {
3547 'note': 'unlisted single video playlist',
3548 'url': 'https://www.youtube.com/playlist?list=PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
3549 'info_dict': {
3550 'uploader_id': 'UC9zHu_mHU96r19o-wV5Qs1Q',
3551 'uploader': 'colethedj',
3552 'id': 'PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
3553 'title': 'yt-dlp unlisted playlist test',
3554 'availability': 'unlisted'
3555 },
3556 'playlist_count': 1,
3557 }, {
3558 'note': 'API Fallback: Recommended - redirects to home page. Requires visitorData',
3559 'url': 'https://www.youtube.com/feed/recommended',
3560 'info_dict': {
3561 'id': 'recommended',
3562 'title': 'recommended',
3563 },
3564 'playlist_mincount': 50,
3565 'params': {
3566 'skip_download': True,
3567 'extractor_args': {'youtubetab': {'skip': ['webpage']}}
3568 },
3569 }, {
3570 'note': 'API Fallback: /videos tab, sorted by oldest first',
3571 'url': 'https://www.youtube.com/user/theCodyReeder/videos?view=0&sort=da&flow=grid',
3572 'info_dict': {
3573 'id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',
3574 'title': 'Cody\'sLab - Videos',
3575 'description': 'md5:d083b7c2f0c67ee7a6c74c3e9b4243fa',
3576 'uploader': 'Cody\'sLab',
3577 'uploader_id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',
3578 },
3579 'playlist_mincount': 650,
3580 'params': {
3581 'skip_download': True,
3582 'extractor_args': {'youtubetab': {'skip': ['webpage']}}
3583 },
3584 }, {
3585 'note': 'API Fallback: Topic, should redirect to playlist?list=UU...',
3586 'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',
3587 'info_dict': {
3588 'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',
3589 'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
3590 'title': 'Uploads from Royalty Free Music - Topic',
3591 'uploader': 'Royalty Free Music - Topic',
3592 },
3593 'expected_warnings': [
3594 'A channel/user page was given',
3595 'The URL does not have a videos tab',
3596 ],
3597 'playlist_mincount': 101,
3598 'params': {
3599 'skip_download': True,
3600 'extractor_args': {'youtubetab': {'skip': ['webpage']}}
3601 },
3602 }]
3603
3604 @classmethod
3605 def suitable(cls, url):
3606 return False if YoutubeIE.suitable(url) else super(
3607 YoutubeTabIE, cls).suitable(url)
3608
3609 def _extract_channel_id(self, webpage):
3610 channel_id = self._html_search_meta(
3611 'channelId', webpage, 'channel id', default=None)
3612 if channel_id:
3613 return channel_id
3614 channel_url = self._html_search_meta(
3615 ('og:url', 'al:ios:url', 'al:android:url', 'al:web:url',
3616 'twitter:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad',
3617 'twitter:app:url:googleplay'), webpage, 'channel url')
3618 return self._search_regex(
3619 r'https?://(?:www\.)?youtube\.com/channel/([^/?#&]+)',
3620 channel_url, 'channel id')
3621
3622 @staticmethod
3623 def _extract_basic_item_renderer(item):
3624 # Modified from _extract_grid_item_renderer
3625 known_basic_renderers = (
3626 'playlistRenderer', 'videoRenderer', 'channelRenderer', 'showRenderer'
3627 )
3628 for key, renderer in item.items():
3629 if not isinstance(renderer, dict):
3630 continue
3631 elif key in known_basic_renderers:
3632 return renderer
3633 elif key.startswith('grid') and key.endswith('Renderer'):
3634 return renderer
3635
3636 def _grid_entries(self, grid_renderer):
3637 for item in grid_renderer['items']:
3638 if not isinstance(item, dict):
3639 continue
3640 renderer = self._extract_basic_item_renderer(item)
3641 if not isinstance(renderer, dict):
3642 continue
3643 title = self._get_text(renderer, 'title')
3644
3645 # playlist
3646 playlist_id = renderer.get('playlistId')
3647 if playlist_id:
3648 yield self.url_result(
3649 'https://www.youtube.com/playlist?list=%s' % playlist_id,
3650 ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
3651 video_title=title)
3652 continue
3653 # video
3654 video_id = renderer.get('videoId')
3655 if video_id:
3656 yield self._extract_video(renderer)
3657 continue
3658 # channel
3659 channel_id = renderer.get('channelId')
3660 if channel_id:
3661 yield self.url_result(
3662 'https://www.youtube.com/channel/%s' % channel_id,
3663 ie=YoutubeTabIE.ie_key(), video_title=title)
3664 continue
3665 # generic endpoint URL support
3666 ep_url = urljoin('https://www.youtube.com/', try_get(
3667 renderer, lambda x: x['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'],
3668 compat_str))
3669 if ep_url:
3670 for ie in (YoutubeTabIE, YoutubePlaylistIE, YoutubeIE):
3671 if ie.suitable(ep_url):
3672 yield self.url_result(
3673 ep_url, ie=ie.ie_key(), video_id=ie._match_id(ep_url), video_title=title)
3674 break
3675
3676 def _shelf_entries_from_content(self, shelf_renderer):
3677 content = shelf_renderer.get('content')
3678 if not isinstance(content, dict):
3679 return
3680 renderer = content.get('gridRenderer') or content.get('expandedShelfContentsRenderer')
3681 if renderer:
3682 # TODO: add support for nested playlists so each shelf is processed
3683 # as separate playlist
3684 # TODO: this includes only first N items
3685 for entry in self._grid_entries(renderer):
3686 yield entry
3687 renderer = content.get('horizontalListRenderer')
3688 if renderer:
3689 # TODO
3690 pass
3691
3692 def _shelf_entries(self, shelf_renderer, skip_channels=False):
3693 ep = try_get(
3694 shelf_renderer, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
3695 compat_str)
3696 shelf_url = urljoin('https://www.youtube.com', ep)
3697 if shelf_url:
3698 # Skip links to other channels; note that checking for
3699 # endpoint.commandMetadata.webCommandMetadata.webPageType == WEB_PAGE_TYPE_CHANNEL
3700 # will not work
3701 if skip_channels and '/channels?' in shelf_url:
3702 return
3703 title = self._get_text(shelf_renderer, 'title')
3704 yield self.url_result(shelf_url, video_title=title)
3705 # Shelf may not contain shelf URL, fall back to extraction from content
3706 for entry in self._shelf_entries_from_content(shelf_renderer):
3707 yield entry
3708
3709 def _playlist_entries(self, video_list_renderer):
3710 for content in video_list_renderer['contents']:
3711 if not isinstance(content, dict):
3712 continue
3713 renderer = content.get('playlistVideoRenderer') or content.get('playlistPanelVideoRenderer')
3714 if not isinstance(renderer, dict):
3715 continue
3716 video_id = renderer.get('videoId')
3717 if not video_id:
3718 continue
3719 yield self._extract_video(renderer)
3720
3721 def _rich_entries(self, rich_grid_renderer):
3722 renderer = try_get(
3723 rich_grid_renderer, lambda x: x['content']['videoRenderer'], dict) or {}
3724 video_id = renderer.get('videoId')
3725 if not video_id:
3726 return
3727 yield self._extract_video(renderer)
3728
3729 def _video_entry(self, video_renderer):
3730 video_id = video_renderer.get('videoId')
3731 if video_id:
3732 return self._extract_video(video_renderer)
3733
3734 def _post_thread_entries(self, post_thread_renderer):
3735 post_renderer = try_get(
3736 post_thread_renderer, lambda x: x['post']['backstagePostRenderer'], dict)
3737 if not post_renderer:
3738 return
3739 # video attachment
3740 video_renderer = try_get(
3741 post_renderer, lambda x: x['backstageAttachment']['videoRenderer'], dict) or {}
3742 video_id = video_renderer.get('videoId')
3743 if video_id:
3744 entry = self._extract_video(video_renderer)
3745 if entry:
3746 yield entry
3747 # playlist attachment
3748 playlist_id = try_get(
3749 post_renderer, lambda x: x['backstageAttachment']['playlistRenderer']['playlistId'], compat_str)
3750 if playlist_id:
3751 yield self.url_result(
3752 'https://www.youtube.com/playlist?list=%s' % playlist_id,
3753 ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
3754 # inline video links
3755 runs = try_get(post_renderer, lambda x: x['contentText']['runs'], list) or []
3756 for run in runs:
3757 if not isinstance(run, dict):
3758 continue
3759 ep_url = try_get(
3760 run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'], compat_str)
3761 if not ep_url:
3762 continue
3763 if not YoutubeIE.suitable(ep_url):
3764 continue
3765 ep_video_id = YoutubeIE._match_id(ep_url)
3766 if video_id == ep_video_id:
3767 continue
3768 yield self.url_result(ep_url, ie=YoutubeIE.ie_key(), video_id=ep_video_id)
3769
3770 def _post_thread_continuation_entries(self, post_thread_continuation):
3771 contents = post_thread_continuation.get('contents')
3772 if not isinstance(contents, list):
3773 return
3774 for content in contents:
3775 renderer = content.get('backstagePostThreadRenderer')
3776 if not isinstance(renderer, dict):
3777 continue
3778 for entry in self._post_thread_entries(renderer):
3779 yield entry
3780
3781 r''' # unused
3782 def _rich_grid_entries(self, contents):
3783 for content in contents:
3784 video_renderer = try_get(content, lambda x: x['richItemRenderer']['content']['videoRenderer'], dict)
3785 if video_renderer:
3786 entry = self._video_entry(video_renderer)
3787 if entry:
3788 yield entry
3789 '''
3790 def _entries(self, tab, item_id, ytcfg, account_syncid, visitor_data):
3791
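# Yield entries from the tab's sectionListRenderer/richGridRenderer contents,
# then keep following continuation tokens page by page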
3792 def extract_entries(parent_renderer): # this needs to be called again for continuations to work with feeds
3793 contents = try_get(parent_renderer, lambda x: x['contents'], list) or []
3794 for content in contents:
3795 if not isinstance(content, dict):
3796 continue
3797 is_renderer = try_get(content, lambda x: x['itemSectionRenderer'], dict)
3798 if not is_renderer:
3799 renderer = content.get('richItemRenderer')
3800 if renderer:
3801 for entry in self._rich_entries(renderer):
3802 yield entry
3803 continuation_list[0] = self._extract_continuation(parent_renderer)
3804 continue
3805 isr_contents = try_get(is_renderer, lambda x: x['contents'], list) or []
3806 for isr_content in isr_contents:
3807 if not isinstance(isr_content, dict):
3808 continue
3809
3810 known_renderers = {
3811 'playlistVideoListRenderer': self._playlist_entries,
3812 'gridRenderer': self._grid_entries,
3813 'shelfRenderer': lambda x: self._shelf_entries(x, tab.get('title') != 'Channels'),
3814 'backstagePostThreadRenderer': self._post_thread_entries,
3815 'videoRenderer': lambda x: [self._video_entry(x)],
3816 }
3817 for key, renderer in isr_content.items():
3818 if key not in known_renderers:
3819 continue
3820 for entry in known_renderers[key](renderer):
3821 if entry:
3822 yield entry
3823 continuation_list[0] = self._extract_continuation(renderer)
3824 break
3825
3826 if not continuation_list[0]:
3827 continuation_list[0] = self._extract_continuation(is_renderer)
3828
3829 if not continuation_list[0]:
3830 continuation_list[0] = self._extract_continuation(parent_renderer)
3831
3832 continuation_list = [None] # Python 2 does not support nonlocal
3833 tab_content = try_get(tab, lambda x: x['content'], dict)
3834 if not tab_content:
3835 return
3836 parent_renderer = (
3837 try_get(tab_content, lambda x: x['sectionListRenderer'], dict)
3838 or try_get(tab_content, lambda x: x['richGridRenderer'], dict) or {})
3839 for entry in extract_entries(parent_renderer):
3840 yield entry
3841 continuation = continuation_list[0]
3842
3843 for page_num in itertools.count(1):
3844 if not continuation:
3845 break
3846 headers = self.generate_api_headers(
3847 ytcfg=ytcfg, account_syncid=account_syncid, visitor_data=visitor_data)
3848 response = self._extract_response(
3849 item_id='%s page %s' % (item_id, page_num),
3850 query=continuation, headers=headers, ytcfg=ytcfg,
3851 check_get_keys=('continuationContents', 'onResponseReceivedActions', 'onResponseReceivedEndpoints'))
3852
3853 if not response:
3854 break
3855 # Extracting updated visitor data is required to prevent an infinite extraction loop in some cases
3856 # See: https://github.com/ytdl-org/youtube-dl/issues/28702
3857 visitor_data = self._extract_visitor_data(response) or visitor_data
3858
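# Legacy continuation responses put their data under continuationContents;
# dispatch on the renderer key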
3859 known_continuation_renderers = {
3860 'playlistVideoListContinuation': self._playlist_entries,
3861 'gridContinuation': self._grid_entries,
3862 'itemSectionContinuation': self._post_thread_continuation_entries,
3863 'sectionListContinuation': extract_entries, # for feeds
3864 }
3865 continuation_contents = try_get(
3866 response, lambda x: x['continuationContents'], dict) or {}
3867 continuation_renderer = None
3868 for key, value in continuation_contents.items():
3869 if key not in known_continuation_renderers:
3870 continue
3871 continuation_renderer = value
3872 continuation_list = [None]
3873 for entry in known_continuation_renderers[key](continuation_renderer):
3874 yield entry
3875 continuation = continuation_list[0] or self._extract_continuation(continuation_renderer)
3876 break
3877 if continuation_renderer:
3878 continue
3879
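# Newer continuation responses return items via onResponseReceivedActions/
# onResponseReceivedEndpoints; wrap them so the matching extractor can be reused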
3880 known_renderers = {
3881 'gridPlaylistRenderer': (self._grid_entries, 'items'),
3882 'gridVideoRenderer': (self._grid_entries, 'items'),
3883 'gridChannelRenderer': (self._grid_entries, 'items'),
3884 'playlistVideoRenderer': (self._playlist_entries, 'contents'),
3885 'itemSectionRenderer': (extract_entries, 'contents'), # for feeds
3886 'richItemRenderer': (extract_entries, 'contents'), # for hashtag
3887 'backstagePostThreadRenderer': (self._post_thread_continuation_entries, 'contents')
3888 }
3889 on_response_received = dict_get(response, ('onResponseReceivedActions', 'onResponseReceivedEndpoints'))
3890 continuation_items = try_get(
3891 on_response_received, lambda x: x[0]['appendContinuationItemsAction']['continuationItems'], list)
3892 continuation_item = try_get(continuation_items, lambda x: x[0], dict) or {}
3893 video_items_renderer = None
3894 for key, value in continuation_item.items():
3895 if key not in known_renderers:
3896 continue
3897 video_items_renderer = {known_renderers[key][1]: continuation_items}
3898 continuation_list = [None]
3899 for entry in known_renderers[key][0](video_items_renderer):
3900 yield entry
3901 continuation = continuation_list[0] or self._extract_continuation(video_items_renderer)
3902 break
3903 if video_items_renderer:
3904 continue
3905 break
3906
3907 @staticmethod
3908 def _extract_selected_tab(tabs):
3909 for tab in tabs:
3910 renderer = dict_get(tab, ('tabRenderer', 'expandableTabRenderer')) or {}
3911 if renderer.get('selected') is True:
3912 return renderer
3913 else:
3914 raise ExtractorError('Unable to find selected tab')
3915
3916 @classmethod
3917 def _extract_uploader(cls, data):
3918 uploader = {}
3919 renderer = cls._extract_sidebar_info_renderer(data, 'playlistSidebarSecondaryInfoRenderer') or {}
3920 owner = try_get(
3921 renderer, lambda x: x['videoOwner']['videoOwnerRenderer']['title']['runs'][0], dict)
3922 if owner:
3923 uploader['uploader'] = owner.get('text')
3924 uploader['uploader_id'] = try_get(
3925 owner, lambda x: x['navigationEndpoint']['browseEndpoint']['browseId'], compat_str)
3926 uploader['uploader_url'] = urljoin(
3927 'https://www.youtube.com/',
3928 try_get(owner, lambda x: x['navigationEndpoint']['browseEndpoint']['canonicalBaseUrl'], compat_str))
3929 return {k: v for k, v in uploader.items() if v is not None}
3930
3931 def _extract_from_tabs(self, item_id, ytcfg, data, tabs):
3932 playlist_id = title = description = channel_url = channel_name = channel_id = None
3933 thumbnails_list = []
3934 tags = []
3935
3936 selected_tab = self._extract_selected_tab(tabs)
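# Channel pages carry their metadata in channelMetadataRenderer,
# playlists in playlistMetadataRenderer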
3937 renderer = try_get(
3938 data, lambda x: x['metadata']['channelMetadataRenderer'], dict)
3939 if renderer:
3940 channel_name = renderer.get('title')
3941 channel_url = renderer.get('channelUrl')
3942 channel_id = renderer.get('externalId')
3943 else:
3944 renderer = try_get(
3945 data, lambda x: x['metadata']['playlistMetadataRenderer'], dict)
3946
3947 if renderer:
3948 title = renderer.get('title')
3949 description = renderer.get('description', '')
3950 playlist_id = channel_id
3951 tags = renderer.get('keywords', '').split()
3952 thumbnails_list = (
3953 try_get(renderer, lambda x: x['avatar']['thumbnails'], list)
3954 or try_get(
3955 self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer'),
3956 lambda x: x['thumbnailRenderer']['playlistVideoThumbnailRenderer']['thumbnail']['thumbnails'],
3957 list)
3958 or [])
3959
3960 thumbnails = []
3961 for t in thumbnails_list:
3962 if not isinstance(t, dict):
3963 continue
3964 thumbnail_url = url_or_none(t.get('url'))
3965 if not thumbnail_url:
3966 continue
3967 thumbnails.append({
3968 'url': thumbnail_url,
3969 'width': int_or_none(t.get('width')),
3970 'height': int_or_none(t.get('height')),
3971 })
3972 if playlist_id is None:
3973 playlist_id = item_id
3974 if title is None:
3975 title = (
3976 try_get(data, lambda x: x['header']['hashtagHeaderRenderer']['hashtag']['simpleText'])
3977 or playlist_id)
3978 title += format_field(selected_tab, 'title', ' - %s')
3979 title += format_field(selected_tab, 'expandedText', ' - %s')
3980 metadata = {
3981 'playlist_id': playlist_id,
3982 'playlist_title': title,
3983 'playlist_description': description,
3984 'uploader': channel_name,
3985 'uploader_id': channel_id,
3986 'uploader_url': channel_url,
3987 'thumbnails': thumbnails,
3988 'tags': tags,
3989 }
3990 availability = self._extract_availability(data)
3991 if availability:
3992 metadata['availability'] = availability
3993 if not channel_id:
3994 metadata.update(self._extract_uploader(data))
3995 metadata.update({
3996 'channel': metadata['uploader'],
3997 'channel_id': metadata['uploader_id'],
3998 'channel_url': metadata['uploader_url']})
3999 return self.playlist_result(
4000 self._entries(
4001 selected_tab, playlist_id, ytcfg,
4002 self._extract_account_syncid(ytcfg, data),
4003 self._extract_visitor_data(data, ytcfg)),
4004 **metadata)
4005
4006 def _extract_mix_playlist(self, playlist, playlist_id, data, ytcfg):
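# Mix playlists have no fixed end; keep requesting the next page from the last
# video's watchEndpoint and stop once the first video reappears or nothing new is returned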
4007 first_id = last_id = response = None
4008 for page_num in itertools.count(1):
4009 videos = list(self._playlist_entries(playlist))
4010 if not videos:
4011 return
4012 start = next((i for i, v in enumerate(videos) if v['id'] == last_id), -1) + 1
4013 if start >= len(videos):
4014 return
4015 for video in videos[start:]:
4016 if video['id'] == first_id:
4017 self.to_screen('First video %s found again; Assuming end of Mix' % first_id)
4018 return
4019 yield video
4020 first_id = first_id or videos[0]['id']
4021 last_id = videos[-1]['id']
4022 watch_endpoint = try_get(
4023 playlist, lambda x: x['contents'][-1]['playlistPanelVideoRenderer']['navigationEndpoint']['watchEndpoint'])
4024 headers = self.generate_api_headers(
4025 ytcfg=ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
4026 visitor_data=self._extract_visitor_data(response, data, ytcfg))
4027 query = {
4028 'playlistId': playlist_id,
4029 'videoId': watch_endpoint.get('videoId') or last_id,
4030 'index': watch_endpoint.get('index') or len(videos),
4031 'params': watch_endpoint.get('params') or 'OAE%3D'
4032 }
4033 response = self._extract_response(
4034 item_id='%s page %d' % (playlist_id, page_num),
4035 query=query, ep='next', headers=headers, ytcfg=ytcfg,
4036 check_get_keys='contents'
4037 )
4038 playlist = try_get(
4039 response, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
4040
4041 def _extract_from_playlist(self, item_id, url, data, playlist, ytcfg):
4042 title = playlist.get('title') or try_get(
4043 data, lambda x: x['titleText']['simpleText'], compat_str)
4044 playlist_id = playlist.get('playlistId') or item_id
4045
4046 # Delegating everything except mix playlists to regular tab-based playlist URL
4047 playlist_url = urljoin(url, try_get(
4048 playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
4049 compat_str))
4050 if playlist_url and playlist_url != url:
4051 return self.url_result(
4052 playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
4053 video_title=title)
4054
4055 return self.playlist_result(
4056 self._extract_mix_playlist(playlist, playlist_id, data, ytcfg),
4057 playlist_id=playlist_id, playlist_title=title)
4058
4059 def _extract_availability(self, data):
4060 """
4061 Gets the availability of a given playlist/tab.
4062 Note: Unless YouTube tells us explicitly, we do not assume it is public
4063 @param data: response
4064 """
4065 is_private = is_unlisted = None
4066 renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer') or {}
4067 badge_labels = self._extract_badges(renderer)
4068
4069 # Personal playlists, when authenticated, have a dropdown visibility selector instead of a badge
4070 privacy_dropdown_entries = try_get(
4071 renderer, lambda x: x['privacyForm']['dropdownFormFieldRenderer']['dropdown']['dropdownRenderer']['entries'], list) or []
4072 for renderer_dict in privacy_dropdown_entries:
4073 is_selected = try_get(
4074 renderer_dict, lambda x: x['privacyDropdownItemRenderer']['isSelected'], bool) or False
4075 if not is_selected:
4076 continue
4077 label = self._get_text(renderer_dict, ('privacyDropdownItemRenderer', 'label'))
4078 if label:
4079 badge_labels.add(label.lower())
4080 break
4081
4082 for badge_label in badge_labels:
4083 if badge_label == 'unlisted':
4084 is_unlisted = True
4085 elif badge_label == 'private':
4086 is_private = True
4087 elif badge_label == 'public':
4088 is_unlisted = is_private = False
4089 return self._availability(is_private, False, False, False, is_unlisted)
4090
4091 @staticmethod
4092 def _extract_sidebar_info_renderer(data, info_renderer, expected_type=dict):
4093 sidebar_renderer = try_get(
4094 data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list) or []
4095 for item in sidebar_renderer:
4096 renderer = try_get(item, lambda x: x[info_renderer], expected_type)
4097 if renderer:
4098 return renderer
4099
4100 def _reload_with_unavailable_videos(self, item_id, data, ytcfg):
4101 """
4102 Get playlist with unavailable videos if the 'show unavailable videos' button exists.
4103 """
4104 browse_id = params = None
4105 renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer')
4106 if not renderer:
4107 return
4108 menu_renderer = try_get(
4109 renderer, lambda x: x['menu']['menuRenderer']['items'], list) or []
4110 for menu_item in menu_renderer:
4111 if not isinstance(menu_item, dict):
4112 continue
4113 nav_item_renderer = menu_item.get('menuNavigationItemRenderer')
4114 text = try_get(
4115 nav_item_renderer, lambda x: x['text']['simpleText'], compat_str)
4116 if not text or text.lower() != 'show unavailable videos':
4117 continue
4118 browse_endpoint = try_get(
4119 nav_item_renderer, lambda x: x['navigationEndpoint']['browseEndpoint'], dict) or {}
4120 browse_id = browse_endpoint.get('browseId')
4121 params = browse_endpoint.get('params')
4122 break
4123
4124 headers = self.generate_api_headers(
4125 ytcfg=ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
4126 visitor_data=self._extract_visitor_data(data, ytcfg))
4127 query = {
4128 'params': params or 'wgYCCAA=',
4129 'browseId': browse_id or 'VL%s' % item_id
4130 }
4131 return self._extract_response(
4132 item_id=item_id, headers=headers, query=query,
4133 check_get_keys='contents', fatal=False, ytcfg=ytcfg,
4134 note='Downloading API JSON with unavailable videos')
4135
4136 def _extract_webpage(self, url, item_id, fatal=True):
4137 retries = self.get_param('extractor_retries', 3)
4138 count = -1
4139 webpage = data = last_error = None
4140 while count < retries:
4141 count += 1
4142 # Sometimes youtube returns a webpage with incomplete ytInitialData
4143 # See: https://github.com/yt-dlp/yt-dlp/issues/116
4144 if last_error:
4145 self.report_warning('%s. Retrying ...' % last_error)
4146 try:
4147 webpage = self._download_webpage(
4148 url, item_id,
4149 note='Downloading webpage%s' % (' (retry #%d)' % count if count else '',))
4150 data = self.extract_yt_initial_data(item_id, webpage or '', fatal=fatal) or {}
4151 except ExtractorError as e:
4152 if isinstance(e.cause, network_exceptions):
4153 if not isinstance(e.cause, compat_HTTPError) or e.cause.code not in (403, 429):
4154 last_error = error_to_compat_str(e.cause or e.msg)
4155 if count < retries:
4156 continue
4157 if fatal:
4158 raise
4159 self.report_warning(error_to_compat_str(e))
4160 break
4161 else:
4162 try:
4163 self._extract_and_report_alerts(data)
4164 except ExtractorError as e:
4165 if fatal:
4166 raise
4167 self.report_warning(error_to_compat_str(e))
4168 break
4169
4170 if dict_get(data, ('contents', 'currentVideoEndpoint')):
4171 break
4172
4173 last_error = 'Incomplete yt initial data received'
4174 if count >= retries:
4175 if fatal:
4176 raise ExtractorError(last_error)
4177 self.report_warning(last_error)
4178 break
4179
4180 return webpage, data
4181
4182 def _extract_data(self, url, item_id, ytcfg=None, fatal=True, webpage_fatal=False, default_client='web'):
4183 data = None
4184 if 'webpage' not in self._configuration_arg('skip'):
4185 webpage, data = self._extract_webpage(url, item_id, fatal=webpage_fatal)
4186 ytcfg = ytcfg or self.extract_ytcfg(item_id, webpage)
4187 if not data:
4188 if not ytcfg and self.is_authenticated:
4189 msg = 'Playlists that require authentication may not extract correctly without a successful webpage download.'
4190 if 'authcheck' not in self._configuration_arg('skip') and fatal:
4191 raise ExtractorError(
4192 msg + ' If you are not downloading private content, or your cookies are only for the first account and channel,'
4193 ' pass "--extractor-args youtubetab:skip=authcheck" to skip this check',
4194 expected=True)
4195 self.report_warning(msg, only_once=True)
4196 data = self._extract_tab_endpoint(url, item_id, ytcfg, fatal=fatal, default_client=default_client)
4197 return data, ytcfg
4198
4199 def _extract_tab_endpoint(self, url, item_id, ytcfg=None, fatal=True, default_client='web'):
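# Resolve the URL via the navigation/resolve_url endpoint, then call the
# browse or next endpoint it points at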
4200 headers = self.generate_api_headers(ytcfg=ytcfg, default_client=default_client)
4201 resolve_response = self._extract_response(
4202 item_id=item_id, query={'url': url}, check_get_keys='endpoint', headers=headers, ytcfg=ytcfg, fatal=fatal,
4203 ep='navigation/resolve_url', note='Downloading API parameters API JSON', default_client=default_client)
4204 endpoints = {'browseEndpoint': 'browse', 'watchEndpoint': 'next'}
4205 for ep_key, ep in endpoints.items():
4206 params = try_get(resolve_response, lambda x: x['endpoint'][ep_key], dict)
4207 if params:
4208 return self._extract_response(
4209 item_id=item_id, query=params, ep=ep, headers=headers,
4210 ytcfg=ytcfg, fatal=fatal, default_client=default_client,
4211 check_get_keys=('contents', 'currentVideoEndpoint'))
4212 err_note = 'Failed to resolve url (does the playlist exist?)'
4213 if fatal:
4214 raise ExtractorError(err_note, expected=True)
4215 self.report_warning(err_note, item_id)
4216
4217 @staticmethod
4218 def _smuggle_data(entries, data):
4219 for entry in entries:
4220 if data:
4221 entry['url'] = smuggle_url(entry['url'], data)
4222 yield entry
4223
4224 def _real_extract(self, url):
4225 url, smuggled_data = unsmuggle_url(url, {})
4226 if self.is_music_url(url):
4227 smuggled_data['is_music_url'] = True
4228 info_dict = self.__real_extract(url, smuggled_data)
4229 if info_dict.get('entries'):
4230 info_dict['entries'] = self._smuggle_data(info_dict['entries'], smuggled_data)
4231 return info_dict
4232
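# Splits a matched URL into pre (base URL), tab (e.g. /videos, only present for
# channel-type URLs) and post (the remainder)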
4233 _url_re = re.compile(r'(?P<pre>%s)(?(channel_type)(?P<tab>/\w+))?(?P<post>.*)$' % _VALID_URL)
4234
4235 def __real_extract(self, url, smuggled_data):
4236 item_id = self._match_id(url)
4237 url = compat_urlparse.urlunparse(
4238 compat_urlparse.urlparse(url)._replace(netloc='www.youtube.com'))
4239 compat_opts = self.get_param('compat_opts', [])
4240
4241 def get_mobj(url):
4242 mobj = self._url_re.match(url).groupdict()
4243 mobj.update((k, '') for k, v in mobj.items() if v is None)
4244 return mobj
4245
4246 mobj = get_mobj(url)
4247 # Youtube returns incomplete data if the tab name is not lowercase
4248 pre, tab, post, is_channel = mobj['pre'], mobj['tab'].lower(), mobj['post'], not mobj['not_channel']
4249 if is_channel:
4250 if smuggled_data.get('is_music_url'):
4251 if item_id[:2] == 'VL':
4252 # Youtube music VL channels have an equivalent playlist
4253 item_id = item_id[2:]
4254 pre, tab, post, is_channel = 'https://www.youtube.com/playlist?list=%s' % item_id, '', '', False
4255 elif item_id[:2] == 'MP':
4256 # Resolve albums (/[channel/browse]/MP...) to their equivalent playlist
4257 mdata = self._extract_tab_endpoint(
4258 'https://music.youtube.com/channel/%s' % item_id, item_id, default_client='web_music')
4259 murl = traverse_obj(
4260 mdata, ('microformat', 'microformatDataRenderer', 'urlCanonical'), get_all=False, expected_type=compat_str)
4261 if not murl:
4262 raise ExtractorError('Failed to resolve album to playlist.')
4263 return self.url_result(murl, ie=YoutubeTabIE.ie_key())
4264 elif mobj['channel_type'] == 'browse':
4265 # Youtube music /browse/ should be changed to /channel/
4266 pre = 'https://www.youtube.com/channel/%s' % item_id
4267 if is_channel and not tab and 'no-youtube-channel-redirect' not in compat_opts:
4268 # Home URLs should redirect to /videos/
4269 self.report_warning(
4270 'A channel/user page was given. All the channel\'s videos will be downloaded. '
4271 'To download only the videos in the home page, add a "/featured" to the URL')
4272 tab = '/videos'
4273
4274 url = ''.join((pre, tab, post))
4275 mobj = get_mobj(url)
4276
4277 # Handle both video/playlist URLs
4278 qs = parse_qs(url)
4279 video_id = qs.get('v', [None])[0]
4280 playlist_id = qs.get('list', [None])[0]
4281
4282 if not video_id and mobj['not_channel'].startswith('watch'):
4283 if not playlist_id:
4284 # If there are neither video nor playlist IDs, youtube redirects to the home page, which is undesirable
4285 raise ExtractorError('Unable to recognize tab page')
4286 # Common mistake: https://www.youtube.com/watch?list=playlist_id
4287 self.report_warning('A video URL was given without video ID. Trying to download playlist %s' % playlist_id)
4288 url = 'https://www.youtube.com/playlist?list=%s' % playlist_id
4289 mobj = get_mobj(url)
4290
4291 if video_id and playlist_id:
4292 if self.get_param('noplaylist'):
4293 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
4294 return self.url_result(f'https://www.youtube.com/watch?v={video_id}', ie=YoutubeIE.ie_key(), video_id=video_id)
4295 self.to_screen('Downloading playlist %s; add --no-playlist to just download video %s' % (playlist_id, video_id))
4296
4297 data, ytcfg = self._extract_data(url, item_id)
4298
4299 tabs = try_get(
4300 data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
4301 if tabs:
4302 selected_tab = self._extract_selected_tab(tabs)
4303 tab_name = selected_tab.get('title', '')
4304 if 'no-youtube-channel-redirect' not in compat_opts:
4305 if mobj['tab'] == '/live':
4306 # Live tab should have redirected to the video
4307 raise ExtractorError('The channel is not currently live', expected=True)
4308 if mobj['tab'] == '/videos' and tab_name.lower() != mobj['tab'][1:]:
4309 if not mobj['not_channel'] and item_id[:2] == 'UC':
4310 # Topic channels don't have /videos. Use the equivalent playlist instead
4311 self.report_warning('The URL does not have a %s tab. Trying to redirect to playlist UU%s instead' % (mobj['tab'][1:], item_id[2:]))
4312 pl_id = 'UU%s' % item_id[2:]
4313 pl_url = 'https://www.youtube.com/playlist?list=%s%s' % (pl_id, mobj['post'])
4314 try:
4315 data, ytcfg, item_id, url = *self._extract_data(pl_url, pl_id, ytcfg=ytcfg, fatal=True), pl_id, pl_url
4316 except ExtractorError:
4317 self.report_warning('The playlist gave error. Falling back to channel URL')
4318 else:
4319 self.report_warning('The URL does not have a %s tab. %s is being downloaded instead' % (mobj['tab'][1:], tab_name))
4320
4321 self.write_debug('Final URL: %s' % url)
4322
4323 # YouTube sometimes provides a button to reload playlist with unavailable videos.
4324 if 'no-youtube-unavailable-videos' not in compat_opts:
4325 data = self._reload_with_unavailable_videos(item_id, data, ytcfg) or data
4326 self._extract_and_report_alerts(data, only_once=True)
4327 tabs = try_get(
4328 data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
4329 if tabs:
4330 return self._extract_from_tabs(item_id, ytcfg, data, tabs)
4331
4332 playlist = try_get(
4333 data, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
4334 if playlist:
4335 return self._extract_from_playlist(item_id, url, data, playlist, ytcfg)
4336
4337 video_id = try_get(
4338 data, lambda x: x['currentVideoEndpoint']['watchEndpoint']['videoId'],
4339 compat_str) or video_id
4340 if video_id:
4341 if mobj['tab'] != '/live': # live tab is expected to redirect to video
4342 self.report_warning('Unable to recognize playlist. Downloading just video %s' % video_id)
4343 return self.url_result(f'https://www.youtube.com/watch?v={video_id}', ie=YoutubeIE.ie_key(), video_id=video_id)
4344
4345 raise ExtractorError('Unable to recognize tab page')
4346
4347
4348 class YoutubePlaylistIE(InfoExtractor):
4349 IE_DESC = 'YouTube playlists'
4350 _VALID_URL = r'''(?x)(?:
4351 (?:https?://)?
4352 (?:\w+\.)?
4353 (?:
4354 (?:
4355 youtube(?:kids)?\.com|
4356 %(invidious)s
4357 )
4358 /.*?\?.*?\blist=
4359 )?
4360 (?P<id>%(playlist_id)s)
4361 )''' % {
4362 'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE,
4363 'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
4364 }
4365 IE_NAME = 'youtube:playlist'
4366 _TESTS = [{
4367 'note': 'issue #673',
4368 'url': 'PLBB231211A4F62143',
4369 'info_dict': {
4370 'title': '[OLD]Team Fortress 2 (Class-based LP)',
4371 'id': 'PLBB231211A4F62143',
4372 'uploader': 'Wickydoo',
4373 'uploader_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
4374 'description': 'md5:8fa6f52abb47a9552002fa3ddfc57fc2',
4375 },
4376 'playlist_mincount': 29,
4377 }, {
4378 'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
4379 'info_dict': {
4380 'title': 'YDL_safe_search',
4381 'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
4382 },
4383 'playlist_count': 2,
4384 'skip': 'This playlist is private',
4385 }, {
4386 'note': 'embedded',
4387 'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
4388 'playlist_count': 4,
4389 'info_dict': {
4390 'title': 'JODA15',
4391 'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
4392 'uploader': 'milan',
4393 'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
4394 }
4395 }, {
4396 'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
4397 'playlist_mincount': 654,
4398 'info_dict': {
4399 'title': '2018 Chinese New Singles (11/6 updated)',
4400 'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
4401 'uploader': 'LBK',
4402 'uploader_id': 'UC21nz3_MesPLqtDqwdvnoxA',
4403 'description': 'md5:da521864744d60a198e3a88af4db0d9d',
4404 }
4405 }, {
4406 'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
4407 'only_matching': True,
4408 }, {
4409 # music album playlist
4410 'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
4411 'only_matching': True,
4412 }]
4413
4414 @classmethod
4415 def suitable(cls, url):
4416 if YoutubeTabIE.suitable(url):
4417 return False
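# parse_qs is also imported at module level; the local import here presumably keeps `suitable` self-contained (e.g. for the generated lazy extractors)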
4418 from ..utils import parse_qs
4419 qs = parse_qs(url)
4420 if qs.get('v', [None])[0]:
4421 return False
4422 return super(YoutubePlaylistIE, cls).suitable(url)
4423
4424 def _real_extract(self, url):
4425 playlist_id = self._match_id(url)
4426 is_music_url = YoutubeBaseInfoExtractor.is_music_url(url)
4427 url = update_url_query(
4428 'https://www.youtube.com/playlist',
4429 parse_qs(url) or {'list': playlist_id})
4430 if is_music_url:
4431 url = smuggle_url(url, {'is_music_url': True})
4432 return self.url_result(url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
4433
4434
4435 class YoutubeYtBeIE(InfoExtractor):
4436 IE_DESC = 'youtu.be'
4437 _VALID_URL = r'https?://youtu\.be/(?P<id>[0-9A-Za-z_-]{11})/*?.*?\blist=(?P<playlist_id>%(playlist_id)s)' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
4438 _TESTS = [{
4439 'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
4440 'info_dict': {
4441 'id': 'yeWKywCrFtk',
4442 'ext': 'mp4',
4443 'title': 'Small Scale Baler and Braiding Rugs',
4444 'uploader': 'Backus-Page House Museum',
4445 'uploader_id': 'backuspagemuseum',
4446 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
4447 'upload_date': '20161008',
4448 'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
4449 'categories': ['Nonprofits & Activism'],
4450 'tags': list,
4451 'like_count': int,
4452 'dislike_count': int,
4453 },
4454 'params': {
4455 'noplaylist': True,
4456 'skip_download': True,
4457 },
4458 }, {
4459 'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
4460 'only_matching': True,
4461 }]
4462
4463 def _real_extract(self, url):
4464 mobj = self._match_valid_url(url)
4465 video_id = mobj.group('id')
4466 playlist_id = mobj.group('playlist_id')
4467 return self.url_result(
4468 update_url_query('https://www.youtube.com/watch', {
4469 'v': video_id,
4470 'list': playlist_id,
4471 'feature': 'youtu.be',
4472 }), ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
4473
4474
4475 class YoutubeYtUserIE(InfoExtractor):
4476 IE_DESC = 'YouTube user videos; "ytuser:" prefix'
4477 _VALID_URL = r'ytuser:(?P<id>.+)'
4478 _TESTS = [{
4479 'url': 'ytuser:phihag',
4480 'only_matching': True,
4481 }]
4482
4483 def _real_extract(self, url):
4484 user_id = self._match_id(url)
4485 return self.url_result(
4486 'https://www.youtube.com/user/%s/videos' % user_id,
4487 ie=YoutubeTabIE.ie_key(), video_id=user_id)
4488
4489
4490 class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
4491 IE_NAME = 'youtube:favorites'
4492 IE_DESC = 'YouTube liked videos; ":ytfav" keyword (requires cookies)'
4493 _VALID_URL = r':ytfav(?:ou?rite)?s?'
4494 _LOGIN_REQUIRED = True
4495 _TESTS = [{
4496 'url': ':ytfav',
4497 'only_matching': True,
4498 }, {
4499 'url': ':ytfavorites',
4500 'only_matching': True,
4501 }]
4502
4503 def _real_extract(self, url):
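# 'LL' is the authenticated user's liked-videos playlist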
4504 return self.url_result(
4505 'https://www.youtube.com/playlist?list=LL',
4506 ie=YoutubeTabIE.ie_key())
4507
4508
4509 class YoutubeSearchIE(SearchInfoExtractor, YoutubeTabIE):
4510 IE_DESC = 'YouTube searches'
4511 IE_NAME = 'youtube:search'
4512 _SEARCH_KEY = 'ytsearch'
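# Invoked as "ytsearchN:<query>" (e.g. "ytsearch5:yt-dlp") to fetch the first N results; a bare "ytsearch:<query>" fetches one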
4513 _SEARCH_PARAMS = None
4514 _TESTS = []
4515
4516 def _search_results(self, query):
4517 data = {'query': query}
4518 if self._SEARCH_PARAMS:
4519 data['params'] = self._SEARCH_PARAMS
4520 continuation = {}
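# `continuation` carries the token for the next page of results and is merged into the request data on each iteration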
4521 for page_num in itertools.count(1):
4522 data.update(continuation)
4523 search = self._extract_response(
4524 item_id='query "%s" page %s' % (query, page_num), ep='search', query=data,
4525 check_get_keys=('contents', 'onResponseReceivedCommands')
4526 )
4527 if not search:
4528 break
4529 slr_contents = try_get(
4530 search,
4531 (lambda x: x['contents']['twoColumnSearchResultsRenderer']['primaryContents']['sectionListRenderer']['contents'],
4532 lambda x: x['onResponseReceivedCommands'][0]['appendContinuationItemsAction']['continuationItems']),
4533 list)
4534 if not slr_contents:
4535 break
4536
4537 # YouTube sometimes adds promoted content to search results,
4538 # shifting the position of the videos and the continuation token,
4539 # so we search through all entries until we find them.
4540 continuation = None
4541 for slr_content in slr_contents:
4542 if not continuation:
4543 continuation = self._extract_continuation({'contents': [slr_content]})
4544
4545 isr_contents = try_get(
4546 slr_content,
4547 lambda x: x['itemSectionRenderer']['contents'],
4548 list)
4549 if not isr_contents:
4550 continue
4551 for content in isr_contents:
4552 if not isinstance(content, dict):
4553 continue
4554 video = content.get('videoRenderer')
4555 if not isinstance(video, dict):
4556 continue
4557 video_id = video.get('videoId')
4558 if not video_id:
4559 continue
4560
4561 yield self._extract_video(video)
4562
4563 if not continuation:
4564 break
4565
4566
4567 class YoutubeSearchDateIE(YoutubeSearchIE):
4568 IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
4569 _SEARCH_KEY = 'ytsearchdate'
4570 IE_DESC = 'YouTube searches, newest videos first'
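# URL-encoded search filter ('CAI=') that sorts results by upload date (newest first)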
4571 _SEARCH_PARAMS = 'CAI%3D'
4572
4573
4574 class YoutubeSearchURLIE(YoutubeSearchIE):
4575 IE_DESC = 'YouTube search URLs with sorting and filter support'
4576 IE_NAME = YoutubeSearchIE.IE_NAME + '_url'
4577 _SEARCH_KEY = None
4578 _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(?:.*?&)?(?:search_query|q)=[^&]+(?:&|$)'
4579 # _MAX_RESULTS = 100
4580 _TESTS = [{
4581 'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
4582 'playlist_mincount': 5,
4583 'info_dict': {
4584 'id': 'youtube-dl test video',
4585 'title': 'youtube-dl test video',
4586 }
4587 }, {
4588 'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
4589 'only_matching': True,
4590 }]
4591
4592 @classmethod
4593 def _make_valid_url(cls):
4594 return cls._VALID_URL
4595
4596 def _real_extract(self, url):
4597 qs = parse_qs(url)
4598 query = (qs.get('search_query') or qs.get('q'))[0]
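# Forward any 'sp' filter from the URL so sorting/filtering is preserved in the search request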
4599 self._SEARCH_PARAMS = qs.get('sp', ('',))[0]
4600 return self._get_n_results(query, self._MAX_RESULTS)
4601
4602
4603 class YoutubeFeedsInfoExtractor(YoutubeTabIE):
4604 """
4605 Base class for feed extractors.
4606 Subclasses must define the _FEED_NAME property.
4607 """
4608 _LOGIN_REQUIRED = True
4609 _TESTS = []
4610
4611 @property
4612 def IE_NAME(self):
4613 return 'youtube:%s' % self._FEED_NAME
4614
4615 def _real_extract(self, url):
4616 return self.url_result(
4617 'https://www.youtube.com/feed/%s' % self._FEED_NAME,
4618 ie=YoutubeTabIE.ie_key())
4619
4620
4621 class YoutubeWatchLaterIE(InfoExtractor):
4622 IE_NAME = 'youtube:watchlater'
4623 IE_DESC = 'YouTube watch later list; ":ytwatchlater" keyword (requires cookies)'
4624 _VALID_URL = r':ytwatchlater'
4625 _TESTS = [{
4626 'url': ':ytwatchlater',
4627 'only_matching': True,
4628 }]
4629
4630 def _real_extract(self, url):
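# 'WL' is the authenticated user's Watch Later playlist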
4631 return self.url_result(
4632 'https://www.youtube.com/playlist?list=WL', ie=YoutubeTabIE.ie_key())
4633
4634
4635 class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
4636 IE_DESC = 'YouTube recommended videos; ":ytrec" keyword'
4637 _VALID_URL = r'https?://(?:www\.)?youtube\.com/?(?:[?#]|$)|:ytrec(?:ommended)?'
4638 _FEED_NAME = 'recommended'
4639 _LOGIN_REQUIRED = False
4640 _TESTS = [{
4641 'url': ':ytrec',
4642 'only_matching': True,
4643 }, {
4644 'url': ':ytrecommended',
4645 'only_matching': True,
4646 }, {
4647 'url': 'https://youtube.com',
4648 'only_matching': True,
4649 }]
4650
4651
4652 class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
4653 IE_DESC = 'YouTube subscriptions feed; ":ytsubs" keyword (requires cookies)'
4654 _VALID_URL = r':ytsub(?:scription)?s?'
4655 _FEED_NAME = 'subscriptions'
4656 _TESTS = [{
4657 'url': ':ytsubs',
4658 'only_matching': True,
4659 }, {
4660 'url': ':ytsubscriptions',
4661 'only_matching': True,
4662 }]
4663
4664
4665 class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
4666 IE_DESC = 'YouTube watch history; ":ythis" keyword (requires cookies)'
4667 _VALID_URL = r':ythis(?:tory)?'
4668 _FEED_NAME = 'history'
4669 _TESTS = [{
4670 'url': ':ythistory',
4671 'only_matching': True,
4672 }]
4673
4674
4675 class YoutubeTruncatedURLIE(InfoExtractor):
4676 IE_NAME = 'youtube:truncated_url'
4677 IE_DESC = False # Do not list
4678 _VALID_URL = r'''(?x)
4679 (?:https?://)?
4680 (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
4681 (?:watch\?(?:
4682 feature=[a-z_]+|
4683 annotation_id=annotation_[^&]+|
4684 x-yt-cl=[0-9]+|
4685 hl=[^&]*|
4686 t=[0-9]+
4687 )?
4688 |
4689 attribution_link\?a=[^&]+
4690 )
4691 $
4692 '''
4693
4694 _TESTS = [{
4695 'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
4696 'only_matching': True,
4697 }, {
4698 'url': 'https://www.youtube.com/watch?',
4699 'only_matching': True,
4700 }, {
4701 'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
4702 'only_matching': True,
4703 }, {
4704 'url': 'https://www.youtube.com/watch?feature=foo',
4705 'only_matching': True,
4706 }, {
4707 'url': 'https://www.youtube.com/watch?hl=en-GB',
4708 'only_matching': True,
4709 }, {
4710 'url': 'https://www.youtube.com/watch?t=2372',
4711 'only_matching': True,
4712 }]
4713
4714 def _real_extract(self, url):
4715 raise ExtractorError(
4716 'Did you forget to quote the URL? Remember that & is a meta '
4717 'character in most shells, so you want to put the URL in quotes, '
4718 'like yt-dlp '
4719 '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
4720 'or simply yt-dlp BaW_jenozKc .',
4721 expected=True)
4722
4723
4724 class YoutubeClipIE(InfoExtractor):
4725 IE_NAME = 'youtube:clip'
4726 IE_DESC = False # Do not list
4727 _VALID_URL = r'https?://(?:www\.)?youtube\.com/clip/'
4728
4729 def _real_extract(self, url):
4730 self.report_warning('YouTube clips are not currently supported. The entire video will be downloaded instead')
4731 return self.url_result(url, 'Generic')
4732
4733
4734 class YoutubeTruncatedIDIE(InfoExtractor):
4735 IE_NAME = 'youtube:truncated_id'
4736 IE_DESC = False # Do not list
4737 _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'
4738
4739 _TESTS = [{
4740 'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
4741 'only_matching': True,
4742 }]
4743
4744 def _real_extract(self, url):
4745 video_id = self._match_id(url)
4746 raise ExtractorError(
4747 'Incomplete YouTube ID %s. URL %s looks truncated.' % (video_id, url),
4748 expected=True)