yt_dlp/extractor/youtube.py
1 # coding: utf-8
2
3 from __future__ import unicode_literals
4
5 import calendar
6 import copy
7 import datetime
8 import functools
9 import hashlib
10 import itertools
11 import json
12 import math
13 import os.path
14 import random
15 import re
16 import sys
17 import time
18 import traceback
19 import threading
20
21 from .common import InfoExtractor, SearchInfoExtractor
22 from ..compat import (
23 compat_chr,
24 compat_HTTPError,
25 compat_parse_qs,
26 compat_str,
27 compat_urllib_parse_unquote_plus,
28 compat_urllib_parse_urlencode,
29 compat_urllib_parse_urlparse,
30 compat_urlparse,
31 )
32 from ..jsinterp import JSInterpreter
33 from ..utils import (
34 bug_reports_message,
35 clean_html,
36 datetime_from_str,
37 dict_get,
38 error_to_compat_str,
39 ExtractorError,
40 float_or_none,
41 format_field,
42 int_or_none,
43 is_html,
44 join_nonempty,
45 mimetype2ext,
46 network_exceptions,
47 NO_DEFAULT,
48 orderedSet,
49 parse_codecs,
50 parse_count,
51 parse_duration,
52 parse_iso8601,
53 parse_qs,
54 qualities,
55 remove_end,
56 remove_start,
57 smuggle_url,
58 str_or_none,
59 str_to_int,
60 strftime_or_none,
61 traverse_obj,
62 try_get,
63 unescapeHTML,
64 unified_strdate,
65 unsmuggle_url,
66 update_url_query,
67 url_or_none,
68 urljoin,
69 variadic,
70 )
71
72
73 def get_first(obj, keys, **kwargs):
74 return traverse_obj(obj, (..., *variadic(keys)), **kwargs, get_all=False)
75
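# Illustrative: get_first([{'a': 1}, {'a': 2}], 'a') == 1; returns the first match for the
# given key path across a sequence of objects (a thin wrapper around traverse_obj).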
76
77 # any clients starting with _ cannot be explicitly requested by the user
78 INNERTUBE_CLIENTS = {
79 'web': {
80 'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
81 'INNERTUBE_CONTEXT': {
82 'client': {
83 'clientName': 'WEB',
84 'clientVersion': '2.20210622.10.00',
85 }
86 },
87 'INNERTUBE_CONTEXT_CLIENT_NAME': 1
88 },
89 'web_embedded': {
90 'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
91 'INNERTUBE_CONTEXT': {
92 'client': {
93 'clientName': 'WEB_EMBEDDED_PLAYER',
94 'clientVersion': '1.20210620.0.1',
95 },
96 },
97 'INNERTUBE_CONTEXT_CLIENT_NAME': 56
98 },
99 'web_music': {
100 'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
101 'INNERTUBE_HOST': 'music.youtube.com',
102 'INNERTUBE_CONTEXT': {
103 'client': {
104 'clientName': 'WEB_REMIX',
105 'clientVersion': '1.20210621.00.00',
106 }
107 },
108 'INNERTUBE_CONTEXT_CLIENT_NAME': 67,
109 },
110 'web_creator': {
111 'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
112 'INNERTUBE_CONTEXT': {
113 'client': {
114 'clientName': 'WEB_CREATOR',
115 'clientVersion': '1.20210621.00.00',
116 }
117 },
118 'INNERTUBE_CONTEXT_CLIENT_NAME': 62,
119 },
120 'android': {
121 'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
122 'INNERTUBE_CONTEXT': {
123 'client': {
124 'clientName': 'ANDROID',
125 'clientVersion': '16.20',
126 }
127 },
128 'INNERTUBE_CONTEXT_CLIENT_NAME': 3,
129 'REQUIRE_JS_PLAYER': False
130 },
131 'android_embedded': {
132 'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
133 'INNERTUBE_CONTEXT': {
134 'client': {
135 'clientName': 'ANDROID_EMBEDDED_PLAYER',
136 'clientVersion': '16.20',
137 },
138 },
139 'INNERTUBE_CONTEXT_CLIENT_NAME': 55,
140 'REQUIRE_JS_PLAYER': False
141 },
142 'android_music': {
143 'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
144 'INNERTUBE_HOST': 'music.youtube.com',
145 'INNERTUBE_CONTEXT': {
146 'client': {
147 'clientName': 'ANDROID_MUSIC',
148 'clientVersion': '4.32',
149 }
150 },
151 'INNERTUBE_CONTEXT_CLIENT_NAME': 21,
152 'REQUIRE_JS_PLAYER': False
153 },
154 'android_creator': {
155 'INNERTUBE_CONTEXT': {
156 'client': {
157 'clientName': 'ANDROID_CREATOR',
158 'clientVersion': '21.24.100',
159 },
160 },
161 'INNERTUBE_CONTEXT_CLIENT_NAME': 14,
162 'REQUIRE_JS_PLAYER': False
163 },
164 # ios has HLS live streams
165 # See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/680
166 'ios': {
167 'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
168 'INNERTUBE_CONTEXT': {
169 'client': {
170 'clientName': 'IOS',
171 'clientVersion': '16.20',
172 }
173 },
174 'INNERTUBE_CONTEXT_CLIENT_NAME': 5,
175 'REQUIRE_JS_PLAYER': False
176 },
177 'ios_embedded': {
178 'INNERTUBE_API_KEY': 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8',
179 'INNERTUBE_CONTEXT': {
180 'client': {
181 'clientName': 'IOS_MESSAGES_EXTENSION',
182 'clientVersion': '16.20',
183 },
184 },
185 'INNERTUBE_CONTEXT_CLIENT_NAME': 66,
186 'REQUIRE_JS_PLAYER': False
187 },
188 'ios_music': {
189 'INNERTUBE_API_KEY': 'AIzaSyDK3iBpDP9nHVTk2qL73FLJICfOC3c51Og',
190 'INNERTUBE_HOST': 'music.youtube.com',
191 'INNERTUBE_CONTEXT': {
192 'client': {
193 'clientName': 'IOS_MUSIC',
194 'clientVersion': '4.32',
195 },
196 },
197 'INNERTUBE_CONTEXT_CLIENT_NAME': 26,
198 'REQUIRE_JS_PLAYER': False
199 },
200 'ios_creator': {
201 'INNERTUBE_CONTEXT': {
202 'client': {
203 'clientName': 'IOS_CREATOR',
204 'clientVersion': '21.24.100',
205 },
206 },
207 'INNERTUBE_CONTEXT_CLIENT_NAME': 15,
208 'REQUIRE_JS_PLAYER': False
209 },
210 # mweb has 'ultralow' formats
211 # See: https://github.com/yt-dlp/yt-dlp/pull/557
212 'mweb': {
213 'INNERTUBE_API_KEY': 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8',
214 'INNERTUBE_CONTEXT': {
215 'client': {
216 'clientName': 'MWEB',
217 'clientVersion': '2.20210721.07.00',
218 }
219 },
220 'INNERTUBE_CONTEXT_CLIENT_NAME': 2
221 },
222 }
223
224
225 def build_innertube_clients():
226 third_party = {
227 'embedUrl': 'https://google.com', # Can be any valid URL
228 }
229 base_clients = ('android', 'web', 'ios', 'mweb')
230 priority = qualities(base_clients[::-1])
231
232 for client, ytcfg in tuple(INNERTUBE_CLIENTS.items()):
233 ytcfg.setdefault('INNERTUBE_API_KEY', 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8')
234 ytcfg.setdefault('INNERTUBE_HOST', 'www.youtube.com')
235 ytcfg.setdefault('REQUIRE_JS_PLAYER', True)
236 ytcfg['INNERTUBE_CONTEXT']['client'].setdefault('hl', 'en')
237 ytcfg['priority'] = 10 * priority(client.split('_', 1)[0])
238
239 if client in base_clients:
240 INNERTUBE_CLIENTS[f'{client}_agegate'] = agegate_ytcfg = copy.deepcopy(ytcfg)
241 agegate_ytcfg['INNERTUBE_CONTEXT']['client']['clientScreen'] = 'EMBED'
242 agegate_ytcfg['INNERTUBE_CONTEXT']['thirdParty'] = third_party
243 agegate_ytcfg['priority'] -= 1
244 elif client.endswith('_embedded'):
245 ytcfg['INNERTUBE_CONTEXT']['thirdParty'] = third_party
246 ytcfg['priority'] -= 2
247 else:
248 ytcfg['priority'] -= 3
249
250
251 build_innertube_clients()
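# Illustrative effect of the call above: every client entry now has 'INNERTUBE_API_KEY',
# 'INNERTUBE_HOST', 'REQUIRE_JS_PLAYER' and a 'priority' set, and each base client gains an
# '<client>_agegate' variant (e.g. 'web_agegate') using clientScreen 'EMBED' plus the thirdParty embedUrl.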
252
253
254 class YoutubeBaseInfoExtractor(InfoExtractor):
255 """Provide base functions for Youtube extractors"""
256
257 _RESERVED_NAMES = (
258 r'channel|c|user|playlist|watch|w|v|embed|e|watch_popup|clip|'
259 r'shorts|movies|results|shared|hashtag|trending|feed|feeds|'
260 r'browse|oembed|get_video_info|iframe_api|s/player|'
261 r'storefront|oops|index|account|reporthistory|t/terms|about|upload|signin|logout')
262
263 _PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)'
264
265 _NETRC_MACHINE = 'youtube'
266
267 # If True it will raise an error if no login info is provided
268 _LOGIN_REQUIRED = False
269
270 _INVIDIOUS_SITES = (
271 # invidious-redirect websites
272 r'(?:www\.)?redirect\.invidious\.io',
273 r'(?:(?:www|dev)\.)?invidio\.us',
274 # Invidious instances taken from https://github.com/iv-org/documentation/blob/master/Invidious-Instances.md
275 r'(?:www\.)?invidious\.pussthecat\.org',
276 r'(?:www\.)?invidious\.zee\.li',
277 r'(?:www\.)?invidious\.ethibox\.fr',
278 r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',
279 # youtube-dl invidious instances list
280 r'(?:(?:www|no)\.)?invidiou\.sh',
281 r'(?:(?:www|fi)\.)?invidious\.snopyta\.org',
282 r'(?:www\.)?invidious\.kabi\.tk',
283 r'(?:www\.)?invidious\.mastodon\.host',
284 r'(?:www\.)?invidious\.zapashcanon\.fr',
285 r'(?:www\.)?(?:invidious(?:-us)?|piped)\.kavin\.rocks',
286 r'(?:www\.)?invidious\.tinfoil-hat\.net',
287 r'(?:www\.)?invidious\.himiko\.cloud',
288 r'(?:www\.)?invidious\.reallyancient\.tech',
289 r'(?:www\.)?invidious\.tube',
290 r'(?:www\.)?invidiou\.site',
291 r'(?:www\.)?invidious\.site',
292 r'(?:www\.)?invidious\.xyz',
293 r'(?:www\.)?invidious\.nixnet\.xyz',
294 r'(?:www\.)?invidious\.048596\.xyz',
295 r'(?:www\.)?invidious\.drycat\.fr',
296 r'(?:www\.)?inv\.skyn3t\.in',
297 r'(?:www\.)?tube\.poal\.co',
298 r'(?:www\.)?tube\.connect\.cafe',
299 r'(?:www\.)?vid\.wxzm\.sx',
300 r'(?:www\.)?vid\.mint\.lgbt',
301 r'(?:www\.)?vid\.puffyan\.us',
302 r'(?:www\.)?yewtu\.be',
303 r'(?:www\.)?yt\.elukerio\.org',
304 r'(?:www\.)?yt\.lelux\.fi',
305 r'(?:www\.)?invidious\.ggc-project\.de',
306 r'(?:www\.)?yt\.maisputain\.ovh',
307 r'(?:www\.)?ytprivate\.com',
308 r'(?:www\.)?invidious\.13ad\.de',
309 r'(?:www\.)?invidious\.toot\.koeln',
310 r'(?:www\.)?invidious\.fdn\.fr',
311 r'(?:www\.)?watch\.nettohikari\.com',
312 r'(?:www\.)?invidious\.namazso\.eu',
313 r'(?:www\.)?invidious\.silkky\.cloud',
314 r'(?:www\.)?invidious\.exonip\.de',
315 r'(?:www\.)?invidious\.riverside\.rocks',
316 r'(?:www\.)?invidious\.blamefran\.net',
317 r'(?:www\.)?invidious\.moomoo\.de',
318 r'(?:www\.)?ytb\.trom\.tf',
319 r'(?:www\.)?yt\.cyberhost\.uk',
320 r'(?:www\.)?kgg2m7yk5aybusll\.onion',
321 r'(?:www\.)?qklhadlycap4cnod\.onion',
322 r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',
323 r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',
324 r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',
325 r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',
326 r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',
327 r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',
328 r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',
329 r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',
330 r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',
331 r'(?:www\.)?hpniueoejy4opn7bc4ftgazyqjoeqwlvh2uiku2xqku6zpoa4bf5ruid\.onion',
332 )
333
334 def _login(self):
335 """
336 Attempt to log in to YouTube.
337 If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.
338 """
339
340 if (self._LOGIN_REQUIRED
341 and self.get_param('cookiefile') is None
342 and self.get_param('cookiesfrombrowser') is None):
343 self.raise_login_required(
344 'Login details are needed to download this content', method='cookies')
345 username, password = self._get_login_info()
346 if username:
347 self.report_warning(f'Cannot login to YouTube using username and password. {self._LOGIN_HINTS["cookies"]}')
348
349 def _initialize_consent(self):
350 cookies = self._get_cookies('https://www.youtube.com/')
351 if cookies.get('__Secure-3PSID'):
352 return
353 consent_id = None
354 consent = cookies.get('CONSENT')
355 if consent:
356 if 'YES' in consent.value:
357 return
358 consent_id = self._search_regex(
359 r'PENDING\+(\d+)', consent.value, 'consent', default=None)
360 if not consent_id:
361 consent_id = random.randint(100, 999)
362 self._set_cookie('.youtube.com', 'CONSENT', 'YES+cb.20210328-17-p0.en+FX+%s' % consent_id)
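# Illustrative resulting cookie, assuming a generated id of 123: CONSENT='YES+cb.20210328-17-p0.en+FX+123'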
363
364 def _initialize_pref(self):
365 cookies = self._get_cookies('https://www.youtube.com/')
366 pref_cookie = cookies.get('PREF')
367 pref = {}
368 if pref_cookie:
369 try:
370 pref = dict(compat_urlparse.parse_qsl(pref_cookie.value))
371 except ValueError:
372 self.report_warning('Failed to parse user PREF cookie' + bug_reports_message())
373 pref.update({'hl': 'en'})
374 self._set_cookie('.youtube.com', name='PREF', value=compat_urllib_parse_urlencode(pref))
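# e.g. an existing PREF cookie of 'hl=de&f6=8' is re-written as 'hl=en&f6=8' (interface language forced to English)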
375
376 def _real_initialize(self):
377 self._initialize_pref()
378 self._initialize_consent()
379 self._login()
380
381 _YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+?})\s*;'
382 _YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*=\s*({.+?})\s*;'
383 _YT_INITIAL_BOUNDARY_RE = r'(?:var\s+meta|</script|\n)'
384
385 def _get_default_ytcfg(self, client='web'):
386 return copy.deepcopy(INNERTUBE_CLIENTS[client])
387
388 def _get_innertube_host(self, client='web'):
389 return INNERTUBE_CLIENTS[client]['INNERTUBE_HOST']
390
391 def _ytcfg_get_safe(self, ytcfg, getter, expected_type=None, default_client='web'):
392 # try_get but with fallback to default ytcfg client values when present
393 _func = lambda y: try_get(y, getter, expected_type)
394 return _func(ytcfg) or _func(self._get_default_ytcfg(default_client))
395
396 def _extract_client_name(self, ytcfg, default_client='web'):
397 return self._ytcfg_get_safe(
398 ytcfg, (lambda x: x['INNERTUBE_CLIENT_NAME'],
399 lambda x: x['INNERTUBE_CONTEXT']['client']['clientName']), compat_str, default_client)
400
401 def _extract_client_version(self, ytcfg, default_client='web'):
402 return self._ytcfg_get_safe(
403 ytcfg, (lambda x: x['INNERTUBE_CLIENT_VERSION'],
404 lambda x: x['INNERTUBE_CONTEXT']['client']['clientVersion']), compat_str, default_client)
405
406 def _extract_api_key(self, ytcfg=None, default_client='web'):
407 return self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_API_KEY'], compat_str, default_client)
408
409 def _extract_context(self, ytcfg=None, default_client='web'):
410 context = get_first(
411 (ytcfg, self._get_default_ytcfg(default_client)), 'INNERTUBE_CONTEXT', expected_type=dict)
412 # Enforce language for extraction
413 traverse_obj(context, 'client', expected_type=dict, default={})['hl'] = 'en'
414 return context
415
416 _SAPISID = None
417
418 def _generate_sapisidhash_header(self, origin='https://www.youtube.com'):
419 time_now = round(time.time())
420 if self._SAPISID is None:
421 yt_cookies = self._get_cookies('https://www.youtube.com')
422 # Sometimes SAPISID cookie isn't present but __Secure-3PAPISID is.
423 # See: https://github.com/yt-dlp/yt-dlp/issues/393
424 sapisid_cookie = dict_get(
425 yt_cookies, ('__Secure-3PAPISID', 'SAPISID'))
426 if sapisid_cookie and sapisid_cookie.value:
427 self._SAPISID = sapisid_cookie.value
428 self.write_debug('Extracted SAPISID cookie')
429 # SAPISID cookie is required if not already present
430 if not yt_cookies.get('SAPISID'):
431 self.write_debug('Copying __Secure-3PAPISID cookie to SAPISID cookie')
432 self._set_cookie(
433 '.youtube.com', 'SAPISID', self._SAPISID, secure=True, expire_time=time_now + 3600)
434 else:
435 self._SAPISID = False
436 if not self._SAPISID:
437 return None
438 # SAPISIDHASH algorithm from https://stackoverflow.com/a/32065323
439 sapisidhash = hashlib.sha1(
440 f'{time_now} {self._SAPISID} {origin}'.encode('utf-8')).hexdigest()
441 return f'SAPISIDHASH {time_now}_{sapisidhash}'
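# Illustrative header value (example timestamp): 'SAPISIDHASH 1626900000_<40-char-hex sha1>';
# generate_api_headers() below sends this as the 'Authorization' header.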
442
443 def _call_api(self, ep, query, video_id, fatal=True, headers=None,
444 note='Downloading API JSON', errnote='Unable to download API page',
445 context=None, api_key=None, api_hostname=None, default_client='web'):
446
447 data = {'context': context} if context else {'context': self._extract_context(default_client=default_client)}
448 data.update(query)
449 real_headers = self.generate_api_headers(default_client=default_client)
450 real_headers.update({'content-type': 'application/json'})
451 if headers:
452 real_headers.update(headers)
453 return self._download_json(
454 'https://%s/youtubei/v1/%s' % (api_hostname or self._get_innertube_host(default_client), ep),
455 video_id=video_id, fatal=fatal, note=note, errnote=errnote,
456 data=json.dumps(data).encode('utf8'), headers=real_headers,
457 query={'key': api_key or self._extract_api_key()})
458
459 def extract_yt_initial_data(self, item_id, webpage, fatal=True):
460 data = self._search_regex(
461 (r'%s\s*%s' % (self._YT_INITIAL_DATA_RE, self._YT_INITIAL_BOUNDARY_RE),
462 self._YT_INITIAL_DATA_RE), webpage, 'yt initial data', fatal=fatal)
463 if data:
464 return self._parse_json(data, item_id, fatal=fatal)
465
466 @staticmethod
467 def _extract_session_index(*data):
468 """
469 Index of current account in account list.
470 See: https://github.com/yt-dlp/yt-dlp/pull/519
471 """
472 for ytcfg in data:
473 session_index = int_or_none(try_get(ytcfg, lambda x: x['SESSION_INDEX']))
474 if session_index is not None:
475 return session_index
476
477 # Deprecated?
478 def _extract_identity_token(self, ytcfg=None, webpage=None):
479 if ytcfg:
480 token = try_get(ytcfg, lambda x: x['ID_TOKEN'], compat_str)
481 if token:
482 return token
483 if webpage:
484 return self._search_regex(
485 r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
486 'identity token', default=None, fatal=False)
487
488 @staticmethod
489 def _extract_account_syncid(*args):
490 """
491 Extract syncId required to download private playlists of secondary channels
492 @params response and/or ytcfg
493 """
494 for data in args:
495 # ytcfg includes channel_syncid if on secondary channel
496 delegated_sid = try_get(data, lambda x: x['DELEGATED_SESSION_ID'], compat_str)
497 if delegated_sid:
498 return delegated_sid
499 sync_ids = (try_get(
500 data, (lambda x: x['responseContext']['mainAppWebResponseContext']['datasyncId'],
501 lambda x: x['DATASYNC_ID']), compat_str) or '').split('||')
502 if len(sync_ids) >= 2 and sync_ids[1]:
503 # datasyncid is of the form "channel_syncid||user_syncid" for secondary channel
504 # and just "user_syncid||" for primary channel. We only want the channel_syncid
505 return sync_ids[0]
506
507 @staticmethod
508 def _extract_visitor_data(*args):
509 """
510 Extracts visitorData from an API response or ytcfg
511 Appears to be used to track session state
512 """
513 return get_first(
514 args, (('VISITOR_DATA', ('INNERTUBE_CONTEXT', 'client', 'visitorData'), ('responseContext', 'visitorData'))),
515 expected_type=str)
516
517 @property
518 def is_authenticated(self):
519 return bool(self._generate_sapisidhash_header())
520
521 def extract_ytcfg(self, video_id, webpage):
522 if not webpage:
523 return {}
524 return self._parse_json(
525 self._search_regex(
526 r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg',
527 default='{}'), video_id, fatal=False) or {}
528
529 def generate_api_headers(
530 self, *, ytcfg=None, account_syncid=None, session_index=None,
531 visitor_data=None, identity_token=None, api_hostname=None, default_client='web'):
532
533 origin = 'https://' + (api_hostname if api_hostname else self._get_innertube_host(default_client))
534 headers = {
535 'X-YouTube-Client-Name': compat_str(
536 self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_CONTEXT_CLIENT_NAME'], default_client=default_client)),
537 'X-YouTube-Client-Version': self._extract_client_version(ytcfg, default_client),
538 'Origin': origin,
539 'X-Youtube-Identity-Token': identity_token or self._extract_identity_token(ytcfg),
540 'X-Goog-PageId': account_syncid or self._extract_account_syncid(ytcfg),
541 'X-Goog-Visitor-Id': visitor_data or self._extract_visitor_data(ytcfg)
542 }
543 if session_index is None:
544 session_index = self._extract_session_index(ytcfg)
545 if account_syncid or session_index is not None:
546 headers['X-Goog-AuthUser'] = session_index if session_index is not None else 0
547
548 auth = self._generate_sapisidhash_header(origin)
549 if auth is not None:
550 headers['Authorization'] = auth
551 headers['X-Origin'] = origin
552 return {h: v for h, v in headers.items() if v is not None}
553
554 @staticmethod
555 def _build_api_continuation_query(continuation, ctp=None):
556 query = {
557 'continuation': continuation
558 }
559 # TODO: Inconsistency with clickTrackingParams.
560 # Currently we have a fixed ctp contained within context (from ytcfg)
561 # and a ctp in root query for continuation.
562 if ctp:
563 query['clickTracking'] = {'clickTrackingParams': ctp}
564 return query
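# Illustrative shape of the returned query:
# {'continuation': '<token>', 'clickTracking': {'clickTrackingParams': '<ctp>'}}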
565
566 @classmethod
567 def _extract_next_continuation_data(cls, renderer):
568 next_continuation = try_get(
569 renderer, (lambda x: x['continuations'][0]['nextContinuationData'],
570 lambda x: x['continuation']['reloadContinuationData']), dict)
571 if not next_continuation:
572 return
573 continuation = next_continuation.get('continuation')
574 if not continuation:
575 return
576 ctp = next_continuation.get('clickTrackingParams')
577 return cls._build_api_continuation_query(continuation, ctp)
578
579 @classmethod
580 def _extract_continuation_ep_data(cls, continuation_ep: dict):
581 if isinstance(continuation_ep, dict):
582 continuation = try_get(
583 continuation_ep, lambda x: x['continuationCommand']['token'], compat_str)
584 if not continuation:
585 return
586 ctp = continuation_ep.get('clickTrackingParams')
587 return cls._build_api_continuation_query(continuation, ctp)
588
589 @classmethod
590 def _extract_continuation(cls, renderer):
591 next_continuation = cls._extract_next_continuation_data(renderer)
592 if next_continuation:
593 return next_continuation
594
595 contents = []
596 for key in ('contents', 'items'):
597 contents.extend(try_get(renderer, lambda x: x[key], list) or [])
598
599 for content in contents:
600 if not isinstance(content, dict):
601 continue
602 continuation_ep = try_get(
603 content, (lambda x: x['continuationItemRenderer']['continuationEndpoint'],
604 lambda x: x['continuationItemRenderer']['button']['buttonRenderer']['command']),
605 dict)
606 continuation = cls._extract_continuation_ep_data(continuation_ep)
607 if continuation:
608 return continuation
609
610 @classmethod
611 def _extract_alerts(cls, data):
612 for alert_dict in try_get(data, lambda x: x['alerts'], list) or []:
613 if not isinstance(alert_dict, dict):
614 continue
615 for alert in alert_dict.values():
616 alert_type = alert.get('type')
617 if not alert_type:
618 continue
619 message = cls._get_text(alert, 'text')
620 if message:
621 yield alert_type, message
622
623 def _report_alerts(self, alerts, expected=True, fatal=True, only_once=False):
624 errors = []
625 warnings = []
626 for alert_type, alert_message in alerts:
627 if alert_type.lower() == 'error' and fatal:
628 errors.append([alert_type, alert_message])
629 else:
630 warnings.append([alert_type, alert_message])
631
632 for alert_type, alert_message in (warnings + errors[:-1]):
633 self.report_warning('YouTube said: %s - %s' % (alert_type, alert_message), only_once=only_once)
634 if errors:
635 raise ExtractorError('YouTube said: %s' % errors[-1][1], expected=expected)
636
637 def _extract_and_report_alerts(self, data, *args, **kwargs):
638 return self._report_alerts(self._extract_alerts(data), *args, **kwargs)
639
640 def _extract_badges(self, renderer: dict):
641 badges = set()
642 for badge in try_get(renderer, lambda x: x['badges'], list) or []:
643 label = try_get(badge, lambda x: x['metadataBadgeRenderer']['label'], compat_str)
644 if label:
645 badges.add(label.lower())
646 return badges
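# Illustrative return value: {'live now', 'members only'}; these lower-cased labels are what
# _extract_video() checks for 'premium', 'members only' and 'live now'.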
647
648 @staticmethod
649 def _get_text(data, *path_list, max_runs=None):
650 for path in path_list or [None]:
651 if path is None:
652 obj = [data]
653 else:
654 obj = traverse_obj(data, path, default=[])
655 if not any(key is ... or isinstance(key, (list, tuple)) for key in variadic(path)):
656 obj = [obj]
657 for item in obj:
658 text = try_get(item, lambda x: x['simpleText'], compat_str)
659 if text:
660 return text
661 runs = try_get(item, lambda x: x['runs'], list) or []
662 if not runs and isinstance(item, list):
663 runs = item
664
665 runs = runs[:min(len(runs), max_runs or len(runs))]
666 text = ''.join(traverse_obj(runs, (..., 'text'), expected_type=str, default=[]))
667 if text:
668 return text
669
670 @staticmethod
671 def _extract_thumbnails(data, *path_list):
672 """
673 Extract thumbnails from thumbnails dict
674 @param path_list: path list to level that contains 'thumbnails' key
675 """
676 thumbnails = []
677 for path in path_list or [()]:
678 for thumbnail in traverse_obj(data, (*variadic(path), 'thumbnails', ...), default=[]):
679 thumbnail_url = url_or_none(thumbnail.get('url'))
680 if not thumbnail_url:
681 continue
682 # Sometimes youtube gives a wrong thumbnail URL. See:
683 # https://github.com/yt-dlp/yt-dlp/issues/233
684 # https://github.com/ytdl-org/youtube-dl/issues/28023
685 if 'maxresdefault' in thumbnail_url:
686 thumbnail_url = thumbnail_url.split('?')[0]
687 thumbnails.append({
688 'url': thumbnail_url,
689 'height': int_or_none(thumbnail.get('height')),
690 'width': int_or_none(thumbnail.get('width')),
691 })
692 return thumbnails
693
694 @staticmethod
695 def extract_relative_time(relative_time_text):
696 """
697 Extracts a relative time from a string and converts it to a datetime object
698 e.g. 'streamed 6 days ago', '5 seconds ago (edited)'
699 """
700 mobj = re.search(r'(?P<time>\d+)\s*(?P<unit>microsecond|second|minute|hour|day|week|month|year)s?\s*ago', relative_time_text)
701 if mobj:
702 try:
703 return datetime_from_str('now-%s%s' % (mobj.group('time'), mobj.group('unit')), precision='auto')
704 except ValueError:
705 return None
706
707 def _extract_time_text(self, renderer, *path_list):
708 text = self._get_text(renderer, *path_list) or ''
709 dt = self.extract_relative_time(text)
710 timestamp = None
711 if isinstance(dt, datetime.datetime):
712 timestamp = calendar.timegm(dt.timetuple())
713 if text and timestamp is None:
714 self.report_warning('Cannot parse localized time text' + bug_reports_message(), only_once=True)
715 return timestamp, text
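# Illustrative: 'Streamed 6 days ago' yields (a unix timestamp roughly 6 days in the past, the original text);
# text that cannot be parsed yields (None, text) after the warning above.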
716
717 def _extract_response(self, item_id, query, note='Downloading API JSON', headers=None,
718 ytcfg=None, check_get_keys=None, ep='browse', fatal=True, api_hostname=None,
719 default_client='web'):
720 response = None
721 last_error = None
722 count = -1
723 retries = self.get_param('extractor_retries', 3)
724 if check_get_keys is None:
725 check_get_keys = []
726 while count < retries:
727 count += 1
728 if last_error:
729 self.report_warning('%s. Retrying ...' % remove_end(last_error, '.'))
730 try:
731 response = self._call_api(
732 ep=ep, fatal=True, headers=headers,
733 video_id=item_id, query=query,
734 context=self._extract_context(ytcfg, default_client),
735 api_key=self._extract_api_key(ytcfg, default_client),
736 api_hostname=api_hostname, default_client=default_client,
737 note='%s%s' % (note, ' (retry #%d)' % count if count else ''))
738 except ExtractorError as e:
739 if isinstance(e.cause, network_exceptions):
740 if isinstance(e.cause, compat_HTTPError) and not is_html(e.cause.read(512)):
741 e.cause.seek(0)
742 yt_error = try_get(
743 self._parse_json(e.cause.read().decode(), item_id, fatal=False),
744 lambda x: x['error']['message'], compat_str)
745 if yt_error:
746 self._report_alerts([('ERROR', yt_error)], fatal=False)
747 # Downloading page may result in intermittent 5xx HTTP error
748 # Sometimes a 404 is also received. See: https://github.com/ytdl-org/youtube-dl/issues/28289
749 # We also want to catch all other network exceptions since errors in later pages can be troublesome
750 # See https://github.com/yt-dlp/yt-dlp/issues/507#issuecomment-880188210
751 if not isinstance(e.cause, compat_HTTPError) or e.cause.code not in (403, 429):
752 last_error = error_to_compat_str(e.cause or e.msg)
753 if count < retries:
754 continue
755 if fatal:
756 raise
757 else:
758 self.report_warning(error_to_compat_str(e))
759 return
760
761 else:
762 try:
763 self._extract_and_report_alerts(response, only_once=True)
764 except ExtractorError as e:
765 # YouTube servers may return errors we want to retry on in a 200 OK response
766 # See: https://github.com/yt-dlp/yt-dlp/issues/839
767 if 'unknown error' in e.msg.lower():
768 last_error = e.msg
769 continue
770 if fatal:
771 raise
772 self.report_warning(error_to_compat_str(e))
773 return
774 if not check_get_keys or dict_get(response, check_get_keys):
775 break
776 # Youtube sometimes sends incomplete data
777 # See: https://github.com/ytdl-org/youtube-dl/issues/28194
778 last_error = 'Incomplete data received'
779 if count >= retries:
780 if fatal:
781 raise ExtractorError(last_error)
782 else:
783 self.report_warning(last_error)
784 return
785 return response
786
787 @staticmethod
788 def is_music_url(url):
789 return re.match(r'https?://music\.youtube\.com/', url) is not None
790
791 def _extract_video(self, renderer):
792 video_id = renderer.get('videoId')
793 title = self._get_text(renderer, 'title')
794 description = self._get_text(renderer, 'descriptionSnippet')
795 duration = parse_duration(self._get_text(
796 renderer, 'lengthText', ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'text')))
797 view_count_text = self._get_text(renderer, 'viewCountText') or ''
798 view_count = str_to_int(self._search_regex(
799 r'^([\d,]+)', re.sub(r'\s', '', view_count_text),
800 'view count', default=None))
801
802 uploader = self._get_text(renderer, 'ownerText', 'shortBylineText')
803 channel_id = traverse_obj(
804 renderer, ('shortBylineText', 'runs', ..., 'navigationEndpoint', 'browseEndpoint', 'browseId'), expected_type=str, get_all=False)
805 timestamp, time_text = self._extract_time_text(renderer, 'publishedTimeText')
806 scheduled_timestamp = str_to_int(traverse_obj(renderer, ('upcomingEventData', 'startTime'), get_all=False))
807 overlay_style = traverse_obj(
808 renderer, ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'style'), get_all=False, expected_type=str)
809 badges = self._extract_badges(renderer)
810 thumbnails = self._extract_thumbnails(renderer, 'thumbnail')
811
812 return {
813 '_type': 'url',
814 'ie_key': YoutubeIE.ie_key(),
815 'id': video_id,
816 'url': f'https://www.youtube.com/watch?v={video_id}',
817 'title': title,
818 'description': description,
819 'duration': duration,
820 'view_count': view_count,
821 'uploader': uploader,
822 'channel_id': channel_id,
823 'thumbnails': thumbnails,
824 'upload_date': strftime_or_none(timestamp, '%Y%m%d'),
825 'live_status': ('is_upcoming' if scheduled_timestamp is not None
826 else 'was_live' if 'streamed' in time_text.lower()
827 else 'is_live' if overlay_style is not None and overlay_style == 'LIVE' or 'live now' in badges
828 else None),
829 'release_timestamp': scheduled_timestamp,
830 'availability': self._availability(needs_premium='premium' in badges, needs_subscription='members only' in badges)
831 }
832
833
834 class YoutubeIE(YoutubeBaseInfoExtractor):
835 IE_DESC = 'YouTube'
836 _VALID_URL = r"""(?x)^
837 (
838 (?:https?://|//) # http(s):// or protocol-independent URL
839 (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com|
840 (?:www\.)?deturl\.com/www\.youtube\.com|
841 (?:www\.)?pwnyoutube\.com|
842 (?:www\.)?hooktube\.com|
843 (?:www\.)?yourepeat\.com|
844 tube\.majestyc\.net|
845 %(invidious)s|
846 youtube\.googleapis\.com)/ # the various hostnames, with wildcard subdomains
847 (?:.*?\#/)? # handle anchor (#/) redirect urls
848 (?: # the various things that can precede the ID:
849 (?:(?:v|embed|e|shorts)/(?!videoseries)) # v/ or embed/ or e/ or shorts/
850 |(?: # or the v= param in all its forms
851 (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
852 (?:\?|\#!?) # the params delimiter ? or # or #!
853 (?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
854 v=
855 )
856 ))
857 |(?:
858 youtu\.be| # just youtu.be/xxxx
859 vid\.plus| # or vid.plus/xxxx
860 zwearz\.com/watch| # or zwearz.com/watch/xxxx
861 %(invidious)s
862 )/
863 |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
864 )
865 )? # all until now is optional -> you can pass the naked ID
866 (?P<id>[0-9A-Za-z_-]{11}) # here it is! the YouTube video ID
867 (?(1).+)? # if we found the ID, everything can follow
868 (?:\#|$)""" % {
869 'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
870 }
871 _PLAYER_INFO_RE = (
872 r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player',
873 r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',
874 r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$',
875 )
876 _formats = {
877 '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
878 '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
879 '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
880 '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
881 '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
882 '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
883 '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
884 '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
885 # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
886 '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
887 '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
888 '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
889 '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
890 '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
891 '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
892 '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
893 '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
894 '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
895
896
897 # 3D videos
898 '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
899 '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
900 '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
901 '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
902 '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
903 '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
904 '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
905
906 # Apple HTTP Live Streaming
907 '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
908 '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
909 '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
910 '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
911 '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
912 '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
913 '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
914 '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
915
916 # DASH mp4 video
917 '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
918 '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
919 '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
920 '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
921 '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
922 '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
923 '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
924 '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
925 '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
926 '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
927 '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
928 '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},
929
930 # Dash mp4 audio
931 '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
932 '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
933 '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
934 '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
935 '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
936 '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
937 '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},
938
939 # Dash webm
940 '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
941 '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
942 '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
943 '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
944 '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
945 '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
946 '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
947 '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
948 '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
949 '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
950 '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
951 '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
952 '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
953 '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
954 '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
955 # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
956 '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
957 '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
958 '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
959 '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
960 '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
961 '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
962
963 # Dash webm audio
964 '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
965 '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},
966
967 # Dash webm audio with opus inside
968 '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
969 '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
970 '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},
971
972 # RTMP (unnamed)
973 '_rtmp': {'protocol': 'rtmp'},
974
975 # av01 video only formats sometimes served with "unknown" codecs
976 '394': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},
977 '395': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},
978 '396': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'av01.0.01M.08'},
979 '397': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'av01.0.04M.08'},
980 '398': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'av01.0.05M.08'},
981 '399': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'av01.0.08M.08'},
982 '400': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},
983 '401': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},
984 }
985 _SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')
986
987 _GEO_BYPASS = False
988
989 IE_NAME = 'youtube'
990 _TESTS = [
991 {
992 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
993 'info_dict': {
994 'id': 'BaW_jenozKc',
995 'ext': 'mp4',
996 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
997 'uploader': 'Philipp Hagemeister',
998 'uploader_id': 'phihag',
999 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
1000 'channel': 'Philipp Hagemeister',
1001 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
1002 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
1003 'upload_date': '20121002',
1004 'description': 'md5:8fb536f4877b8a7455c2ec23794dbc22',
1005 'categories': ['Science & Technology'],
1006 'tags': ['youtube-dl'],
1007 'duration': 10,
1008 'view_count': int,
1009 'like_count': int,
1010 # 'dislike_count': int,
1011 'availability': 'public',
1012 'playable_in_embed': True,
1013 'thumbnail': 'https://i.ytimg.com/vi/BaW_jenozKc/maxresdefault.jpg',
1014 'live_status': 'not_live',
1015 'age_limit': 0,
1016 'start_time': 1,
1017 'end_time': 9,
1018 }
1019 },
1020 {
1021 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
1022 'note': 'Embed-only video (#1746)',
1023 'info_dict': {
1024 'id': 'yZIXLfi8CZQ',
1025 'ext': 'mp4',
1026 'upload_date': '20120608',
1027 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
1028 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
1029 'uploader': 'SET India',
1030 'uploader_id': 'setindia',
1031 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
1032 'age_limit': 18,
1033 },
1034 'skip': 'Private video',
1035 },
1036 {
1037 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=yZIXLfi8CZQ',
1038 'note': 'Use the first video ID in the URL',
1039 'info_dict': {
1040 'id': 'BaW_jenozKc',
1041 'ext': 'mp4',
1042 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
1043 'uploader': 'Philipp Hagemeister',
1044 'uploader_id': 'phihag',
1045 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
1046 'upload_date': '20121002',
1047 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
1048 'categories': ['Science & Technology'],
1049 'tags': ['youtube-dl'],
1050 'duration': 10,
1051 'view_count': int,
1052 'like_count': int,
1053 'dislike_count': int,
1054 },
1055 'params': {
1056 'skip_download': True,
1057 },
1058 },
1059 {
1060 'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
1061 'note': '256k DASH audio (format 141) via DASH manifest',
1062 'info_dict': {
1063 'id': 'a9LDPn-MO4I',
1064 'ext': 'm4a',
1065 'upload_date': '20121002',
1066 'uploader_id': '8KVIDEO',
1067 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
1068 'description': '',
1069 'uploader': '8KVIDEO',
1070 'title': 'UHDTV TEST 8K VIDEO.mp4'
1071 },
1072 'params': {
1073 'youtube_include_dash_manifest': True,
1074 'format': '141',
1075 },
1076 'skip': 'format 141 not served anymore',
1077 },
1078 # DASH manifest with encrypted signature
1079 {
1080 'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
1081 'info_dict': {
1082 'id': 'IB3lcPjvWLA',
1083 'ext': 'm4a',
1084 'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
1085 'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
1086 'duration': 244,
1087 'uploader': 'AfrojackVEVO',
1088 'uploader_id': 'AfrojackVEVO',
1089 'upload_date': '20131011',
1090 'abr': 129.495,
1091 },
1092 'params': {
1093 'youtube_include_dash_manifest': True,
1094 'format': '141/bestaudio[ext=m4a]',
1095 },
1096 },
1097 # Age-gate videos. See https://github.com/yt-dlp/yt-dlp/pull/575#issuecomment-888837000
1098 {
1099 'note': 'Embed allowed age-gate video',
1100 'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
1101 'info_dict': {
1102 'id': 'HtVdAasjOgU',
1103 'ext': 'mp4',
1104 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
1105 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
1106 'duration': 142,
1107 'uploader': 'The Witcher',
1108 'uploader_id': 'WitcherGame',
1109 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
1110 'upload_date': '20140605',
1111 'age_limit': 18,
1112 },
1113 },
1114 {
1115 'note': 'Age-gate video with embed allowed in public site',
1116 'url': 'https://youtube.com/watch?v=HsUATh_Nc2U',
1117 'info_dict': {
1118 'id': 'HsUATh_Nc2U',
1119 'ext': 'mp4',
1120 'title': 'Godzilla 2 (Official Video)',
1121 'description': 'md5:bf77e03fcae5529475e500129b05668a',
1122 'upload_date': '20200408',
1123 'uploader_id': 'FlyingKitty900',
1124 'uploader': 'FlyingKitty',
1125 'age_limit': 18,
1126 },
1127 },
1128 {
1129 'note': 'Age-gate video embeddable only with clientScreen=EMBED',
1130 'url': 'https://youtube.com/watch?v=Tq92D6wQ1mg',
1131 'info_dict': {
1132 'id': 'Tq92D6wQ1mg',
1133 'title': '[MMD] Adios - EVERGLOW [+Motion DL]',
1134 'ext': 'mp4',
1135 'upload_date': '20191227',
1136 'uploader_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',
1137 'uploader': 'Projekt Melody',
1138 'description': 'md5:17eccca93a786d51bc67646756894066',
1139 'age_limit': 18,
1140 },
1141 },
1142 {
1143 'note': 'Non age-gated non-embeddable video',
1144 'url': 'https://youtube.com/watch?v=MeJVWBSsPAY',
1145 'info_dict': {
1146 'id': 'MeJVWBSsPAY',
1147 'ext': 'mp4',
1148 'title': 'OOMPH! - Such Mich Find Mich (Lyrics)',
1149 'uploader': 'Herr Lurik',
1150 'uploader_id': 'st3in234',
1151 'description': 'Fan Video. Music & Lyrics by OOMPH!.',
1152 'upload_date': '20130730',
1153 },
1154 },
1155 {
1156 'note': 'Non-bypassable age-gated video',
1157 'url': 'https://youtube.com/watch?v=Cr381pDsSsA',
1158 'only_matching': True,
1159 },
1160 # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
1161 # YouTube Red ad is not captured for creator
1162 {
1163 'url': '__2ABJjxzNo',
1164 'info_dict': {
1165 'id': '__2ABJjxzNo',
1166 'ext': 'mp4',
1167 'duration': 266,
1168 'upload_date': '20100430',
1169 'uploader_id': 'deadmau5',
1170 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
1171 'creator': 'deadmau5',
1172 'description': 'md5:6cbcd3a92ce1bc676fc4d6ab4ace2336',
1173 'uploader': 'deadmau5',
1174 'title': 'Deadmau5 - Some Chords (HD)',
1175 'alt_title': 'Some Chords',
1176 },
1177 'expected_warnings': [
1178 'DASH manifest missing',
1179 ]
1180 },
1181 # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
1182 {
1183 'url': 'lqQg6PlCWgI',
1184 'info_dict': {
1185 'id': 'lqQg6PlCWgI',
1186 'ext': 'mp4',
1187 'duration': 6085,
1188 'upload_date': '20150827',
1189 'uploader_id': 'olympic',
1190 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
1191 'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
1192 'uploader': 'Olympics',
1193 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
1194 },
1195 'params': {
1196 'skip_download': 'requires avconv',
1197 }
1198 },
1199 # Non-square pixels
1200 {
1201 'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
1202 'info_dict': {
1203 'id': '_b-2C3KPAM0',
1204 'ext': 'mp4',
1205 'stretched_ratio': 16 / 9.,
1206 'duration': 85,
1207 'upload_date': '20110310',
1208 'uploader_id': 'AllenMeow',
1209 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
1210 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
1211 'uploader': '孫ᄋᄅ',
1212 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
1213 },
1214 },
1215 # url_encoded_fmt_stream_map is empty string
1216 {
1217 'url': 'qEJwOuvDf7I',
1218 'info_dict': {
1219 'id': 'qEJwOuvDf7I',
1220 'ext': 'webm',
1221 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
1222 'description': '',
1223 'upload_date': '20150404',
1224 'uploader_id': 'spbelect',
1225 'uploader': 'Наблюдатели Петербурга',
1226 },
1227 'params': {
1228 'skip_download': 'requires avconv',
1229 },
1230 'skip': 'This live event has ended.',
1231 },
1232 # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
1233 {
1234 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
1235 'info_dict': {
1236 'id': 'FIl7x6_3R5Y',
1237 'ext': 'webm',
1238 'title': 'md5:7b81415841e02ecd4313668cde88737a',
1239 'description': 'md5:116377fd2963b81ec4ce64b542173306',
1240 'duration': 220,
1241 'upload_date': '20150625',
1242 'uploader_id': 'dorappi2000',
1243 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
1244 'uploader': 'dorappi2000',
1245 'formats': 'mincount:31',
1246 },
1247 'skip': 'no longer applicable',
1248 },
1249 # DASH manifest with segment_list
1250 {
1251 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
1252 'md5': '8ce563a1d667b599d21064e982ab9e31',
1253 'info_dict': {
1254 'id': 'CsmdDsKjzN8',
1255 'ext': 'mp4',
1256 'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510
1257 'uploader': 'Airtek',
1258 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
1259 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
1260 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
1261 },
1262 'params': {
1263 'youtube_include_dash_manifest': True,
1264 'format': '135', # bestvideo
1265 },
1266 'skip': 'This live event has ended.',
1267 },
1268 {
1269 # Multifeed videos (multiple cameras), URL is for Main Camera
1270 'url': 'https://www.youtube.com/watch?v=jvGDaLqkpTg',
1271 'info_dict': {
1272 'id': 'jvGDaLqkpTg',
1273 'title': 'Tom Clancy Free Weekend Rainbow Whatever',
1274 'description': 'md5:e03b909557865076822aa169218d6a5d',
1275 },
1276 'playlist': [{
1277 'info_dict': {
1278 'id': 'jvGDaLqkpTg',
1279 'ext': 'mp4',
1280 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Main Camera)',
1281 'description': 'md5:e03b909557865076822aa169218d6a5d',
1282 'duration': 10643,
1283 'upload_date': '20161111',
1284 'uploader': 'Team PGP',
1285 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1286 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
1287 },
1288 }, {
1289 'info_dict': {
1290 'id': '3AKt1R1aDnw',
1291 'ext': 'mp4',
1292 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 2)',
1293 'description': 'md5:e03b909557865076822aa169218d6a5d',
1294 'duration': 10991,
1295 'upload_date': '20161111',
1296 'uploader': 'Team PGP',
1297 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1298 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
1299 },
1300 }, {
1301 'info_dict': {
1302 'id': 'RtAMM00gpVc',
1303 'ext': 'mp4',
1304 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 3)',
1305 'description': 'md5:e03b909557865076822aa169218d6a5d',
1306 'duration': 10995,
1307 'upload_date': '20161111',
1308 'uploader': 'Team PGP',
1309 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1310 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
1311 },
1312 }, {
1313 'info_dict': {
1314 'id': '6N2fdlP3C5U',
1315 'ext': 'mp4',
1316 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 4)',
1317 'description': 'md5:e03b909557865076822aa169218d6a5d',
1318 'duration': 10990,
1319 'upload_date': '20161111',
1320 'uploader': 'Team PGP',
1321 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1322 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
1323 },
1324 }],
1325 'params': {
1326 'skip_download': True,
1327 },
1328 'skip': 'Not multifeed anymore',
1329 },
1330 {
1331 # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
1332 'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
1333 'info_dict': {
1334 'id': 'gVfLd0zydlo',
1335 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
1336 },
1337 'playlist_count': 2,
1338 'skip': 'Not multifeed anymore',
1339 },
1340 {
1341 'url': 'https://vid.plus/FlRa-iH7PGw',
1342 'only_matching': True,
1343 },
1344 {
1345 'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
1346 'only_matching': True,
1347 },
1348 {
1349 # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
1350 # Also tests cut-off URL expansion in video description (see
1351 # https://github.com/ytdl-org/youtube-dl/issues/1892,
1352 # https://github.com/ytdl-org/youtube-dl/issues/8164)
1353 'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
1354 'info_dict': {
1355 'id': 'lsguqyKfVQg',
1356 'ext': 'mp4',
1357 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
1358 'alt_title': 'Dark Walk',
1359 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
1360 'duration': 133,
1361 'upload_date': '20151119',
1362 'uploader_id': 'IronSoulElf',
1363 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
1364 'uploader': 'IronSoulElf',
1365 'creator': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',
1366 'track': 'Dark Walk',
1367 'artist': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',
1368 'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
1369 },
1370 'params': {
1371 'skip_download': True,
1372 },
1373 },
1374 {
1375 # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
1376 'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
1377 'only_matching': True,
1378 },
1379 {
1380 # Video with yt:stretch=17:0
1381 'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
1382 'info_dict': {
1383 'id': 'Q39EVAstoRM',
1384 'ext': 'mp4',
1385 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
1386 'description': 'md5:ee18a25c350637c8faff806845bddee9',
1387 'upload_date': '20151107',
1388 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
1389 'uploader': 'CH GAMER DROID',
1390 },
1391 'params': {
1392 'skip_download': True,
1393 },
1394 'skip': 'This video does not exist.',
1395 },
1396 {
1397 # Video with incomplete 'yt:stretch=16:'
1398 'url': 'https://www.youtube.com/watch?v=FRhJzUSJbGI',
1399 'only_matching': True,
1400 },
1401 {
1402 # Video licensed under Creative Commons
1403 'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
1404 'info_dict': {
1405 'id': 'M4gD1WSo5mA',
1406 'ext': 'mp4',
1407 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
1408 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
1409 'duration': 721,
1410 'upload_date': '20150127',
1411 'uploader_id': 'BerkmanCenter',
1412 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
1413 'uploader': 'The Berkman Klein Center for Internet & Society',
1414 'license': 'Creative Commons Attribution license (reuse allowed)',
1415 },
1416 'params': {
1417 'skip_download': True,
1418 },
1419 },
1420 {
1421 # Channel-like uploader_url
1422 'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
1423 'info_dict': {
1424 'id': 'eQcmzGIKrzg',
1425 'ext': 'mp4',
1426 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
1427 'description': 'md5:13a2503d7b5904ef4b223aa101628f39',
1428 'duration': 4060,
1429 'upload_date': '20151119',
1430 'uploader': 'Bernie Sanders',
1431 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
1432 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
1433 'license': 'Creative Commons Attribution license (reuse allowed)',
1434 },
1435 'params': {
1436 'skip_download': True,
1437 },
1438 },
1439 {
1440 'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;v=V36LpHqtcDY',
1441 'only_matching': True,
1442 },
1443 {
1444 # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
1445 'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
1446 'only_matching': True,
1447 },
1448 {
1449 # Rental video preview
1450 'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
1451 'info_dict': {
1452 'id': 'uGpuVWrhIzE',
1453 'ext': 'mp4',
1454 'title': 'Piku - Trailer',
1455 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
1456 'upload_date': '20150811',
1457 'uploader': 'FlixMatrix',
1458 'uploader_id': 'FlixMatrixKaravan',
1459 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
1460 'license': 'Standard YouTube License',
1461 },
1462 'params': {
1463 'skip_download': True,
1464 },
1465 'skip': 'This video is not available.',
1466 },
1467 {
1468 # YouTube Red video with episode data
1469 'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
1470 'info_dict': {
1471 'id': 'iqKdEhx-dD4',
1472 'ext': 'mp4',
1473 'title': 'Isolation - Mind Field (Ep 1)',
1474 'description': 'md5:f540112edec5d09fc8cc752d3d4ba3cd',
1475 'duration': 2085,
1476 'upload_date': '20170118',
1477 'uploader': 'Vsauce',
1478 'uploader_id': 'Vsauce',
1479 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
1480 'series': 'Mind Field',
1481 'season_number': 1,
1482 'episode_number': 1,
1483 },
1484 'params': {
1485 'skip_download': True,
1486 },
1487 'expected_warnings': [
1488 'Skipping DASH manifest',
1489 ],
1490 },
1491 {
1492 # The following content has been identified by the YouTube community
1493 # as inappropriate or offensive to some audiences.
1494 'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
1495 'info_dict': {
1496 'id': '6SJNVb0GnPI',
1497 'ext': 'mp4',
1498 'title': 'Race Differences in Intelligence',
1499 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
1500 'duration': 965,
1501 'upload_date': '20140124',
1502 'uploader': 'New Century Foundation',
1503 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
1504 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
1505 },
1506 'params': {
1507 'skip_download': True,
1508 },
1509 'skip': 'This video has been removed for violating YouTube\'s policy on hate speech.',
1510 },
1511 {
1512 # itag 212
1513 'url': '1t24XAntNCY',
1514 'only_matching': True,
1515 },
1516 {
1517 # geo restricted to JP
1518 'url': 'sJL6WA-aGkQ',
1519 'only_matching': True,
1520 },
1521 {
1522 'url': 'https://invidio.us/watch?v=BaW_jenozKc',
1523 'only_matching': True,
1524 },
1525 {
1526 'url': 'https://redirect.invidious.io/watch?v=BaW_jenozKc',
1527 'only_matching': True,
1528 },
1529 {
1530 # from https://nitter.pussthecat.org/YouTube/status/1360363141947944964#m
1531 'url': 'https://redirect.invidious.io/Yh0AhrY9GjA',
1532 'only_matching': True,
1533 },
1534 {
1535 # DRM protected
1536 'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
1537 'only_matching': True,
1538 },
1539 {
1540 # Video with unsupported adaptive stream type formats
1541 'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
1542 'info_dict': {
1543 'id': 'Z4Vy8R84T1U',
1544 'ext': 'mp4',
1545 'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
1546 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
1547 'duration': 433,
1548 'upload_date': '20130923',
1549 'uploader': 'Amelia Putri Harwita',
1550 'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',
1551 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',
1552 'formats': 'maxcount:10',
1553 },
1554 'params': {
1555 'skip_download': True,
1556 'youtube_include_dash_manifest': False,
1557 },
1558 'skip': 'Not relevant anymore',
1559 },
1560 {
1561 # Youtube Music Auto-generated description
1562 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1563 'info_dict': {
1564 'id': 'MgNrAu2pzNs',
1565 'ext': 'mp4',
1566 'title': 'Voyeur Girl',
1567 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
1568 'upload_date': '20190312',
1569 'uploader': 'Stephen - Topic',
1570 'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',
1571 'artist': 'Stephen',
1572 'track': 'Voyeur Girl',
1573 'album': 'it\'s too much love to know my dear',
1574 'release_date': '20190313',
1575 'release_year': 2019,
1576 },
1577 'params': {
1578 'skip_download': True,
1579 },
1580 },
1581 {
1582 'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',
1583 'only_matching': True,
1584 },
1585 {
1586 # invalid -> valid video id redirection
1587 'url': 'DJztXj2GPfl',
1588 'info_dict': {
1589 'id': 'DJztXj2GPfk',
1590 'ext': 'mp4',
1591 'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',
1592 'description': 'md5:bf577a41da97918e94fa9798d9228825',
1593 'upload_date': '20090125',
1594 'uploader': 'Prochorowka',
1595 'uploader_id': 'Prochorowka',
1596 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',
1597 'artist': 'Panjabi MC',
1598 'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',
1599 'album': 'Beware of the Boys (Mundian To Bach Ke)',
1600 },
1601 'params': {
1602 'skip_download': True,
1603 },
1604 'skip': 'Video unavailable',
1605 },
1606 {
1607 # empty description results in an empty string
1608 'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',
1609 'info_dict': {
1610 'id': 'x41yOUIvK2k',
1611 'ext': 'mp4',
1612 'title': 'IMG 3456',
1613 'description': '',
1614 'upload_date': '20170613',
1615 'uploader_id': 'ElevageOrVert',
1616 'uploader': 'ElevageOrVert',
1617 },
1618 'params': {
1619 'skip_download': True,
1620 },
1621 },
1622 {
1623 # with '};' inside yt initial data (see [1])
1624 # see [2] for an example with '};' inside ytInitialPlayerResponse
1625 # 1. https://github.com/ytdl-org/youtube-dl/issues/27093
1626 # 2. https://github.com/ytdl-org/youtube-dl/issues/27216
1627 'url': 'https://www.youtube.com/watch?v=CHqg6qOn4no',
1628 'info_dict': {
1629 'id': 'CHqg6qOn4no',
1630 'ext': 'mp4',
1631 'title': 'Part 77 Sort a list of simple types in c#',
1632 'description': 'md5:b8746fa52e10cdbf47997903f13b20dc',
1633 'upload_date': '20130831',
1634 'uploader_id': 'kudvenkat',
1635 'uploader': 'kudvenkat',
1636 },
1637 'params': {
1638 'skip_download': True,
1639 },
1640 },
1641 {
1642 # another example of '};' in ytInitialData
1643 'url': 'https://www.youtube.com/watch?v=gVfgbahppCY',
1644 'only_matching': True,
1645 },
1646 {
1647 'url': 'https://www.youtube.com/watch_popup?v=63RmMXCd_bQ',
1648 'only_matching': True,
1649 },
1650 {
1651 # https://github.com/ytdl-org/youtube-dl/pull/28094
1652 'url': 'OtqTfy26tG0',
1653 'info_dict': {
1654 'id': 'OtqTfy26tG0',
1655 'ext': 'mp4',
1656 'title': 'Burn Out',
1657 'description': 'md5:8d07b84dcbcbfb34bc12a56d968b6131',
1658 'upload_date': '20141120',
1659 'uploader': 'The Cinematic Orchestra - Topic',
1660 'uploader_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',
1661 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',
1662 'artist': 'The Cinematic Orchestra',
1663 'track': 'Burn Out',
1664 'album': 'Every Day',
1665 'release_date': None,
1666 'release_year': None,
1667 },
1668 'params': {
1669 'skip_download': True,
1670 },
1671 },
1672 {
1673 # controversial video, only works with bpctr when authenticated with cookies
1674 'url': 'https://www.youtube.com/watch?v=nGC3D_FkCmg',
1675 'only_matching': True,
1676 },
1677 {
1678 # controversial video, requires bpctr/contentCheckOk
1679 'url': 'https://www.youtube.com/watch?v=SZJvDhaSDnc',
1680 'info_dict': {
1681 'id': 'SZJvDhaSDnc',
1682 'ext': 'mp4',
1683 'title': 'San Diego teen commits suicide after bullying over embarrassing video',
1684 'channel_id': 'UC-SJ6nODDmufqBzPBwCvYvQ',
1685 'uploader': 'CBS This Morning',
1686 'uploader_id': 'CBSThisMorning',
1687 'upload_date': '20140716',
1688 'description': 'md5:acde3a73d3f133fc97e837a9f76b53b7'
1689 }
1690 },
1691 {
1692 # restricted location, https://github.com/ytdl-org/youtube-dl/issues/28685
1693 'url': 'cBvYw8_A0vQ',
1694 'info_dict': {
1695 'id': 'cBvYw8_A0vQ',
1696 'ext': 'mp4',
1697 'title': '4K Ueno Okachimachi Street Scenes 上野御徒町歩き',
1698 'description': 'md5:ea770e474b7cd6722b4c95b833c03630',
1699 'upload_date': '20201120',
1700 'uploader': 'Walk around Japan',
1701 'uploader_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',
1702 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',
1703 },
1704 'params': {
1705 'skip_download': True,
1706 },
1707 }, {
1708 # Has multiple audio streams
1709 'url': 'WaOKSUlf4TM',
1710 'only_matching': True
1711 }, {
1712 # Requires Premium: has format 141 when requested using YTM url
1713 'url': 'https://music.youtube.com/watch?v=XclachpHxis',
1714 'only_matching': True
1715 }, {
1716 # multiple subtitles with same lang_code
1717 'url': 'https://www.youtube.com/watch?v=wsQiKKfKxug',
1718 'only_matching': True,
1719 }, {
1720 # Force use android client fallback
1721 'url': 'https://www.youtube.com/watch?v=YOelRv7fMxY',
1722 'info_dict': {
1723 'id': 'YOelRv7fMxY',
1724 'title': 'DIGGING A SECRET TUNNEL Part 1',
1725 'ext': '3gp',
1726 'upload_date': '20210624',
1727 'channel_id': 'UCp68_FLety0O-n9QU6phsgw',
1728 'uploader': 'colinfurze',
1729 'uploader_id': 'colinfurze',
1730 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCp68_FLety0O-n9QU6phsgw',
1731 'description': 'md5:b5096f56af7ccd7a555c84db81738b22'
1732 },
1733 'params': {
1734 'format': '17', # 3gp format available on android
1735 'extractor_args': {'youtube': {'player_client': ['android']}},
1736 },
1737 },
1738 {
1739 # Skip download of additional client configs (remix client config in this case)
1740 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1741 'only_matching': True,
1742 'params': {
1743 'extractor_args': {'youtube': {'player_skip': ['configs']}},
1744 },
1745 }, {
1746 # shorts
1747 'url': 'https://www.youtube.com/shorts/BGQWPY4IigY',
1748 'only_matching': True,
1749 }, {
1750 'note': 'Storyboards',
1751 'url': 'https://www.youtube.com/watch?v=5KLPxDtMqe8',
1752 'info_dict': {
1753 'id': '5KLPxDtMqe8',
1754 'ext': 'mhtml',
1755 'format_id': 'sb0',
1756 'title': 'Your Brain is Plastic',
1757 'uploader_id': 'scishow',
1758 'description': 'md5:89cd86034bdb5466cd87c6ba206cd2bc',
1759 'upload_date': '20140324',
1760 'uploader': 'SciShow',
1761 }, 'params': {'format': 'mhtml', 'skip_download': True}
1762 }
1763 ]
1764
1765 @classmethod
1766 def suitable(cls, url):
1767 from ..utils import parse_qs
1768
1769 qs = parse_qs(url)
1770 if qs.get('list', [None])[0]:
1771 return False
1772 return super(YoutubeIE, cls).suitable(url)
1773
1774 def __init__(self, *args, **kwargs):
1775 super(YoutubeIE, self).__init__(*args, **kwargs)
1776 self._code_cache = {}
1777 self._player_cache = {}
1778
1779 def _prepare_live_from_start_formats(self, formats, video_id, live_start_time, url, webpage_url, smuggled_data):
1780 EXPIRATION_DURATION = 18_000
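# 18 000 seconds = 5 hours: how long the previously extracted manifests are trusted before
# refetch_manifest() pulls fresh player responses (YouTube stream URLs expire, so a
# conservative refresh window is assumed here).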
1781 lock = threading.Lock()
1782
1783 is_live = True
1784 expiration_time = time.time() + EXPIRATION_DURATION
1785 formats = [f for f in formats if f.get('is_from_start')]
1786
1787 def refetch_manifest(format_id):
1788 nonlocal formats, expiration_time, is_live
1789 if time.time() <= expiration_time:
1790 return
1791
1792 _, _, prs, player_url = self._download_player_responses(url, smuggled_data, video_id, webpage_url)
1793 video_details = traverse_obj(
1794 prs, (..., 'videoDetails'), expected_type=dict, default=[])
1795 microformats = traverse_obj(
1796 prs, (..., 'microformat', 'playerMicroformatRenderer'),
1797 expected_type=dict, default=[])
1798 _, is_live, _, formats = self._list_formats(video_id, microformats, video_details, prs, player_url)
1799 expiration_time = time.time() + EXPIRATION_DURATION
1800
1801 def mpd_feed(format_id):
1802 """
1803 @returns (manifest_url, manifest_stream_number, is_live) or None
1804 """
1805 with lock:
1806 refetch_manifest(format_id)
1807
1808 f = next((f for f in formats if f['format_id'] == format_id), None)
1809 if not f:
1810 self.report_warning(
1811 f'Cannot find refreshed manifest for format {format_id}{bug_reports_message()}')
1812 return None
1813 return f['manifest_url'], f['manifest_stream_number'], is_live
1814
1815 for f in formats:
1816 f['protocol'] = 'http_dash_segments_generator'
1817 f['fragments'] = functools.partial(
1818 self._live_dash_fragments, f['format_id'], live_start_time, mpd_feed)
1819
1820 def _live_dash_fragments(self, format_id, live_start_time, mpd_feed, ctx):
1821 FETCH_SPAN, MAX_DURATION = 5, 432000
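# FETCH_SPAN: poll for new fragments roughly every 5 seconds.
# MAX_DURATION: 432 000 seconds = 120 hours, the furthest back the extractor assumes
# YouTube still serves live segments (see the warning below).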
1822
1823 mpd_url, stream_number, is_live = None, None, True
1824
1825 begin_index = 0
1826 download_start_time = ctx.get('start') or time.time()
1827
1828 lack_early_segments = download_start_time - (live_start_time or download_start_time) > MAX_DURATION
1829 if lack_early_segments:
1830 self.report_warning(bug_reports_message(
1831 'Starting download from the last 120 hours of the live stream since '
1832 'YouTube does not have data before that. If you think this is wrong,'), only_once=True)
1833 lack_early_segments = True
1834
1835 known_idx, no_fragment_score, last_segment_url = begin_index, 0, None
1836 fragments, fragment_base_url = None, None
1837
1838 def _extract_sequence_from_mpd(refresh_sequence):
1839 nonlocal mpd_url, stream_number, is_live, no_fragment_score, fragments, fragment_base_url
1840 # Obtain the latest sequence number from the MPD's maximum seq value
1841 old_mpd_url = mpd_url
1842 mpd_url, stream_number, is_live = mpd_feed(format_id) or (mpd_url, stream_number, False)
1843 if old_mpd_url == mpd_url and not refresh_sequence:
1844 return True, last_seq
1845 try:
1846 fmts, _ = self._extract_mpd_formats_and_subtitles(
1847 mpd_url, None, note=False, errnote=False, fatal=False)
1848 except ExtractorError:
1849 fmts = None
1850 if not fmts:
1851 no_fragment_score += 1
1852 return False, last_seq
1853 fmt_info = next(x for x in fmts if x['manifest_stream_number'] == stream_number)
1854 fragments = fmt_info['fragments']
1855 fragment_base_url = fmt_info['fragment_base_url']
1856 assert fragment_base_url
1857
1858 _last_seq = int(re.search(r'(?:/|^)sq/(\d+)', fragments[-1]['path']).group(1))
1859 return True, _last_seq
1860
1861 while is_live:
1862 fetch_time = time.time()
1863 if no_fragment_score > 30:
1864 return
1865 if last_segment_url:
1866 # Obtain from "X-Head-Seqnum" header value from each segment
1867 try:
1868 urlh = self._request_webpage(
1869 last_segment_url, None, note=False, errnote=False, fatal=False)
1870 except ExtractorError:
1871 urlh = None
1872 last_seq = try_get(urlh, lambda x: int_or_none(x.headers['X-Head-Seqnum']))
1873 if last_seq is None:
1874 no_fragment_score += 1
1875 last_segment_url = None
1876 continue
1877 else:
1878 should_retry, last_seq = _extract_sequence_from_mpd(True)
1879 if not should_retry:
1880 continue
1881
1882 if known_idx > last_seq:
1883 last_segment_url = None
1884 continue
1885
1886 last_seq += 1
1887
1888 if begin_index < 0 and known_idx < 0:
1889 # skip from the start when it's a negative value
1890 known_idx = last_seq + begin_index
1891 if lack_early_segments:
1892 known_idx = max(known_idx, last_seq - int(MAX_DURATION // fragments[-1]['duration']))
1893 try:
1894 for idx in range(known_idx, last_seq):
1895 # do not update the sequence here or some parts of it will be skipped
1896 should_retry, _ = _extract_sequence_from_mpd(False)
1897 if not should_retry:
1898 # retry when it gets into a weird state
1899 known_idx = idx - 1
1900 raise ExtractorError('breaking out of outer loop')
1901 last_segment_url = urljoin(fragment_base_url, 'sq/%d' % idx)
1902 yield {
1903 'url': last_segment_url,
1904 }
1905 if known_idx == last_seq:
1906 no_fragment_score += 5
1907 else:
1908 no_fragment_score = 0
1909 known_idx = last_seq
1910 except ExtractorError:
1911 continue
1912
1913 time.sleep(max(0, FETCH_SPAN + fetch_time - time.time()))
1914
1915 def _extract_player_url(self, *ytcfgs, webpage=None):
1916 player_url = traverse_obj(
1917 ytcfgs, (..., 'PLAYER_JS_URL'), (..., 'WEB_PLAYER_CONTEXT_CONFIGS', ..., 'jsUrl'),
1918 get_all=False, expected_type=compat_str)
1919 if not player_url:
1920 return
1921 if player_url.startswith('//'):
1922 player_url = 'https:' + player_url
1923 elif not re.match(r'https?://', player_url):
1924 player_url = compat_urlparse.urljoin(
1925 'https://www.youtube.com', player_url)
1926 return player_url
1927
1928 def _download_player_url(self, video_id, fatal=False):
1929 res = self._download_webpage(
1930 'https://www.youtube.com/iframe_api',
1931 note='Downloading iframe API JS', video_id=video_id, fatal=fatal)
1932 if res:
1933 player_version = self._search_regex(
1934 r'player\\?/([0-9a-fA-F]{8})\\?/', res, 'player version', fatal=fatal)
1935 if player_version:
1936 return f'https://www.youtube.com/s/player/{player_version}/player_ias.vflset/en_US/base.js'
1937
1938 def _signature_cache_id(self, example_sig):
1939 """ Return a string representation of a signature """
1940 return '.'.join(compat_str(len(part)) for part in example_sig.split('.'))
1941
1942 @classmethod
1943 def _extract_player_info(cls, player_url):
1944 for player_re in cls._PLAYER_INFO_RE:
1945 id_m = re.search(player_re, player_url)
1946 if id_m:
1947 break
1948 else:
1949 raise ExtractorError('Cannot identify player %r' % player_url)
1950 return id_m.group('id')
1951
1952 def _load_player(self, video_id, player_url, fatal=True):
1953 player_id = self._extract_player_info(player_url)
1954 if player_id not in self._code_cache:
1955 code = self._download_webpage(
1956 player_url, video_id, fatal=fatal,
1957 note='Downloading player ' + player_id,
1958 errnote='Download of %s failed' % player_url)
1959 if code:
1960 self._code_cache[player_id] = code
1961 return self._code_cache.get(player_id)
1962
1963 def _extract_signature_function(self, video_id, player_url, example_sig):
1964 player_id = self._extract_player_info(player_url)
1965
1966 # Read from filesystem cache
1967 func_id = 'js_%s_%s' % (
1968 player_id, self._signature_cache_id(example_sig))
1969 assert os.path.basename(func_id) == func_id
1970
1971 cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
1972 if cache_spec is not None:
1973 return lambda s: ''.join(s[i] for i in cache_spec)
1974
1975 code = self._load_player(video_id, player_url)
1976 if code:
1977 res = self._parse_sig_js(code)
1978
1979 test_string = ''.join(map(compat_chr, range(len(example_sig))))
1980 cache_res = res(test_string)
1981 cache_spec = [ord(c) for c in cache_res]
1982
1983 self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec)
1984 return res
1985
1986 def _print_sig_code(self, func, example_sig):
1987 if not self.get_param('youtube_print_sig_code'):
1988 return
1989
1990 def gen_sig_code(idxs):
1991 def _genslice(start, end, step):
1992 starts = '' if start == 0 else str(start)
1993 ends = (':%d' % (end + step)) if end + step >= 0 else ':'
1994 steps = '' if step == 1 else (':%d' % step)
1995 return 's[%s%s%s]' % (starts, ends, steps)
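# A worked instance of the slice notation built above:
# _genslice(2, 8, 3) -> 's[2:11:3]' (the end is extended by one step so index 8 is included).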
1996
1997 step = None
1998 # Squelch pyflakes warnings - start will be set when step is set
1999 start = '(Never used)'
2000 for i, prev in zip(idxs[1:], idxs[:-1]):
2001 if step is not None:
2002 if i - prev == step:
2003 continue
2004 yield _genslice(start, prev, step)
2005 step = None
2006 continue
2007 if i - prev in [-1, 1]:
2008 step = i - prev
2009 start = prev
2010 continue
2011 else:
2012 yield 's[%d]' % prev
2013 if step is None:
2014 yield 's[%d]' % i
2015 else:
2016 yield _genslice(start, i, step)
2017
2018 test_string = ''.join(map(compat_chr, range(len(example_sig))))
2019 cache_res = func(test_string)
2020 cache_spec = [ord(c) for c in cache_res]
2021 expr_code = ' + '.join(gen_sig_code(cache_spec))
2022 signature_id_tuple = '(%s)' % (
2023 ', '.join(compat_str(len(p)) for p in example_sig.split('.')))
2024 code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
2025 ' return %s\n') % (signature_id_tuple, expr_code)
2026 self.to_screen('Extracted signature function:\n' + code)
2027
2028 def _parse_sig_js(self, jscode):
2029 funcname = self._search_regex(
2030 (r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
2031 r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
2032 r'\bm=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(h\.s\)\)',
2033 r'\bc&&\(c=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(c\)\)',
2034 r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\);[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\)',
2035 r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
2036 r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
2037 # Obsolete patterns
2038 r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
2039 r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
2040 r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
2041 r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
2042 r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
2043 r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
2044 r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
2045 r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\('),
2046 jscode, 'Initial JS player signature function name', group='sig')
2047
2048 jsi = JSInterpreter(jscode)
2049 initial_function = jsi.extract_function(funcname)
2050 return lambda s: initial_function([s])
2051
2052 def _decrypt_signature(self, s, video_id, player_url):
2053 """Turn the encrypted s field into a working signature"""
2054
2055 if player_url is None:
2056 raise ExtractorError('Cannot decrypt signature without player_url')
2057
2058 try:
2059 player_id = (player_url, self._signature_cache_id(s))
2060 if player_id not in self._player_cache:
2061 func = self._extract_signature_function(
2062 video_id, player_url, s
2063 )
2064 self._player_cache[player_id] = func
2065 func = self._player_cache[player_id]
2066 self._print_sig_code(func, s)
2067 return func(s)
2068 except Exception as e:
2069 raise ExtractorError('Signature extraction failed: ' + traceback.format_exc(), cause=e)
2070
2071 def _decrypt_nsig(self, s, video_id, player_url):
2072 """Turn the encrypted n field into a working signature"""
2073 if player_url is None:
2074 raise ExtractorError('Cannot decrypt nsig without player_url')
2075 if player_url.startswith('//'):
2076 player_url = 'https:' + player_url
2077 elif not re.match(r'https?://', player_url):
2078 player_url = compat_urlparse.urljoin(
2079 'https://www.youtube.com', player_url)
2080
2081 sig_id = ('nsig_value', s)
2082 if sig_id in self._player_cache:
2083 return self._player_cache[sig_id]
2084
2085 try:
2086 player_id = ('nsig', player_url)
2087 if player_id not in self._player_cache:
2088 self._player_cache[player_id] = self._extract_n_function(video_id, player_url)
2089 func = self._player_cache[player_id]
2090 self._player_cache[sig_id] = func(s)
2091 self.write_debug(f'Decrypted nsig {s} => {self._player_cache[sig_id]}')
2092 return self._player_cache[sig_id]
2093 except Exception as e:
2094 raise ExtractorError(traceback.format_exc(), cause=e, video_id=video_id)
2095
2096 def _extract_n_function_name(self, jscode):
2097 return self._search_regex(
2098 (r'\.get\("n"\)\)&&\(b=(?P<nfunc>[a-zA-Z0-9$]{3})\([a-zA-Z0-9]\)',),
2099 jscode, 'Initial JS player n function name', group='nfunc')
2100
2101 def _extract_n_function(self, video_id, player_url):
2102 player_id = self._extract_player_info(player_url)
2103 func_code = self._downloader.cache.load('youtube-nsig', player_id)
2104
2105 if func_code:
2106 jsi = JSInterpreter(func_code)
2107 else:
2108 jscode = self._load_player(video_id, player_url)
2109 funcname = self._extract_n_function_name(jscode)
2110 jsi = JSInterpreter(jscode)
2111 func_code = jsi.extract_function_code(funcname)
2112 self._downloader.cache.store('youtube-nsig', player_id, func_code)
2113
2114 if self.get_param('youtube_print_sig_code'):
2115 self.to_screen(f'Extracted nsig function from {player_id}:\n{func_code[1]}\n')
2116
2117 return lambda s: jsi.extract_function_from_code(*func_code)([s])
2118
2119 def _extract_signature_timestamp(self, video_id, player_url, ytcfg=None, fatal=False):
2120 """
2121 Extract signatureTimestamp (sts)
2122 Required to tell API what sig/player version is in use.
2123 """
2124 sts = None
2125 if isinstance(ytcfg, dict):
2126 sts = int_or_none(ytcfg.get('STS'))
2127
2128 if not sts:
2129 # Attempt to extract from player
2130 if player_url is None:
2131 error_msg = 'Cannot extract signature timestamp without player_url.'
2132 if fatal:
2133 raise ExtractorError(error_msg)
2134 self.report_warning(error_msg)
2135 return
2136 code = self._load_player(video_id, player_url, fatal=fatal)
2137 if code:
2138 sts = int_or_none(self._search_regex(
2139 r'(?:signatureTimestamp|sts)\s*:\s*(?P<sts>[0-9]{5})', code,
2140 'JS player signature timestamp', group='sts', fatal=fatal))
2141 return sts
2142
2143 def _mark_watched(self, video_id, player_responses):
2144 playback_url = get_first(
2145 player_responses, ('playbackTracking', 'videostatsPlaybackUrl', 'baseUrl'),
2146 expected_type=url_or_none)
2147 if not playback_url:
2148 self.report_warning('Unable to mark watched')
2149 return
2150 parsed_playback_url = compat_urlparse.urlparse(playback_url)
2151 qs = compat_urlparse.parse_qs(parsed_playback_url.query)
2152
2153 # The cpn generation algorithm is reverse engineered from base.js.
2154 # In fact it works even with a dummy cpn.
2155 CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
2156 cpn = ''.join((CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(0, 16)))
2157
2158 qs.update({
2159 'ver': ['2'],
2160 'cpn': [cpn],
2161 })
2162 playback_url = compat_urlparse.urlunparse(
2163 parsed_playback_url._replace(query=compat_urllib_parse_urlencode(qs, True)))
2164
2165 self._download_webpage(
2166 playback_url, video_id, 'Marking watched',
2167 'Unable to mark watched', fatal=False)
2168
2169 @staticmethod
2170 def _extract_urls(webpage):
2171 # Embedded YouTube player
2172 entries = [
2173 unescapeHTML(mobj.group('url'))
2174 for mobj in re.finditer(r'''(?x)
2175 (?:
2176 <iframe[^>]+?src=|
2177 data-video-url=|
2178 <embed[^>]+?src=|
2179 embedSWF\(?:\s*|
2180 <object[^>]+data=|
2181 new\s+SWFObject\(
2182 )
2183 (["\'])
2184 (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
2185 (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
2186 \1''', webpage)]
2187
2188 # lazyYT YouTube embed
2189 entries.extend(list(map(
2190 unescapeHTML,
2191 re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage))))
2192
2193 # Wordpress "YouTube Video Importer" plugin
2194 matches = re.findall(r'''(?x)<div[^>]+
2195 class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
2196 data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
2197 entries.extend(m[-1] for m in matches)
2198
2199 return entries
2200
2201 @staticmethod
2202 def _extract_url(webpage):
2203 urls = YoutubeIE._extract_urls(webpage)
2204 return urls[0] if urls else None
2205
2206 @classmethod
2207 def extract_id(cls, url):
2208 mobj = re.match(cls._VALID_URL, url, re.VERBOSE)
2209 if mobj is None:
2210 raise ExtractorError('Invalid URL: %s' % url)
2211 return mobj.group('id')
2212
2213 def _extract_chapters_from_json(self, data, duration):
2214 chapter_list = traverse_obj(
2215 data, (
2216 'playerOverlays', 'playerOverlayRenderer', 'decoratedPlayerBarRenderer',
2217 'decoratedPlayerBarRenderer', 'playerBar', 'chapteredPlayerBarRenderer', 'chapters'
2218 ), expected_type=list)
2219
2220 return self._extract_chapters(
2221 chapter_list,
2222 chapter_time=lambda chapter: float_or_none(
2223 traverse_obj(chapter, ('chapterRenderer', 'timeRangeStartMillis')), scale=1000),
2224 chapter_title=lambda chapter: traverse_obj(
2225 chapter, ('chapterRenderer', 'title', 'simpleText'), expected_type=str),
2226 duration=duration)
2227
2228 def _extract_chapters_from_engagement_panel(self, data, duration):
2229 content_list = traverse_obj(
2230 data,
2231 ('engagementPanels', ..., 'engagementPanelSectionListRenderer', 'content', 'macroMarkersListRenderer', 'contents'),
2232 expected_type=list, default=[])
2233 chapter_time = lambda chapter: parse_duration(self._get_text(chapter, 'timeDescription'))
2234 chapter_title = lambda chapter: self._get_text(chapter, 'title')
2235
2236 return next((
2237 filter(None, (
2238 self._extract_chapters(
2239 traverse_obj(contents, (..., 'macroMarkersListItemRenderer')),
2240 chapter_time, chapter_title, duration)
2241 for contents in content_list
2242 ))), [])
2243
2244 def _extract_chapters(self, chapter_list, chapter_time, chapter_title, duration):
2245 chapters = []
2246 last_chapter = {'start_time': 0}
2247 for idx, chapter in enumerate(chapter_list or []):
2248 title = chapter_title(chapter)
2249 start_time = chapter_time(chapter)
2250 if start_time is None:
2251 continue
2252 last_chapter['end_time'] = start_time
2253 if start_time < last_chapter['start_time']:
2254 if idx == 1:
2255 chapters.pop()
2256 self.report_warning('Invalid start time for chapter "%s"' % last_chapter['title'])
2257 else:
2258 self.report_warning(f'Invalid start time for chapter "{title}"')
2259 continue
2260 last_chapter = {'start_time': start_time, 'title': title}
2261 chapters.append(last_chapter)
2262 last_chapter['end_time'] = duration
2263 return chapters
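# The returned structure is a list of dicts of the form (illustrative values):
# [{'start_time': 0, 'end_time': 90, 'title': 'Intro'},
#  {'start_time': 90, 'end_time': duration, 'title': 'Main topic'}]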
2264
2265 def _extract_yt_initial_variable(self, webpage, regex, video_id, name):
2266 return self._parse_json(self._search_regex(
2267 (r'%s\s*%s' % (regex, self._YT_INITIAL_BOUNDARY_RE),
2268 regex), webpage, name, default='{}'), video_id, fatal=False)
2269
2270 def _extract_comment(self, comment_renderer, parent=None):
2271 comment_id = comment_renderer.get('commentId')
2272 if not comment_id:
2273 return
2274
2275 text = self._get_text(comment_renderer, 'contentText')
2276
2277 # note: timestamp is an estimate calculated from the current time and time_text
2278 timestamp, time_text = self._extract_time_text(comment_renderer, 'publishedTimeText')
2279 author = self._get_text(comment_renderer, 'authorText')
2280 author_id = try_get(comment_renderer,
2281 lambda x: x['authorEndpoint']['browseEndpoint']['browseId'], compat_str)
2282
2283 votes = parse_count(try_get(comment_renderer, (lambda x: x['voteCount']['simpleText'],
2284 lambda x: x['likeCount']), compat_str)) or 0
2285 author_thumbnail = try_get(comment_renderer,
2286 lambda x: x['authorThumbnail']['thumbnails'][-1]['url'], compat_str)
2287
2288 author_is_uploader = try_get(comment_renderer, lambda x: x['authorIsChannelOwner'], bool)
2289 is_favorited = 'creatorHeart' in (try_get(
2290 comment_renderer, lambda x: x['actionButtons']['commentActionButtonsRenderer'], dict) or {})
2291 return {
2292 'id': comment_id,
2293 'text': text,
2294 'timestamp': timestamp,
2295 'time_text': time_text,
2296 'like_count': votes,
2297 'is_favorited': is_favorited,
2298 'author': author,
2299 'author_id': author_id,
2300 'author_thumbnail': author_thumbnail,
2301 'author_is_uploader': author_is_uploader,
2302 'parent': parent or 'root'
2303 }
2304
2305 def _comment_entries(self, root_continuation_data, ytcfg, video_id, parent=None, tracker=None):
2306
2307 get_single_config_arg = lambda c: self._configuration_arg(c, [''])[0]
2308
2309 def extract_header(contents):
2310 _continuation = None
2311 for content in contents:
2312 comments_header_renderer = traverse_obj(content, 'commentsHeaderRenderer')
2313 expected_comment_count = parse_count(self._get_text(
2314 comments_header_renderer, 'countText', 'commentsCount', max_runs=1))
2315
2316 if expected_comment_count:
2317 tracker['est_total'] = expected_comment_count
2318 self.to_screen(f'Downloading ~{expected_comment_count} comments')
2319 comment_sort_index = int(get_single_config_arg('comment_sort') != 'top') # 1 = new, 0 = top
2320
2321 sort_menu_item = try_get(
2322 comments_header_renderer,
2323 lambda x: x['sortMenu']['sortFilterSubMenuRenderer']['subMenuItems'][comment_sort_index], dict) or {}
2324 sort_continuation_ep = sort_menu_item.get('serviceEndpoint') or {}
2325
2326 _continuation = self._extract_continuation_ep_data(sort_continuation_ep) or self._extract_continuation(sort_menu_item)
2327 if not _continuation:
2328 continue
2329
2330 sort_text = str_or_none(sort_menu_item.get('title'))
2331 if not sort_text:
2332 sort_text = 'top comments' if comment_sort_index == 0 else 'newest first'
2333 self.to_screen('Sorting comments by %s' % sort_text.lower())
2334 break
2335 return _continuation
2336
2337 def extract_thread(contents):
2338 if not parent:
2339 tracker['current_page_thread'] = 0
2340 for content in contents:
2341 if not parent and tracker['total_parent_comments'] >= max_parents:
2342 yield
2343 comment_thread_renderer = try_get(content, lambda x: x['commentThreadRenderer'])
2344 comment_renderer = get_first(
2345 (comment_thread_renderer, content), [['commentRenderer', ('comment', 'commentRenderer')]],
2346 expected_type=dict, default={})
2347
2348 comment = self._extract_comment(comment_renderer, parent)
2349 if not comment:
2350 continue
2351
2352 tracker['running_total'] += 1
2353 tracker['total_reply_comments' if parent else 'total_parent_comments'] += 1
2354 yield comment
2355
2356 # Attempt to get the replies
2357 comment_replies_renderer = try_get(
2358 comment_thread_renderer, lambda x: x['replies']['commentRepliesRenderer'], dict)
2359
2360 if comment_replies_renderer:
2361 tracker['current_page_thread'] += 1
2362 comment_entries_iter = self._comment_entries(
2363 comment_replies_renderer, ytcfg, video_id,
2364 parent=comment.get('id'), tracker=tracker)
2365 for reply_comment in itertools.islice(comment_entries_iter, min(max_replies_per_thread, max(0, max_replies - tracker['total_reply_comments']))):
2366 yield reply_comment
2367
2368 # Keeps track of counts across recursive calls
2369 if not tracker:
2370 tracker = dict(
2371 running_total=0,
2372 est_total=0,
2373 current_page_thread=0,
2374 total_parent_comments=0,
2375 total_reply_comments=0)
2376
2377 # TODO: Deprecated
2378 # YouTube comments have a max depth of 2
2379 max_depth = int_or_none(get_single_config_arg('max_comment_depth'))
2380 if max_depth:
2381 self._downloader.deprecation_warning(
2382 '[youtube] max_comment_depth extractor argument is deprecated. Set max replies in the max-comments extractor argument instead.')
2383 if max_depth == 1 and parent:
2384 return
2385
2386 max_comments, max_parents, max_replies, max_replies_per_thread, *_ = map(
2387 lambda p: int_or_none(p, default=sys.maxsize), self._configuration_arg('max_comments', ) + [''] * 4)
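# The 'max_comments' extractor argument is a comma-separated list; the four values map to
# max total comments, max parent comments, max total replies and max replies per thread.
# Missing entries default to sys.maxsize (i.e. effectively unlimited).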
2388
2389 continuation = self._extract_continuation(root_continuation_data)
2390 message = self._get_text(root_continuation_data, ('contents', ..., 'messageRenderer', 'text'), max_runs=1)
2391 if message and not parent:
2392 self.report_warning(message, video_id=video_id)
2393
2394 response = None
2395 is_first_continuation = parent is None
2396
2397 for page_num in itertools.count(0):
2398 if not continuation:
2399 break
2400 headers = self.generate_api_headers(ytcfg=ytcfg, visitor_data=self._extract_visitor_data(response))
2401 comment_prog_str = f"({tracker['running_total']}/{tracker['est_total']})"
2402 if page_num == 0:
2403 if is_first_continuation:
2404 note_prefix = 'Downloading comment section API JSON'
2405 else:
2406 note_prefix = ' Downloading comment API JSON reply thread %d %s' % (
2407 tracker['current_page_thread'], comment_prog_str)
2408 else:
2409 note_prefix = '%sDownloading comment%s API JSON page %d %s' % (
2410 ' ' if parent else '', ' replies' if parent else '',
2411 page_num, comment_prog_str)
2412
2413 response = self._extract_response(
2414 item_id=None, query=continuation,
2415 ep='next', ytcfg=ytcfg, headers=headers, note=note_prefix,
2416 check_get_keys='onResponseReceivedEndpoints')
2417
2418 continuation_contents = traverse_obj(
2419 response, 'onResponseReceivedEndpoints', expected_type=list, default=[])
2420
2421 continuation = None
2422 for continuation_section in continuation_contents:
2423 continuation_items = traverse_obj(
2424 continuation_section,
2425 (('reloadContinuationItemsCommand', 'appendContinuationItemsAction'), 'continuationItems'),
2426 get_all=False, expected_type=list) or []
2427 if is_first_continuation:
2428 continuation = extract_header(continuation_items)
2429 is_first_continuation = False
2430 if continuation:
2431 break
2432 continue
2433
2434 for entry in extract_thread(continuation_items):
2435 if not entry:
2436 return
2437 yield entry
2438 continuation = self._extract_continuation({'contents': continuation_items})
2439 if continuation:
2440 break
2441
2442 def _get_comments(self, ytcfg, video_id, contents, webpage):
2443 """Entry for comment extraction"""
2444 def _real_comment_extract(contents):
2445 renderer = next((
2446 item for item in traverse_obj(contents, (..., 'itemSectionRenderer'), default={})
2447 if item.get('sectionIdentifier') == 'comment-item-section'), None)
2448 yield from self._comment_entries(renderer, ytcfg, video_id)
2449
2450 max_comments = int_or_none(self._configuration_arg('max_comments', [''])[0])
2451 return itertools.islice(_real_comment_extract(contents), 0, max_comments)
2452
2453 @staticmethod
2454 def _get_checkok_params():
2455 return {'contentCheckOk': True, 'racyCheckOk': True}
2456
2457 @classmethod
2458 def _generate_player_context(cls, sts=None):
2459 context = {
2460 'html5Preference': 'HTML5_PREF_WANTS',
2461 }
2462 if sts is not None:
2463 context['signatureTimestamp'] = sts
2464 return {
2465 'playbackContext': {
2466 'contentPlaybackContext': context
2467 },
2468 **cls._get_checkok_params()
2469 }
2470
2471 @staticmethod
2472 def _is_agegated(player_response):
2473 if traverse_obj(player_response, ('playabilityStatus', 'desktopLegacyAgeGateReason')):
2474 return True
2475
2476 reasons = traverse_obj(player_response, ('playabilityStatus', ('status', 'reason')), default=[])
2477 AGE_GATE_REASONS = (
2478 'confirm your age', 'age-restricted', 'inappropriate', # reason
2479 'age_verification_required', 'age_check_required', # status
2480 )
2481 return any(expected in reason for expected in AGE_GATE_REASONS for reason in reasons)
2482
2483 @staticmethod
2484 def _is_unplayable(player_response):
2485 return traverse_obj(player_response, ('playabilityStatus', 'status')) == 'UNPLAYABLE'
2486
2487 def _extract_player_response(self, client, video_id, master_ytcfg, player_ytcfg, player_url, initial_pr):
2488
2489 session_index = self._extract_session_index(player_ytcfg, master_ytcfg)
2490 syncid = self._extract_account_syncid(player_ytcfg, master_ytcfg, initial_pr)
2491 sts = self._extract_signature_timestamp(video_id, player_url, master_ytcfg, fatal=False) if player_url else None
2492 headers = self.generate_api_headers(
2493 ytcfg=player_ytcfg, account_syncid=syncid, session_index=session_index, default_client=client)
2494
2495 yt_query = {'videoId': video_id}
2496 yt_query.update(self._generate_player_context(sts))
2497 return self._extract_response(
2498 item_id=video_id, ep='player', query=yt_query,
2499 ytcfg=player_ytcfg, headers=headers, fatal=True,
2500 default_client=client,
2501 note='Downloading %s player API JSON' % client.replace('_', ' ').strip()
2502 ) or None
2503
2504 def _get_requested_clients(self, url, smuggled_data):
2505 requested_clients = []
2506 default = ['android', 'web']
2507 allowed_clients = sorted(
2508 [client for client in INNERTUBE_CLIENTS.keys() if client[:1] != '_'],
2509 key=lambda client: INNERTUBE_CLIENTS[client]['priority'], reverse=True)
2510 for client in self._configuration_arg('player_client'):
2511 if client in allowed_clients:
2512 requested_clients.append(client)
2513 elif client == 'default':
2514 requested_clients.extend(default)
2515 elif client == 'all':
2516 requested_clients.extend(allowed_clients)
2517 else:
2518 self.report_warning(f'Skipping unsupported client {client}')
2519 if not requested_clients:
2520 requested_clients = default
2521
2522 if smuggled_data.get('is_music_url') or self.is_music_url(url):
2523 requested_clients.extend(
2524 f'{client}_music' for client in requested_clients if f'{client}_music' in INNERTUBE_CLIENTS)
2525
2526 return orderedSet(requested_clients)
2527
2528 def _extract_player_ytcfg(self, client, video_id):
2529 url = {
2530 'web_music': 'https://music.youtube.com',
2531 'web_embedded': f'https://www.youtube.com/embed/{video_id}?html5=1'
2532 }.get(client)
2533 if not url:
2534 return {}
2535 webpage = self._download_webpage(url, video_id, fatal=False, note=f'Downloading {client} config')
2536 return self.extract_ytcfg(video_id, webpage) or {}
2537
2538 def _extract_player_responses(self, clients, video_id, webpage, master_ytcfg):
2539 initial_pr = None
2540 if webpage:
2541 initial_pr = self._extract_yt_initial_variable(
2542 webpage, self._YT_INITIAL_PLAYER_RESPONSE_RE,
2543 video_id, 'initial player response')
2544
2545 original_clients = clients
2546 clients = clients[::-1]
2547 prs = []
2548
2549 def append_client(client_name):
2550 if client_name in INNERTUBE_CLIENTS and client_name not in original_clients:
2551 clients.append(client_name)
2552
2553 # Android player_response does not have microFormats which are needed for
2554 # extraction of some data. So we return the initial_pr with formats
2555 # stripped out even if not requested by the user
2556 # See: https://github.com/yt-dlp/yt-dlp/issues/501
2557 if initial_pr:
2558 pr = dict(initial_pr)
2559 pr['streamingData'] = None
2560 prs.append(pr)
2561
2562 last_error = None
2563 tried_iframe_fallback = False
2564 player_url = None
2565 while clients:
2566 client = clients.pop()
2567 player_ytcfg = master_ytcfg if client == 'web' else {}
2568 if 'configs' not in self._configuration_arg('player_skip'):
2569 player_ytcfg = self._extract_player_ytcfg(client, video_id) or player_ytcfg
2570
2571 player_url = player_url or self._extract_player_url(master_ytcfg, player_ytcfg, webpage=webpage)
2572 require_js_player = self._get_default_ytcfg(client).get('REQUIRE_JS_PLAYER')
2573 if 'js' in self._configuration_arg('player_skip'):
2574 require_js_player = False
2575 player_url = None
2576
2577 if not player_url and not tried_iframe_fallback and require_js_player:
2578 player_url = self._download_player_url(video_id)
2579 tried_iframe_fallback = True
2580
2581 try:
2582 pr = initial_pr if client == 'web' and initial_pr else self._extract_player_response(
2583 client, video_id, player_ytcfg or master_ytcfg, player_ytcfg, player_url if require_js_player else None, initial_pr)
2584 except ExtractorError as e:
2585 if last_error:
2586 self.report_warning(last_error)
2587 last_error = e
2588 continue
2589
2590 if pr:
2591 prs.append(pr)
2592
2593 # creator clients can bypass AGE_VERIFICATION_REQUIRED if logged in
2594 if client.endswith('_agegate') and self._is_unplayable(pr) and self.is_authenticated:
2595 append_client(client.replace('_agegate', '_creator'))
2596 elif self._is_agegated(pr):
2597 append_client(f'{client}_agegate')
2598
2599 if last_error:
2600 if not len(prs):
2601 raise last_error
2602 self.report_warning(last_error)
2603 return prs, player_url
2604
2605 def _extract_formats(self, streaming_data, video_id, player_url, is_live):
2606 itags, stream_ids = {}, []
2607 itag_qualities, res_qualities = {}, {}
2608 q = qualities([
2609 # Normally tiny is the smallest video-only format. But
2610 # audio-only formats with unknown quality may get tagged as tiny
2611 'tiny',
2612 'audio_quality_ultralow', 'audio_quality_low', 'audio_quality_medium', 'audio_quality_high', # Audio only formats
2613 'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres'
2614 ])
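# qualities() returns a function that ranks a label by its position in the list above,
# so entries later in the list compare as higher quality.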
2615 streaming_formats = traverse_obj(streaming_data, (..., ('formats', 'adaptiveFormats'), ...), default=[])
2616
2617 for fmt in streaming_formats:
2618 if fmt.get('targetDurationSec') or fmt.get('drmFamilies'):
2619 continue
2620
2621 itag = str_or_none(fmt.get('itag'))
2622 audio_track = fmt.get('audioTrack') or {}
2623 stream_id = '%s.%s' % (itag or '', audio_track.get('id', ''))
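# stream_id disambiguates multi-audio videos, e.g. itag 140 with an audio track id of
# 'en.4' (hypothetical value) becomes '140.en.4', while a plain format becomes '140.'.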
2624 if stream_id in stream_ids:
2625 continue
2626
2627 quality = fmt.get('quality')
2628 height = int_or_none(fmt.get('height'))
2629 if quality == 'tiny' or not quality:
2630 quality = fmt.get('audioQuality', '').lower() or quality
2631 # The 3gp format (17) in android client has a quality of "small",
2632 # but is actually worse than other formats
2633 if itag == '17':
2634 quality = 'tiny'
2635 if quality:
2636 if itag:
2637 itag_qualities[itag] = quality
2638 if height:
2639 res_qualities[height] = quality
2640 # FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment
2641 # (adding `&sq=0` to the URL) and parsing emsg box to determine the
2642 # number of fragment that would subsequently requested with (`&sq=N`)
2643 if fmt.get('type') == 'FORMAT_STREAM_TYPE_OTF':
2644 continue
2645
2646 fmt_url = fmt.get('url')
2647 if not fmt_url:
2648 sc = compat_parse_qs(fmt.get('signatureCipher'))
2649 fmt_url = url_or_none(try_get(sc, lambda x: x['url'][0]))
2650 encrypted_sig = try_get(sc, lambda x: x['s'][0])
2651 if not (sc and fmt_url and encrypted_sig):
2652 continue
2653 if not player_url:
2654 continue
2655 signature = self._decrypt_signature(sc['s'][0], video_id, player_url)
2656 sp = try_get(sc, lambda x: x['sp'][0]) or 'signature'
2657 fmt_url += '&' + sp + '=' + signature
2658
2659 query = parse_qs(fmt_url)
2660 throttled = False
2661 if query.get('n'):
2662 try:
2663 fmt_url = update_url_query(fmt_url, {
2664 'n': self._decrypt_nsig(query['n'][0], video_id, player_url)})
2665 except ExtractorError as e:
2666 self.report_warning(
2667 f'nsig extraction failed: You may experience throttling for some formats\n'
2668 f'n = {query["n"][0]} ; player = {player_url}\n{e}', only_once=True)
2669 throttled = True
2670
2671 if itag:
2672 itags[itag] = 'https'
2673 stream_ids.append(stream_id)
2674
2675 tbr = float_or_none(
2676 fmt.get('averageBitrate') or fmt.get('bitrate'), 1000)
2677 dct = {
2678 'asr': int_or_none(fmt.get('audioSampleRate')),
2679 'filesize': int_or_none(fmt.get('contentLength')),
2680 'format_id': itag,
2681 'format_note': join_nonempty(
2682 '%s%s' % (audio_track.get('displayName') or '',
2683 ' (default)' if audio_track.get('audioIsDefault') else ''),
2684 fmt.get('qualityLabel') or quality.replace('audio_quality_', ''),
2685 throttled and 'THROTTLED', delim=', '),
2686 'source_preference': -10 if throttled else -1,
2687 'fps': int_or_none(fmt.get('fps')) or None,
2688 'height': height,
2689 'quality': q(quality),
2690 'tbr': tbr,
2691 'url': fmt_url,
2692 'width': int_or_none(fmt.get('width')),
2693 'language': audio_track.get('id', '').split('.')[0],
2694 'language_preference': 1 if audio_track.get('audioIsDefault') else -1,
2695 }
2696 mime_mobj = re.match(
2697 r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', fmt.get('mimeType') or '')
2698 if mime_mobj:
2699 dct['ext'] = mimetype2ext(mime_mobj.group(1))
2700 dct.update(parse_codecs(mime_mobj.group(2)))
2701 no_audio = dct.get('acodec') == 'none'
2702 no_video = dct.get('vcodec') == 'none'
2703 if no_audio:
2704 dct['vbr'] = tbr
2705 if no_video:
2706 dct['abr'] = tbr
2707 if no_audio or no_video:
2708 dct['downloader_options'] = {
2709 # Youtube throttles chunks >~10M
2710 'http_chunk_size': 10485760,
2711 }
2712 if dct.get('ext'):
2713 dct['container'] = dct['ext'] + '_dash'
2714 yield dct
2715
2716 live_from_start = is_live and self.get_param('live_from_start')
2717 skip_manifests = self._configuration_arg('skip')
2718 if not self.get_param('youtube_include_hls_manifest', True):
2719 skip_manifests.append('hls')
2720 get_dash = 'dash' not in skip_manifests and (
2721 not is_live or live_from_start or self._configuration_arg('include_live_dash'))
2722 get_hls = not live_from_start and 'hls' not in skip_manifests
2723
2724 def process_manifest_format(f, proto, itag):
2725 if itag in itags:
2726 if itags[itag] == proto or f'{itag}-{proto}' in itags:
2727 return False
2728 itag = f'{itag}-{proto}'
2729 if itag:
2730 f['format_id'] = itag
2731 itags[itag] = proto
2732
2733 f['quality'] = next((
2734 q(qdict[val])
2735 for val, qdict in ((f.get('format_id', '').split('-')[0], itag_qualities), (f.get('height'), res_qualities))
2736 if val in qdict), -1)
2737 return True
2738
2739 for sd in streaming_data:
2740 hls_manifest_url = get_hls and sd.get('hlsManifestUrl')
2741 if hls_manifest_url:
2742 for f in self._extract_m3u8_formats(hls_manifest_url, video_id, 'mp4', fatal=False):
2743 if process_manifest_format(f, 'hls', self._search_regex(
2744 r'/itag/(\d+)', f['url'], 'itag', default=None)):
2745 yield f
2746
2747 dash_manifest_url = get_dash and sd.get('dashManifestUrl')
2748 if dash_manifest_url:
2749 for f in self._extract_mpd_formats(dash_manifest_url, video_id, fatal=False):
2750 if process_manifest_format(f, 'dash', f['format_id']):
2751 f['filesize'] = int_or_none(self._search_regex(
2752 r'/clen/(\d+)', f.get('fragment_base_url') or f['url'], 'file size', default=None))
2753 if live_from_start:
2754 f['is_from_start'] = True
2755
2756 yield f
2757
2758 def _extract_storyboard(self, player_responses, duration):
2759 spec = get_first(
2760 player_responses, ('storyboards', 'playerStoryboardSpecRenderer', 'spec'), default='').split('|')[::-1]
2761 if not spec:
2762 return
2763 base_url = spec.pop()
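# After reversing, the last '|'-separated element (popped first) is the base URL template
# containing the $L/$N/$M placeholders substituted below; each remaining entry describes one
# storyboard level with 8 '#'-separated fields: width, height, frame_count, cols, rows,
# an unused field, N and sigh.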
2764 L = len(spec) - 1
2765 for i, args in enumerate(spec):
2766 args = args.split('#')
2767 counts = list(map(int_or_none, args[:5]))
2768 if len(args) != 8 or not all(counts):
2769 self.report_warning(f'Malformed storyboard {i}: {"#".join(args)}{bug_reports_message()}')
2770 continue
2771 width, height, frame_count, cols, rows = counts
2772 N, sigh = args[6:]
2773
2774 url = base_url.replace('$L', str(L - i)).replace('$N', N) + f'&sigh={sigh}'
2775 fragment_count = frame_count / (cols * rows)
2776 fragment_duration = duration / fragment_count
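# e.g. a 600 s video whose level packs 100 frames into 5x5 grids gives
# fragment_count = 100 / 25 = 4 and fragment_duration = 150 s per mosaic image
# (illustrative numbers).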
2777 yield {
2778 'format_id': f'sb{i}',
2779 'format_note': 'storyboard',
2780 'ext': 'mhtml',
2781 'protocol': 'mhtml',
2782 'acodec': 'none',
2783 'vcodec': 'none',
2784 'url': url,
2785 'width': width,
2786 'height': height,
2787 'fragments': [{
2788 'path': url.replace('$M', str(j)),
2789 'duration': min(fragment_duration, duration - (j * fragment_duration)),
2790 } for j in range(math.ceil(fragment_count))],
2791 }
2792
2793 def _download_player_responses(self, url, smuggled_data, video_id, webpage_url):
2794 webpage = None
2795 if 'webpage' not in self._configuration_arg('player_skip'):
2796 webpage = self._download_webpage(
2797 webpage_url + '&bpctr=9999999999&has_verified=1', video_id, fatal=False)
2798
2799 master_ytcfg = self.extract_ytcfg(video_id, webpage) or self._get_default_ytcfg()
2800
2801 player_responses, player_url = self._extract_player_responses(
2802 self._get_requested_clients(url, smuggled_data),
2803 video_id, webpage, master_ytcfg)
2804
2805 return webpage, master_ytcfg, player_responses, player_url
2806
2807 def _list_formats(self, video_id, microformats, video_details, player_responses, player_url):
2808 live_broadcast_details = traverse_obj(microformats, (..., 'liveBroadcastDetails'))
2809 is_live = get_first(video_details, 'isLive')
2810 if is_live is None:
2811 is_live = get_first(live_broadcast_details, 'isLiveNow')
2812
2813 streaming_data = traverse_obj(player_responses, (..., 'streamingData'), default=[])
2814 formats = list(self._extract_formats(streaming_data, video_id, player_url, is_live))
2815
2816 return live_broadcast_details, is_live, streaming_data, formats
2817
2818 def _real_extract(self, url):
2819 url, smuggled_data = unsmuggle_url(url, {})
2820 video_id = self._match_id(url)
2821
2822 base_url = self.http_scheme() + '//www.youtube.com/'
2823 webpage_url = base_url + 'watch?v=' + video_id
2824
2825 webpage, master_ytcfg, player_responses, player_url = self._download_player_responses(url, smuggled_data, video_id, webpage_url)
2826
2827 playability_statuses = traverse_obj(
2828 player_responses, (..., 'playabilityStatus'), expected_type=dict, default=[])
2829
2830 trailer_video_id = get_first(
2831 playability_statuses,
2832 ('errorScreen', 'playerLegacyDesktopYpcTrailerRenderer', 'trailerVideoId'),
2833 expected_type=str)
2834 if trailer_video_id:
2835 return self.url_result(
2836 trailer_video_id, self.ie_key(), trailer_video_id)
2837
2838 search_meta = ((lambda x: self._html_search_meta(x, webpage, default=None))
2839 if webpage else (lambda x: None))
2840
2841 video_details = traverse_obj(
2842 player_responses, (..., 'videoDetails'), expected_type=dict, default=[])
2843 microformats = traverse_obj(
2844 player_responses, (..., 'microformat', 'playerMicroformatRenderer'),
2845 expected_type=dict, default=[])
2846 video_title = (
2847 get_first(video_details, 'title')
2848 or self._get_text(microformats, (..., 'title'))
2849 or search_meta(['og:title', 'twitter:title', 'title']))
2850 video_description = get_first(video_details, 'shortDescription')
2851
2852 multifeed_metadata_list = get_first(
2853 player_responses,
2854 ('multicamera', 'playerLegacyMulticameraRenderer', 'metadataList'),
2855 expected_type=str)
2856 if multifeed_metadata_list and not smuggled_data.get('force_singlefeed'):
2857 if self.get_param('noplaylist'):
2858 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
2859 else:
2860 entries = []
2861 feed_ids = []
2862 for feed in multifeed_metadata_list.split(','):
2863 # Unquote should take place before the split on comma (,) since textual
2864 # fields may contain commas as well (see
2865 # https://github.com/ytdl-org/youtube-dl/issues/8536)
2866 feed_data = compat_parse_qs(
2867 compat_urllib_parse_unquote_plus(feed))
2868
2869 def feed_entry(name):
2870 return try_get(
2871 feed_data, lambda x: x[name][0], compat_str)
2872
2873 feed_id = feed_entry('id')
2874 if not feed_id:
2875 continue
2876 feed_title = feed_entry('title')
2877 title = video_title
2878 if feed_title:
2879 title += ' (%s)' % feed_title
2880 entries.append({
2881 '_type': 'url_transparent',
2882 'ie_key': 'Youtube',
2883 'url': smuggle_url(
2884 '%swatch?v=%s' % (base_url, feed_data['id'][0]),
2885 {'force_singlefeed': True}),
2886 'title': title,
2887 })
2888 feed_ids.append(feed_id)
2889 self.to_screen(
2890 'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
2891 % (', '.join(feed_ids), video_id))
2892 return self.playlist_result(
2893 entries, video_id, video_title, video_description)
2894
2895 live_broadcast_details, is_live, streaming_data, formats = self._list_formats(video_id, microformats, video_details, player_responses, player_url)
2896
2897 if not formats:
2898 if not self.get_param('allow_unplayable_formats') and traverse_obj(streaming_data, (..., 'licenseInfos')):
2899 self.report_drm(video_id)
2900 pemr = get_first(
2901 playability_statuses,
2902 ('errorScreen', 'playerErrorMessageRenderer'), expected_type=dict) or {}
2903 reason = self._get_text(pemr, 'reason') or get_first(playability_statuses, 'reason')
2904 subreason = clean_html(self._get_text(pemr, 'subreason') or '')
2905 if subreason:
2906 if subreason == 'The uploader has not made this video available in your country.':
2907 countries = get_first(microformats, 'availableCountries')
2908 if not countries:
2909 regions_allowed = search_meta('regionsAllowed')
2910 countries = regions_allowed.split(',') if regions_allowed else None
2911 self.raise_geo_restricted(subreason, countries, metadata_available=True)
2912 reason += f'. {subreason}'
2913 if reason:
2914 self.raise_no_formats(reason, expected=True)
2915
2916 keywords = get_first(video_details, 'keywords', expected_type=list) or []
2917 if not keywords and webpage:
2918 keywords = [
2919 unescapeHTML(m.group('content'))
2920 for m in re.finditer(self._meta_regex('og:video:tag'), webpage)]
2921 for keyword in keywords:
2922 if keyword.startswith('yt:stretch='):
2923 mobj = re.search(r'(\d+)\s*:\s*(\d+)', keyword)
2924 if mobj:
2925 # NB: float is intentional for forcing float division
2926 w, h = (float(v) for v in mobj.groups())
2927 if w > 0 and h > 0:
2928 ratio = w / h
2929 for f in formats:
2930 if f.get('vcodec') != 'none':
2931 f['stretched_ratio'] = ratio
2932 break
2933 thumbnails = self._extract_thumbnails((video_details, microformats), (..., ..., 'thumbnail'))
2934 thumbnail_url = search_meta(['og:image', 'twitter:image'])
2935 if thumbnail_url:
2936 thumbnails.append({
2937 'url': thumbnail_url,
2938 })
2939 original_thumbnails = thumbnails.copy()
2940
2941 # The best resolution thumbnail sometimes does not appear in the webpage
2942 # See: https://github.com/ytdl-org/youtube-dl/issues/29049, https://github.com/yt-dlp/yt-dlp/issues/340
2943 # List of possible thumbnails - Ref: <https://stackoverflow.com/a/20542029>
2944 thumbnail_names = [
2945 'maxresdefault', 'hq720', 'sddefault', 'sd1', 'sd2', 'sd3',
2946 'hqdefault', 'hq1', 'hq2', 'hq3', '0',
2947 'mqdefault', 'mq1', 'mq2', 'mq3',
2948 'default', '1', '2', '3'
2949 ]
2950 n_thumbnail_names = len(thumbnail_names)
2951 thumbnails.extend({
2952 'url': 'https://i.ytimg.com/vi{webp}/{video_id}/{name}{live}.{ext}'.format(
2953 video_id=video_id, name=name, ext=ext,
2954 webp='_webp' if ext == 'webp' else '', live='_live' if is_live else ''),
2955 } for name in thumbnail_names for ext in ('webp', 'jpg'))
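# Rank the guessed thumbnails by their position in thumbnail_names (earlier names are higher resolution);
# at the same rank, prefer webp over jpg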
2956 for thumb in thumbnails:
2957 i = next((i for i, t in enumerate(thumbnail_names) if f'/{video_id}/{t}' in thumb['url']), n_thumbnail_names)
2958 thumb['preference'] = (0 if '.webp' in thumb['url'] else -1) - (2 * i)
2959 self._remove_duplicate_formats(thumbnails)
2960 self._downloader._sort_thumbnails(original_thumbnails)
2961
2962 category = get_first(microformats, 'category') or search_meta('genre')
2963 channel_id = str_or_none(
2964 get_first(video_details, 'channelId')
2965 or get_first(microformats, 'externalChannelId')
2966 or search_meta('channelId'))
2967 duration = int_or_none(
2968 get_first(video_details, 'lengthSeconds')
2969 or get_first(microformats, 'lengthSeconds')
2970 or parse_duration(search_meta('duration'))) or None
2971 owner_profile_url = get_first(microformats, 'ownerProfileUrl')
2972
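# Infer is_live/is_upcoming from isLiveContent and from each other when the player response is not explicit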
2973 live_content = get_first(video_details, 'isLiveContent')
2974 is_upcoming = get_first(video_details, 'isUpcoming')
2975 if is_live is None:
2976 if is_upcoming or live_content is False:
2977 is_live = False
2978 if is_upcoming is None and (live_content or is_live):
2979 is_upcoming = False
2980 live_start_time = parse_iso8601(get_first(live_broadcast_details, 'startTimestamp'))
2981 live_end_time = parse_iso8601(get_first(live_broadcast_details, 'endTimestamp'))
2982 if not duration and live_end_time and live_start_time:
2983 duration = live_end_time - live_start_time
2984
2985 if is_live and self.get_param('live_from_start'):
2986 self._prepare_live_from_start_formats(formats, video_id, live_start_time, url, webpage_url, smuggled_data)
2987
2988 formats.extend(self._extract_storyboard(player_responses, duration))
2989
2990 # Source is given priority since formats that throttle are given lower source_preference
2991 # When the throttling issue is fully fixed, remove this
2992 self._sort_formats(formats, ('quality', 'res', 'fps', 'hdr:12', 'source', 'codec:vp9.2', 'lang', 'proto'))
2993
2994 info = {
2995 'id': video_id,
2996 'title': video_title,
2997 'formats': formats,
2998 'thumbnails': thumbnails,
2999 # The best thumbnail that we are sure exists. Prevents unnecessary
3000 # URL checking if the user doesn't care about getting the best possible thumbnail
3001 'thumbnail': traverse_obj(original_thumbnails, (-1, 'url')),
3002 'description': video_description,
3003 'upload_date': unified_strdate(
3004 get_first(microformats, 'uploadDate')
3005 or search_meta('uploadDate')),
3006 'uploader': get_first(video_details, 'author'),
3007 'uploader_id': self._search_regex(r'/(?:channel|user)/([^/?&#]+)', owner_profile_url, 'uploader id') if owner_profile_url else None,
3008 'uploader_url': owner_profile_url,
3009 'channel_id': channel_id,
3010 'channel_url': f'https://www.youtube.com/channel/{channel_id}' if channel_id else None,
3011 'duration': duration,
3012 'view_count': int_or_none(
3013 get_first((video_details, microformats), (..., 'viewCount'))
3014 or search_meta('interactionCount')),
3015 'average_rating': float_or_none(get_first(video_details, 'averageRating')),
3016 'age_limit': 18 if (
3017 get_first(microformats, 'isFamilySafe') is False
3018 or search_meta('isFamilyFriendly') == 'false'
3019 or search_meta('og:restrictions:age') == '18+') else 0,
3020 'webpage_url': webpage_url,
3021 'categories': [category] if category else None,
3022 'tags': keywords,
3023 'playable_in_embed': get_first(playability_statuses, 'playableInEmbed'),
3024 'is_live': is_live,
3025 'was_live': (False if is_live or is_upcoming or live_content is False
3026 else None if is_live is None or is_upcoming is None
3027 else live_content),
3028 'live_status': 'is_upcoming' if is_upcoming else None, # rest will be set by YoutubeDL
3029 'release_timestamp': live_start_time,
3030 }
3031
3032 pctr = traverse_obj(player_responses, (..., 'captions', 'playerCaptionsTracklistRenderer'), expected_type=dict)
3033 if pctr:
3034 def get_lang_code(track):
3035 return (remove_start(track.get('vssId') or '', '.').replace('.', '-')
3036 or track.get('languageCode'))
3037
3038 # Converted into dicts to remove duplicates
3039 captions = {
3040 get_lang_code(sub): sub
3041 for sub in traverse_obj(pctr, (..., 'captionTracks', ...), default=[])}
3042 translation_languages = {
3043 lang.get('languageCode'): self._get_text(lang.get('languageName'), max_runs=1)
3044 for lang in traverse_obj(pctr, (..., 'translationLanguages', ...), default=[])}
3045
3046 def process_language(container, base_url, lang_code, sub_name, query):
3047 lang_subs = container.setdefault(lang_code, [])
3048 for fmt in self._SUBTITLE_FORMATS:
3049 query.update({
3050 'fmt': fmt,
3051 })
3052 lang_subs.append({
3053 'ext': fmt,
3054 'url': update_url_query(base_url, query),
3055 'name': sub_name,
3056 })
3057
3058 subtitles, automatic_captions = {}, {}
3059 for lang_code, caption_track in captions.items():
3060 base_url = caption_track.get('baseUrl')
3061 if not base_url:
3062 continue
3063 lang_name = self._get_text(caption_track, 'name', max_runs=1)
3064 if caption_track.get('kind') != 'asr':
3065 if not lang_code:
3066 continue
3067 process_language(
3068 subtitles, base_url, lang_code, lang_name, {})
3069 if not caption_track.get('isTranslatable'):
3070 continue
3071 for trans_code, trans_name in translation_languages.items():
3072 if not trans_code:
3073 continue
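# Translations of manually-created captions are keyed '<target>-<source>' to
# distinguish them from translations of the ASR track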
3074 if caption_track.get('kind') != 'asr':
3075 trans_code += f'-{lang_code}'
3076 trans_name += format_field(lang_name, template=' from %s')
3077 process_language(
3078 automatic_captions, base_url, trans_code, trans_name, {'tlang': trans_code})
3079 info['automatic_captions'] = automatic_captions
3080 info['subtitles'] = subtitles
3081
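# Derive start_time/end_time from the t/start/end parameters of the URL query or fragment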
3082 parsed_url = compat_urllib_parse_urlparse(url)
3083 for component in [parsed_url.fragment, parsed_url.query]:
3084 query = compat_parse_qs(component)
3085 for k, v in query.items():
3086 for d_k, s_ks in [('start', ('start', 't')), ('end', ('end',))]:
3087 d_k += '_time'
3088 if d_k not in info and k in s_ks:
3089 info[d_k] = parse_duration(query[k][0])
3090
3091 # Youtube Music Auto-generated description
3092 if video_description:
3093 mobj = re.search(r'(?s)(?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?.+\nAuto-generated by YouTube\.\s*$', video_description)
3094 if mobj:
3095 release_year = mobj.group('release_year')
3096 release_date = mobj.group('release_date')
3097 if release_date:
3098 release_date = release_date.replace('-', '')
3099 if not release_year:
3100 release_year = release_date[:4]
3101 info.update({
3102 'album': mobj.group('album').strip(),
3103 'artist': mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·')),
3104 'track': mobj.group('track').strip(),
3105 'release_date': release_date,
3106 'release_year': int_or_none(release_year),
3107 })
3108
3109 initial_data = None
3110 if webpage:
3111 initial_data = self._extract_yt_initial_variable(
3112 webpage, self._YT_INITIAL_DATA_RE, video_id,
3113 'yt initial data')
3114 if not initial_data:
3115 query = {'videoId': video_id}
3116 query.update(self._get_checkok_params())
3117 initial_data = self._extract_response(
3118 item_id=video_id, ep='next', fatal=False,
3119 ytcfg=master_ytcfg, query=query,
3120 headers=self.generate_api_headers(ytcfg=master_ytcfg),
3121 note='Downloading initial data API JSON')
3122
3123 try:
3124 # This will error if there is no livechat
3125 initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
3126 info.setdefault('subtitles', {})['live_chat'] = [{
3127 'url': 'https://www.youtube.com/watch?v=%s' % video_id, # url is needed to set cookies
3128 'video_id': video_id,
3129 'ext': 'json',
3130 'protocol': 'youtube_live_chat' if is_live or is_upcoming else 'youtube_live_chat_replay',
3131 }]
3132 except (KeyError, IndexError, TypeError):
3133 pass
3134
3135 if initial_data:
3136 info['chapters'] = (
3137 self._extract_chapters_from_json(initial_data, duration)
3138 or self._extract_chapters_from_engagement_panel(initial_data, duration)
3139 or None)
3140
3141 contents = try_get(
3142 initial_data,
3143 lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'],
3144 list) or []
3145 for content in contents:
3146 vpir = content.get('videoPrimaryInfoRenderer')
3147 if vpir:
3148 stl = vpir.get('superTitleLink')
3149 if stl:
3150 stl = self._get_text(stl)
3151 if try_get(
3152 vpir,
3153 lambda x: x['superTitleIcon']['iconType']) == 'LOCATION_PIN':
3154 info['location'] = stl
3155 else:
3156 mobj = re.search(r'(.+?)\s*S(\d+)\s*•\s*E(\d+)', stl)
3157 if mobj:
3158 info.update({
3159 'series': mobj.group(1),
3160 'season_number': int(mobj.group(2)),
3161 'episode_number': int(mobj.group(3)),
3162 })
3163 for tlb in (try_get(
3164 vpir,
3165 lambda x: x['videoActions']['menuRenderer']['topLevelButtons'],
3166 list) or []):
3167 tbr = tlb.get('toggleButtonRenderer') or {}
3168 for getter, regex in [(
3169 lambda x: x['defaultText']['accessibility']['accessibilityData'],
3170 r'(?P<count>[\d,]+)\s*(?P<type>(?:dis)?like)'), ([
3171 lambda x: x['accessibility'],
3172 lambda x: x['accessibilityData']['accessibilityData'],
3173 ], r'(?P<type>(?:dis)?like) this video along with (?P<count>[\d,]+) other people')]:
3174 label = (try_get(tbr, getter, dict) or {}).get('label')
3175 if label:
3176 mobj = re.match(regex, label)
3177 if mobj:
3178 info[mobj.group('type') + '_count'] = str_to_int(mobj.group('count'))
3179 break
3180 sbr_tooltip = try_get(
3181 vpir, lambda x: x['sentimentBar']['sentimentBarRenderer']['tooltip'])
3182 if sbr_tooltip:
3183 like_count, dislike_count = sbr_tooltip.split(' / ')
3184 info.update({
3185 'like_count': str_to_int(like_count),
3186 'dislike_count': str_to_int(dislike_count),
3187 })
3188 vsir = content.get('videoSecondaryInfoRenderer')
3189 if vsir:
3190 info['channel'] = self._get_text(vsir, ('owner', 'videoOwnerRenderer', 'title'))
3191 rows = try_get(
3192 vsir,
3193 lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows'],
3194 list) or []
3195 multiple_songs = False
3196 for row in rows:
3197 if try_get(row, lambda x: x['metadataRowRenderer']['hasDividerLine']) is True:
3198 multiple_songs = True
3199 break
3200 for row in rows:
3201 mrr = row.get('metadataRowRenderer') or {}
3202 mrr_title = mrr.get('title')
3203 if not mrr_title:
3204 continue
3205 mrr_title = self._get_text(mrr, 'title')
3206 mrr_contents_text = self._get_text(mrr, ('contents', 0))
3207 if mrr_title == 'License':
3208 info['license'] = mrr_contents_text
3209 elif not multiple_songs:
3210 if mrr_title == 'Album':
3211 info['album'] = mrr_contents_text
3212 elif mrr_title == 'Artist':
3213 info['artist'] = mrr_contents_text
3214 elif mrr_title == 'Song':
3215 info['track'] = mrr_contents_text
3216
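# Fall back to the uploader fields when the corresponding channel fields could not be extracted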
3217 fallbacks = {
3218 'channel': 'uploader',
3219 'channel_id': 'uploader_id',
3220 'channel_url': 'uploader_url',
3221 }
3222 for to, frm in fallbacks.items():
3223 if not info.get(to):
3224 info[to] = info.get(frm)
3225
3226 for s_k, d_k in [('artist', 'creator'), ('track', 'alt_title')]:
3227 v = info.get(s_k)
3228 if v:
3229 info[d_k] = v
3230
3231 is_private = get_first(video_details, 'isPrivate', expected_type=bool)
3232 is_unlisted = get_first(microformats, 'isUnlisted', expected_type=bool)
3233 is_membersonly = None
3234 is_premium = None
3235 if initial_data and is_private is not None:
3236 is_membersonly = False
3237 is_premium = False
3238 contents = try_get(initial_data, lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'], list) or []
3239 badge_labels = set()
3240 for content in contents:
3241 if not isinstance(content, dict):
3242 continue
3243 badge_labels.update(self._extract_badges(content.get('videoPrimaryInfoRenderer')))
3244 for badge_label in badge_labels:
3245 if badge_label.lower() == 'members only':
3246 is_membersonly = True
3247 elif badge_label.lower() == 'premium':
3248 is_premium = True
3249 elif badge_label.lower() == 'unlisted':
3250 is_unlisted = True
3251
3252 info['availability'] = self._availability(
3253 is_private=is_private,
3254 needs_premium=is_premium,
3255 needs_subscription=is_membersonly,
3256 needs_auth=info['age_limit'] >= 18,
3257 is_unlisted=None if is_private is None else is_unlisted)
3258
3259 info['__post_extractor'] = self.extract_comments(master_ytcfg, video_id, contents, webpage)
3260
3261 self.mark_watched(video_id, player_responses)
3262
3263 return info
3264
3265
3266 class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):
3267
3268 def _extract_channel_id(self, webpage):
3269 channel_id = self._html_search_meta(
3270 'channelId', webpage, 'channel id', default=None)
3271 if channel_id:
3272 return channel_id
3273 channel_url = self._html_search_meta(
3274 ('og:url', 'al:ios:url', 'al:android:url', 'al:web:url',
3275 'twitter:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad',
3276 'twitter:app:url:googleplay'), webpage, 'channel url')
3277 return self._search_regex(
3278 r'https?://(?:www\.)?youtube\.com/channel/([^/?#&]+)',
3279 channel_url, 'channel id')
3280
3281 @staticmethod
3282 def _extract_basic_item_renderer(item):
3283 # Modified from _extract_grid_item_renderer
3284 known_basic_renderers = (
3285 'playlistRenderer', 'videoRenderer', 'channelRenderer', 'showRenderer'
3286 )
3287 for key, renderer in item.items():
3288 if not isinstance(renderer, dict):
3289 continue
3290 elif key in known_basic_renderers:
3291 return renderer
3292 elif key.startswith('grid') and key.endswith('Renderer'):
3293 return renderer
3294
3295 def _grid_entries(self, grid_renderer):
3296 for item in grid_renderer['items']:
3297 if not isinstance(item, dict):
3298 continue
3299 renderer = self._extract_basic_item_renderer(item)
3300 if not isinstance(renderer, dict):
3301 continue
3302 title = self._get_text(renderer, 'title')
3303
3304 # playlist
3305 playlist_id = renderer.get('playlistId')
3306 if playlist_id:
3307 yield self.url_result(
3308 'https://www.youtube.com/playlist?list=%s' % playlist_id,
3309 ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
3310 video_title=title)
3311 continue
3312 # video
3313 video_id = renderer.get('videoId')
3314 if video_id:
3315 yield self._extract_video(renderer)
3316 continue
3317 # channel
3318 channel_id = renderer.get('channelId')
3319 if channel_id:
3320 yield self.url_result(
3321 'https://www.youtube.com/channel/%s' % channel_id,
3322 ie=YoutubeTabIE.ie_key(), video_title=title)
3323 continue
3324 # generic endpoint URL support
3325 ep_url = urljoin('https://www.youtube.com/', try_get(
3326 renderer, lambda x: x['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'],
3327 compat_str))
3328 if ep_url:
3329 for ie in (YoutubeTabIE, YoutubePlaylistIE, YoutubeIE):
3330 if ie.suitable(ep_url):
3331 yield self.url_result(
3332 ep_url, ie=ie.ie_key(), video_id=ie._match_id(ep_url), video_title=title)
3333 break
3334
3335 def _shelf_entries_from_content(self, shelf_renderer):
3336 content = shelf_renderer.get('content')
3337 if not isinstance(content, dict):
3338 return
3339 renderer = content.get('gridRenderer') or content.get('expandedShelfContentsRenderer')
3340 if renderer:
3341 # TODO: add support for nested playlists so that each shelf is processed
3342 # as a separate playlist
3343 # TODO: this includes only the first N items
3344 for entry in self._grid_entries(renderer):
3345 yield entry
3346 renderer = content.get('horizontalListRenderer')
3347 if renderer:
3348 # TODO
3349 pass
3350
3351 def _shelf_entries(self, shelf_renderer, skip_channels=False):
3352 ep = try_get(
3353 shelf_renderer, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
3354 compat_str)
3355 shelf_url = urljoin('https://www.youtube.com', ep)
3356 if shelf_url:
3357 # Skip links to other channels; note that checking for
3358 # endpoint.commandMetadata.webCommandMetadata.webPageType == WEB_PAGE_TYPE_CHANNEL
3359 # will not work
3360 if skip_channels and '/channels?' in shelf_url:
3361 return
3362 title = self._get_text(shelf_renderer, 'title')
3363 yield self.url_result(shelf_url, video_title=title)
3364 # Shelf may not contain a shelf URL; fall back to extraction from content
3365 for entry in self._shelf_entries_from_content(shelf_renderer):
3366 yield entry
3367
3368 def _playlist_entries(self, video_list_renderer):
3369 for content in video_list_renderer['contents']:
3370 if not isinstance(content, dict):
3371 continue
3372 renderer = content.get('playlistVideoRenderer') or content.get('playlistPanelVideoRenderer')
3373 if not isinstance(renderer, dict):
3374 continue
3375 video_id = renderer.get('videoId')
3376 if not video_id:
3377 continue
3378 yield self._extract_video(renderer)
3379
3380 def _rich_entries(self, rich_grid_renderer):
3381 renderer = try_get(
3382 rich_grid_renderer, lambda x: x['content']['videoRenderer'], dict) or {}
3383 video_id = renderer.get('videoId')
3384 if not video_id:
3385 return
3386 yield self._extract_video(renderer)
3387
3388 def _video_entry(self, video_renderer):
3389 video_id = video_renderer.get('videoId')
3390 if video_id:
3391 return self._extract_video(video_renderer)
3392
3393 def _post_thread_entries(self, post_thread_renderer):
3394 post_renderer = try_get(
3395 post_thread_renderer, lambda x: x['post']['backstagePostRenderer'], dict)
3396 if not post_renderer:
3397 return
3398 # video attachment
3399 video_renderer = try_get(
3400 post_renderer, lambda x: x['backstageAttachment']['videoRenderer'], dict) or {}
3401 video_id = video_renderer.get('videoId')
3402 if video_id:
3403 entry = self._extract_video(video_renderer)
3404 if entry:
3405 yield entry
3406 # playlist attachment
3407 playlist_id = try_get(
3408 post_renderer, lambda x: x['backstageAttachment']['playlistRenderer']['playlistId'], compat_str)
3409 if playlist_id:
3410 yield self.url_result(
3411 'https://www.youtube.com/playlist?list=%s' % playlist_id,
3412 ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
3413 # inline video links
3414 runs = try_get(post_renderer, lambda x: x['contentText']['runs'], list) or []
3415 for run in runs:
3416 if not isinstance(run, dict):
3417 continue
3418 ep_url = try_get(
3419 run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'], compat_str)
3420 if not ep_url:
3421 continue
3422 if not YoutubeIE.suitable(ep_url):
3423 continue
3424 ep_video_id = YoutubeIE._match_id(ep_url)
3425 if video_id == ep_video_id:
3426 continue
3427 yield self.url_result(ep_url, ie=YoutubeIE.ie_key(), video_id=ep_video_id)
3428
3429 def _post_thread_continuation_entries(self, post_thread_continuation):
3430 contents = post_thread_continuation.get('contents')
3431 if not isinstance(contents, list):
3432 return
3433 for content in contents:
3434 renderer = content.get('backstagePostThreadRenderer')
3435 if not isinstance(renderer, dict):
3436 continue
3437 for entry in self._post_thread_entries(renderer):
3438 yield entry
3439
3440 r''' # unused
3441 def _rich_grid_entries(self, contents):
3442 for content in contents:
3443 video_renderer = try_get(content, lambda x: x['richItemRenderer']['content']['videoRenderer'], dict)
3444 if video_renderer:
3445 entry = self._video_entry(video_renderer)
3446 if entry:
3447 yield entry
3448 '''
3449 def _extract_entries(self, parent_renderer, continuation_list):
3450 # continuation_list is modified in-place with continuation_list = [continuation_token]
3451 continuation_list[:] = [None]
3452 contents = try_get(parent_renderer, lambda x: x['contents'], list) or []
3453 for content in contents:
3454 if not isinstance(content, dict):
3455 continue
3456 is_renderer = try_get(content, lambda x: x['itemSectionRenderer'], dict)
3457 if not is_renderer:
3458 renderer = content.get('richItemRenderer')
3459 if renderer:
3460 for entry in self._rich_entries(renderer):
3461 yield entry
3462 continuation_list[0] = self._extract_continuation(parent_renderer)
3463 continue
3464 isr_contents = try_get(is_renderer, lambda x: x['contents'], list) or []
3465 for isr_content in isr_contents:
3466 if not isinstance(isr_content, dict):
3467 continue
3468
3469 known_renderers = {
3470 'playlistVideoListRenderer': self._playlist_entries,
3471 'gridRenderer': self._grid_entries,
3472 'shelfRenderer': lambda x: self._shelf_entries(x),
3473 'backstagePostThreadRenderer': self._post_thread_entries,
3474 'videoRenderer': lambda x: [self._video_entry(x)],
3475 'playlistRenderer': lambda x: self._grid_entries({'items': [{'playlistRenderer': x}]}),
3476 'channelRenderer': lambda x: self._grid_entries({'items': [{'channelRenderer': x}]}),
3477 }
3478 for key, renderer in isr_content.items():
3479 if key not in known_renderers:
3480 continue
3481 for entry in known_renderers[key](renderer):
3482 if entry:
3483 yield entry
3484 continuation_list[0] = self._extract_continuation(renderer)
3485 break
3486
3487 if not continuation_list[0]:
3488 continuation_list[0] = self._extract_continuation(is_renderer)
3489
3490 if not continuation_list[0]:
3491 continuation_list[0] = self._extract_continuation(parent_renderer)
3492
3493 def _entries(self, tab, item_id, ytcfg, account_syncid, visitor_data):
3494 continuation_list = [None]
3495 extract_entries = lambda x: self._extract_entries(x, continuation_list)
3496 tab_content = try_get(tab, lambda x: x['content'], dict)
3497 if not tab_content:
3498 return
3499 parent_renderer = (
3500 try_get(tab_content, lambda x: x['sectionListRenderer'], dict)
3501 or try_get(tab_content, lambda x: x['richGridRenderer'], dict) or {})
3502 for entry in extract_entries(parent_renderer):
3503 yield entry
3504 continuation = continuation_list[0]
3505
3506 for page_num in itertools.count(1):
3507 if not continuation:
3508 break
3509 headers = self.generate_api_headers(
3510 ytcfg=ytcfg, account_syncid=account_syncid, visitor_data=visitor_data)
3511 response = self._extract_response(
3512 item_id='%s page %s' % (item_id, page_num),
3513 query=continuation, headers=headers, ytcfg=ytcfg,
3514 check_get_keys=('continuationContents', 'onResponseReceivedActions', 'onResponseReceivedEndpoints'))
3515
3516 if not response:
3517 break
3518 # Extracting updated visitor data is required to prevent an infinite extraction loop in some cases
3519 # See: https://github.com/ytdl-org/youtube-dl/issues/28702
3520 visitor_data = self._extract_visitor_data(response) or visitor_data
3521
3522 known_continuation_renderers = {
3523 'playlistVideoListContinuation': self._playlist_entries,
3524 'gridContinuation': self._grid_entries,
3525 'itemSectionContinuation': self._post_thread_continuation_entries,
3526 'sectionListContinuation': extract_entries, # for feeds
3527 }
3528 continuation_contents = try_get(
3529 response, lambda x: x['continuationContents'], dict) or {}
3530 continuation_renderer = None
3531 for key, value in continuation_contents.items():
3532 if key not in known_continuation_renderers:
3533 continue
3534 continuation_renderer = value
3535 continuation_list = [None]
3536 for entry in known_continuation_renderers[key](continuation_renderer):
3537 yield entry
3538 continuation = continuation_list[0] or self._extract_continuation(continuation_renderer)
3539 break
3540 if continuation_renderer:
3541 continue
3542
3543 known_renderers = {
3544 'gridPlaylistRenderer': (self._grid_entries, 'items'),
3545 'gridVideoRenderer': (self._grid_entries, 'items'),
3546 'gridChannelRenderer': (self._grid_entries, 'items'),
3547 'playlistVideoRenderer': (self._playlist_entries, 'contents'),
3548 'itemSectionRenderer': (extract_entries, 'contents'), # for feeds
3549 'richItemRenderer': (extract_entries, 'contents'), # for hashtag
3550 'backstagePostThreadRenderer': (self._post_thread_continuation_entries, 'contents')
3551 }
3552 on_response_received = dict_get(response, ('onResponseReceivedActions', 'onResponseReceivedEndpoints'))
3553 continuation_items = try_get(
3554 on_response_received, lambda x: x[0]['appendContinuationItemsAction']['continuationItems'], list)
3555 continuation_item = try_get(continuation_items, lambda x: x[0], dict) or {}
3556 video_items_renderer = None
3557 for key, value in continuation_item.items():
3558 if key not in known_renderers:
3559 continue
3560 video_items_renderer = {known_renderers[key][1]: continuation_items}
3561 continuation_list = [None]
3562 for entry in known_renderers[key][0](video_items_renderer):
3563 yield entry
3564 continuation = continuation_list[0] or self._extract_continuation(video_items_renderer)
3565 break
3566 if video_items_renderer:
3567 continue
3568 break
3569
3570 @staticmethod
3571 def _extract_selected_tab(tabs):
3572 for tab in tabs:
3573 renderer = dict_get(tab, ('tabRenderer', 'expandableTabRenderer')) or {}
3574 if renderer.get('selected') is True:
3575 return renderer
3576 else:
3577 raise ExtractorError('Unable to find selected tab')
3578
3579 @classmethod
3580 def _extract_uploader(cls, data):
3581 uploader = {}
3582 renderer = cls._extract_sidebar_info_renderer(data, 'playlistSidebarSecondaryInfoRenderer') or {}
3583 owner = try_get(
3584 renderer, lambda x: x['videoOwner']['videoOwnerRenderer']['title']['runs'][0], dict)
3585 if owner:
3586 uploader['uploader'] = owner.get('text')
3587 uploader['uploader_id'] = try_get(
3588 owner, lambda x: x['navigationEndpoint']['browseEndpoint']['browseId'], compat_str)
3589 uploader['uploader_url'] = urljoin(
3590 'https://www.youtube.com/',
3591 try_get(owner, lambda x: x['navigationEndpoint']['browseEndpoint']['canonicalBaseUrl'], compat_str))
3592 return {k: v for k, v in uploader.items() if v is not None}
3593
3594 def _extract_from_tabs(self, item_id, ytcfg, data, tabs):
3595 playlist_id = title = description = channel_url = channel_name = channel_id = None
3596 tags = []
3597
3598 selected_tab = self._extract_selected_tab(tabs)
3599 renderer = try_get(
3600 data, lambda x: x['metadata']['channelMetadataRenderer'], dict)
3601 if renderer:
3602 channel_name = renderer.get('title')
3603 channel_url = renderer.get('channelUrl')
3604 channel_id = renderer.get('externalId')
3605 else:
3606 renderer = try_get(
3607 data, lambda x: x['metadata']['playlistMetadataRenderer'], dict)
3608
3609 if renderer:
3610 title = renderer.get('title')
3611 description = renderer.get('description', '')
3612 playlist_id = channel_id
3613 tags = renderer.get('keywords', '').split()
3614
3615 thumbnails = (
3616 self._extract_thumbnails(renderer, 'avatar')
3617 or self._extract_thumbnails(
3618 self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer'),
3619 ('thumbnailRenderer', 'playlistVideoThumbnailRenderer', 'thumbnail')))
3620
3621 if playlist_id is None:
3622 playlist_id = item_id
3623 if title is None:
3624 title = (
3625 try_get(data, lambda x: x['header']['hashtagHeaderRenderer']['hashtag']['simpleText'])
3626 or playlist_id)
3627 title += format_field(selected_tab, 'title', ' - %s')
3628 title += format_field(selected_tab, 'expandedText', ' - %s')
3629 metadata = {
3630 'playlist_id': playlist_id,
3631 'playlist_title': title,
3632 'playlist_description': description,
3633 'uploader': channel_name,
3634 'uploader_id': channel_id,
3635 'uploader_url': channel_url,
3636 'thumbnails': thumbnails,
3637 'tags': tags,
3638 }
3639 availability = self._extract_availability(data)
3640 if availability:
3641 metadata['availability'] = availability
3642 if not channel_id:
3643 metadata.update(self._extract_uploader(data))
3644 metadata.update({
3645 'channel': metadata['uploader'],
3646 'channel_id': metadata['uploader_id'],
3647 'channel_url': metadata['uploader_url']})
3648 return self.playlist_result(
3649 self._entries(
3650 selected_tab, playlist_id, ytcfg,
3651 self._extract_account_syncid(ytcfg, data),
3652 self._extract_visitor_data(data, ytcfg)),
3653 **metadata)
3654
3655 def _extract_mix_playlist(self, playlist, playlist_id, data, ytcfg):
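# Mix playlists are potentially endless; keep paging through the 'next' endpoint
# until the first video is encountered again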
3656 first_id = last_id = response = None
3657 for page_num in itertools.count(1):
3658 videos = list(self._playlist_entries(playlist))
3659 if not videos:
3660 return
3661 start = next((i for i, v in enumerate(videos) if v['id'] == last_id), -1) + 1
3662 if start >= len(videos):
3663 return
3664 for video in videos[start:]:
3665 if video['id'] == first_id:
3666 self.to_screen('First video %s found again; Assuming end of Mix' % first_id)
3667 return
3668 yield video
3669 first_id = first_id or videos[0]['id']
3670 last_id = videos[-1]['id']
3671 watch_endpoint = try_get(
3672 playlist, lambda x: x['contents'][-1]['playlistPanelVideoRenderer']['navigationEndpoint']['watchEndpoint'])
3673 headers = self.generate_api_headers(
3674 ytcfg=ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
3675 visitor_data=self._extract_visitor_data(response, data, ytcfg))
3676 query = {
3677 'playlistId': playlist_id,
3678 'videoId': watch_endpoint.get('videoId') or last_id,
3679 'index': watch_endpoint.get('index') or len(videos),
3680 'params': watch_endpoint.get('params') or 'OAE%3D'
3681 }
3682 response = self._extract_response(
3683 item_id='%s page %d' % (playlist_id, page_num),
3684 query=query, ep='next', headers=headers, ytcfg=ytcfg,
3685 check_get_keys='contents'
3686 )
3687 playlist = try_get(
3688 response, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
3689
3690 def _extract_from_playlist(self, item_id, url, data, playlist, ytcfg):
3691 title = playlist.get('title') or try_get(
3692 data, lambda x: x['titleText']['simpleText'], compat_str)
3693 playlist_id = playlist.get('playlistId') or item_id
3694
3695 # Delegating everything except mix playlists to regular tab-based playlist URL
3696 playlist_url = urljoin(url, try_get(
3697 playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
3698 compat_str))
3699 if playlist_url and playlist_url != url:
3700 return self.url_result(
3701 playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
3702 video_title=title)
3703
3704 return self.playlist_result(
3705 self._extract_mix_playlist(playlist, playlist_id, data, ytcfg),
3706 playlist_id=playlist_id, playlist_title=title)
3707
3708 def _extract_availability(self, data):
3709 """
3710 Gets the availability of a given playlist/tab.
3711 Note: Unless YouTube tells us explicitly, we do not assume it is public
3712 @param data: response
3713 """
3714 is_private = is_unlisted = None
3715 renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer') or {}
3716 badge_labels = self._extract_badges(renderer)
3717
3718 # Personal playlists, when authenticated, have a dropdown visibility selector instead of a badge
3719 privacy_dropdown_entries = try_get(
3720 renderer, lambda x: x['privacyForm']['dropdownFormFieldRenderer']['dropdown']['dropdownRenderer']['entries'], list) or []
3721 for renderer_dict in privacy_dropdown_entries:
3722 is_selected = try_get(
3723 renderer_dict, lambda x: x['privacyDropdownItemRenderer']['isSelected'], bool) or False
3724 if not is_selected:
3725 continue
3726 label = self._get_text(renderer_dict, ('privacyDropdownItemRenderer', 'label'))
3727 if label:
3728 badge_labels.add(label.lower())
3729 break
3730
3731 for badge_label in badge_labels:
3732 if badge_label == 'unlisted':
3733 is_unlisted = True
3734 elif badge_label == 'private':
3735 is_private = True
3736 elif badge_label == 'public':
3737 is_unlisted = is_private = False
3738 return self._availability(is_private, False, False, False, is_unlisted)
3739
3740 @staticmethod
3741 def _extract_sidebar_info_renderer(data, info_renderer, expected_type=dict):
3742 sidebar_renderer = try_get(
3743 data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list) or []
3744 for item in sidebar_renderer:
3745 renderer = try_get(item, lambda x: x[info_renderer], expected_type)
3746 if renderer:
3747 return renderer
3748
3749 def _reload_with_unavailable_videos(self, item_id, data, ytcfg):
3750 """
3751 Get playlist with unavailable videos if the 'show unavailable videos' button exists.
3752 """
3753 browse_id = params = None
3754 renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer')
3755 if not renderer:
3756 return
3757 menu_renderer = try_get(
3758 renderer, lambda x: x['menu']['menuRenderer']['items'], list) or []
3759 for menu_item in menu_renderer:
3760 if not isinstance(menu_item, dict):
3761 continue
3762 nav_item_renderer = menu_item.get('menuNavigationItemRenderer')
3763 text = try_get(
3764 nav_item_renderer, lambda x: x['text']['simpleText'], compat_str)
3765 if not text or text.lower() != 'show unavailable videos':
3766 continue
3767 browse_endpoint = try_get(
3768 nav_item_renderer, lambda x: x['navigationEndpoint']['browseEndpoint'], dict) or {}
3769 browse_id = browse_endpoint.get('browseId')
3770 params = browse_endpoint.get('params')
3771 break
3772
3773 headers = self.generate_api_headers(
3774 ytcfg=ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
3775 visitor_data=self._extract_visitor_data(data, ytcfg))
3776 query = {
3777 'params': params or 'wgYCCAA=',
3778 'browseId': browse_id or 'VL%s' % item_id
3779 }
3780 return self._extract_response(
3781 item_id=item_id, headers=headers, query=query,
3782 check_get_keys='contents', fatal=False, ytcfg=ytcfg,
3783 note='Downloading API JSON with unavailable videos')
3784
3785 def _extract_webpage(self, url, item_id, fatal=True):
3786 retries = self.get_param('extractor_retries', 3)
3787 count = -1
3788 webpage = data = last_error = None
3789 while count < retries:
3790 count += 1
3791 # Sometimes youtube returns a webpage with incomplete ytInitialData
3792 # See: https://github.com/yt-dlp/yt-dlp/issues/116
3793 if last_error:
3794 self.report_warning('%s. Retrying ...' % last_error)
3795 try:
3796 webpage = self._download_webpage(
3797 url, item_id,
3798 note='Downloading webpage%s' % (' (retry #%d)' % count if count else '',))
3799 data = self.extract_yt_initial_data(item_id, webpage or '', fatal=fatal) or {}
3800 except ExtractorError as e:
3801 if isinstance(e.cause, network_exceptions):
3802 if not isinstance(e.cause, compat_HTTPError) or e.cause.code not in (403, 429):
3803 last_error = error_to_compat_str(e.cause or e.msg)
3804 if count < retries:
3805 continue
3806 if fatal:
3807 raise
3808 self.report_warning(error_to_compat_str(e))
3809 break
3810 else:
3811 try:
3812 self._extract_and_report_alerts(data)
3813 except ExtractorError as e:
3814 if fatal:
3815 raise
3816 self.report_warning(error_to_compat_str(e))
3817 break
3818
3819 if dict_get(data, ('contents', 'currentVideoEndpoint')):
3820 break
3821
3822 last_error = 'Incomplete yt initial data received'
3823 if count >= retries:
3824 if fatal:
3825 raise ExtractorError(last_error)
3826 self.report_warning(last_error)
3827 break
3828
3829 return webpage, data
3830
3831 def _extract_data(self, url, item_id, ytcfg=None, fatal=True, webpage_fatal=False, default_client='web'):
3832 data = None
3833 if 'webpage' not in self._configuration_arg('skip'):
3834 webpage, data = self._extract_webpage(url, item_id, fatal=webpage_fatal)
3835 ytcfg = ytcfg or self.extract_ytcfg(item_id, webpage)
3836 if not data:
3837 if not ytcfg and self.is_authenticated:
3838 msg = 'Playlists that require authentication may not extract correctly without a successful webpage download.'
3839 if 'authcheck' not in self._configuration_arg('skip') and fatal:
3840 raise ExtractorError(
3841 msg + ' If you are not downloading private content, or your cookies are only for the first account and channel,'
3842 ' pass "--extractor-args youtubetab:skip=authcheck" to skip this check',
3843 expected=True)
3844 self.report_warning(msg, only_once=True)
3845 data = self._extract_tab_endpoint(url, item_id, ytcfg, fatal=fatal, default_client=default_client)
3846 return data, ytcfg
3847
3848 def _extract_tab_endpoint(self, url, item_id, ytcfg=None, fatal=True, default_client='web'):
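# Resolve the URL via navigation/resolve_url, then query the corresponding browse/next endpoint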
3849 headers = self.generate_api_headers(ytcfg=ytcfg, default_client=default_client)
3850 resolve_response = self._extract_response(
3851 item_id=item_id, query={'url': url}, check_get_keys='endpoint', headers=headers, ytcfg=ytcfg, fatal=fatal,
3852 ep='navigation/resolve_url', note='Downloading API parameters API JSON', default_client=default_client)
3853 endpoints = {'browseEndpoint': 'browse', 'watchEndpoint': 'next'}
3854 for ep_key, ep in endpoints.items():
3855 params = try_get(resolve_response, lambda x: x['endpoint'][ep_key], dict)
3856 if params:
3857 return self._extract_response(
3858 item_id=item_id, query=params, ep=ep, headers=headers,
3859 ytcfg=ytcfg, fatal=fatal, default_client=default_client,
3860 check_get_keys=('contents', 'currentVideoEndpoint'))
3861 err_note = 'Failed to resolve url (does the playlist exist?)'
3862 if fatal:
3863 raise ExtractorError(err_note, expected=True)
3864 self.report_warning(err_note, item_id)
3865
3866 @staticmethod
3867 def _smuggle_data(entries, data):
3868 for entry in entries:
3869 if data:
3870 entry['url'] = smuggle_url(entry['url'], data)
3871 yield entry
3872
3873 _SEARCH_PARAMS = None
3874
3875 def _search_results(self, query, params=NO_DEFAULT):
3876 data = {'query': query}
3877 if params is NO_DEFAULT:
3878 params = self._SEARCH_PARAMS
3879 if params:
3880 data['params'] = params
3881 continuation_list = [None]
3882 for page_num in itertools.count(1):
3883 data.update(continuation_list[0] or {})
3884 search = self._extract_response(
3885 item_id='query "%s" page %s' % (query, page_num), ep='search', query=data,
3886 check_get_keys=('contents', 'onResponseReceivedCommands'))
3887 slr_contents = try_get(
3888 search,
3889 (lambda x: x['contents']['twoColumnSearchResultsRenderer']['primaryContents']['sectionListRenderer']['contents'],
3890 lambda x: x['onResponseReceivedCommands'][0]['appendContinuationItemsAction']['continuationItems']),
3891 list)
3892 yield from self._extract_entries({'contents': slr_contents}, continuation_list)
3893 if not continuation_list[0]:
3894 break
3895
3896
3897 class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
3898 IE_DESC = 'YouTube Tabs'
3899 _VALID_URL = r'''(?x:
3900 https?://
3901 (?:\w+\.)?
3902 (?:
3903 youtube(?:kids)?\.com|
3904 %(invidious)s
3905 )/
3906 (?:
3907 (?P<channel_type>channel|c|user|browse)/|
3908 (?P<not_channel>
3909 feed/|hashtag/|
3910 (?:playlist|watch)\?.*?\blist=
3911 )|
3912 (?!(?:%(reserved_names)s)\b) # Direct URLs
3913 )
3914 (?P<id>[^/?\#&]+)
3915 )''' % {
3916 'reserved_names': YoutubeBaseInfoExtractor._RESERVED_NAMES,
3917 'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
3918 }
3919 IE_NAME = 'youtube:tab'
3920
3921 _TESTS = [{
3922 'note': 'playlists, multipage',
3923 'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid',
3924 'playlist_mincount': 94,
3925 'info_dict': {
3926 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
3927 'title': 'Игорь Клейнер - Playlists',
3928 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
3929 'uploader': 'Игорь Клейнер',
3930 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
3931 },
3932 }, {
3933 'note': 'playlists, multipage, different order',
3934 'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
3935 'playlist_mincount': 94,
3936 'info_dict': {
3937 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
3938 'title': 'Игорь Клейнер - Playlists',
3939 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
3940 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
3941 'uploader': 'Игорь Клейнер',
3942 },
3943 }, {
3944 'note': 'playlists, series',
3945 'url': 'https://www.youtube.com/c/3blue1brown/playlists?view=50&sort=dd&shelf_id=3',
3946 'playlist_mincount': 5,
3947 'info_dict': {
3948 'id': 'UCYO_jab_esuFRV4b17AJtAw',
3949 'title': '3Blue1Brown - Playlists',
3950 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
3951 'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
3952 'uploader': '3Blue1Brown',
3953 },
3954 }, {
3955 'note': 'playlists, singlepage',
3956 'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
3957 'playlist_mincount': 4,
3958 'info_dict': {
3959 'id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
3960 'title': 'ThirstForScience - Playlists',
3961 'description': 'md5:609399d937ea957b0f53cbffb747a14c',
3962 'uploader': 'ThirstForScience',
3963 'uploader_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
3964 }
3965 }, {
3966 'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',
3967 'only_matching': True,
3968 }, {
3969 'note': 'basic, single video playlist',
3970 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
3971 'info_dict': {
3972 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
3973 'uploader': 'Sergey M.',
3974 'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
3975 'title': 'youtube-dl public playlist',
3976 },
3977 'playlist_count': 1,
3978 }, {
3979 'note': 'empty playlist',
3980 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
3981 'info_dict': {
3982 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
3983 'uploader': 'Sergey M.',
3984 'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
3985 'title': 'youtube-dl empty playlist',
3986 },
3987 'playlist_count': 0,
3988 }, {
3989 'note': 'Home tab',
3990 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/featured',
3991 'info_dict': {
3992 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3993 'title': 'lex will - Home',
3994 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3995 'uploader': 'lex will',
3996 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3997 },
3998 'playlist_mincount': 2,
3999 }, {
4000 'note': 'Videos tab',
4001 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos',
4002 'info_dict': {
4003 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4004 'title': 'lex will - Videos',
4005 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
4006 'uploader': 'lex will',
4007 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4008 },
4009 'playlist_mincount': 975,
4010 }, {
4011 'note': 'Videos tab, sorted by popular',
4012 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos?view=0&sort=p&flow=grid',
4013 'info_dict': {
4014 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4015 'title': 'lex will - Videos',
4016 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
4017 'uploader': 'lex will',
4018 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4019 },
4020 'playlist_mincount': 199,
4021 }, {
4022 'note': 'Playlists tab',
4023 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/playlists',
4024 'info_dict': {
4025 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4026 'title': 'lex will - Playlists',
4027 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
4028 'uploader': 'lex will',
4029 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4030 },
4031 'playlist_mincount': 17,
4032 }, {
4033 'note': 'Community tab',
4034 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community',
4035 'info_dict': {
4036 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4037 'title': 'lex will - Community',
4038 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
4039 'uploader': 'lex will',
4040 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4041 },
4042 'playlist_mincount': 18,
4043 }, {
4044 'note': 'Channels tab',
4045 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels',
4046 'info_dict': {
4047 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4048 'title': 'lex will - Channels',
4049 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
4050 'uploader': 'lex will',
4051 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4052 },
4053 'playlist_mincount': 12,
4054 }, {
4055 'note': 'Search tab',
4056 'url': 'https://www.youtube.com/c/3blue1brown/search?query=linear%20algebra',
4057 'playlist_mincount': 40,
4058 'info_dict': {
4059 'id': 'UCYO_jab_esuFRV4b17AJtAw',
4060 'title': '3Blue1Brown - Search - linear algebra',
4061 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
4062 'uploader': '3Blue1Brown',
4063 'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
4064 },
4065 }, {
4066 'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',
4067 'only_matching': True,
4068 }, {
4069 'url': 'https://www.youtubekids.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
4070 'only_matching': True,
4071 }, {
4072 'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
4073 'only_matching': True,
4074 }, {
4075 'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
4076 'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
4077 'info_dict': {
4078 'title': '29C3: Not my department',
4079 'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
4080 'uploader': 'Christiaan008',
4081 'uploader_id': 'UCEPzS1rYsrkqzSLNp76nrcg',
4082 'description': 'md5:a14dc1a8ef8307a9807fe136a0660268',
4083 },
4084 'playlist_count': 96,
4085 }, {
4086 'note': 'Large playlist',
4087 'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
4088 'info_dict': {
4089 'title': 'Uploads from Cauchemar',
4090 'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
4091 'uploader': 'Cauchemar',
4092 'uploader_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',
4093 },
4094 'playlist_mincount': 1123,
4095 }, {
4096 'note': 'even larger playlist, 8832 videos',
4097 'url': 'http://www.youtube.com/user/NASAgovVideo/videos',
4098 'only_matching': True,
4099 }, {
4100 'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
4101 'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
4102 'info_dict': {
4103 'title': 'Uploads from Interstellar Movie',
4104 'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
4105 'uploader': 'Interstellar Movie',
4106 'uploader_id': 'UCXw-G3eDE9trcvY2sBMM_aA',
4107 },
4108 'playlist_mincount': 21,
4109 }, {
4110 'note': 'Playlist with "show unavailable videos" button',
4111 'url': 'https://www.youtube.com/playlist?list=UUTYLiWFZy8xtPwxFwX9rV7Q',
4112 'info_dict': {
4113 'title': 'Uploads from Phim Siêu Nhân Nhật Bản',
4114 'id': 'UUTYLiWFZy8xtPwxFwX9rV7Q',
4115 'uploader': 'Phim Siêu Nhân Nhật Bản',
4116 'uploader_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',
4117 },
4118 'playlist_mincount': 200,
4119 }, {
4120 'note': 'Playlist with unavailable videos in page 7',
4121 'url': 'https://www.youtube.com/playlist?list=UU8l9frL61Yl5KFOl87nIm2w',
4122 'info_dict': {
4123 'title': 'Uploads from BlankTV',
4124 'id': 'UU8l9frL61Yl5KFOl87nIm2w',
4125 'uploader': 'BlankTV',
4126 'uploader_id': 'UC8l9frL61Yl5KFOl87nIm2w',
4127 },
4128 'playlist_mincount': 1000,
4129 }, {
4130 'note': 'https://github.com/ytdl-org/youtube-dl/issues/21844',
4131 'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
4132 'info_dict': {
4133 'title': 'Data Analysis with Dr Mike Pound',
4134 'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
4135 'uploader_id': 'UC9-y-6csu5WGm29I7JiwpnA',
4136 'uploader': 'Computerphile',
4137 'description': 'md5:7f567c574d13d3f8c0954d9ffee4e487',
4138 },
4139 'playlist_mincount': 11,
4140 }, {
4141 'url': 'https://invidio.us/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
4142 'only_matching': True,
4143 }, {
4144 'note': 'Playlist URL that does not actually serve a playlist',
4145 'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
4146 'info_dict': {
4147 'id': 'FqZTN594JQw',
4148 'ext': 'webm',
4149 'title': "Smiley's People 01 detective, Adventure Series, Action",
4150 'uploader': 'STREEM',
4151 'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
4152 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
4153 'upload_date': '20150526',
4154 'license': 'Standard YouTube License',
4155 'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
4156 'categories': ['People & Blogs'],
4157 'tags': list,
4158 'view_count': int,
4159 'like_count': int,
4160 'dislike_count': int,
4161 },
4162 'params': {
4163 'skip_download': True,
4164 },
4165 'skip': 'This video is not available.',
4166 'add_ie': [YoutubeIE.ie_key()],
4167 }, {
4168 'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',
4169 'only_matching': True,
4170 }, {
4171 'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
4172 'only_matching': True,
4173 }, {
4174 'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',
4175 'info_dict': {
4176 'id': '3yImotZU3tw', # This will keep changing
4177 'ext': 'mp4',
4178 'title': compat_str,
4179 'uploader': 'Sky News',
4180 'uploader_id': 'skynews',
4181 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/skynews',
4182 'upload_date': r're:\d{8}',
4183 'description': compat_str,
4184 'categories': ['News & Politics'],
4185 'tags': list,
4186 'like_count': int,
4187 'dislike_count': int,
4188 },
4189 'params': {
4190 'skip_download': True,
4191 },
4192 'expected_warnings': ['Downloading just video ', 'Ignoring subtitle tracks found in '],
4193 }, {
4194 'url': 'https://www.youtube.com/user/TheYoungTurks/live',
4195 'info_dict': {
4196 'id': 'a48o2S1cPoo',
4197 'ext': 'mp4',
4198 'title': 'The Young Turks - Live Main Show',
4199 'uploader': 'The Young Turks',
4200 'uploader_id': 'TheYoungTurks',
4201 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
4202 'upload_date': '20150715',
4203 'license': 'Standard YouTube License',
4204 'description': 'md5:438179573adcdff3c97ebb1ee632b891',
4205 'categories': ['News & Politics'],
4206 'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
4207 'like_count': int,
4208 'dislike_count': int,
4209 },
4210 'params': {
4211 'skip_download': True,
4212 },
4213 'only_matching': True,
4214 }, {
4215 'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
4216 'only_matching': True,
4217 }, {
4218 'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
4219 'only_matching': True,
4220 }, {
4221 'note': 'A channel that is not live. Should raise error',
4222 'url': 'https://www.youtube.com/user/numberphile/live',
4223 'only_matching': True,
4224 }, {
4225 'url': 'https://www.youtube.com/feed/trending',
4226 'only_matching': True,
4227 }, {
4228 'url': 'https://www.youtube.com/feed/library',
4229 'only_matching': True,
4230 }, {
4231 'url': 'https://www.youtube.com/feed/history',
4232 'only_matching': True,
4233 }, {
4234 'url': 'https://www.youtube.com/feed/subscriptions',
4235 'only_matching': True,
4236 }, {
4237 'url': 'https://www.youtube.com/feed/watch_later',
4238 'only_matching': True,
4239 }, {
4240 'note': 'Recommended - redirects to home page.',
4241 'url': 'https://www.youtube.com/feed/recommended',
4242 'only_matching': True,
4243 }, {
4244 'note': 'inline playlist with not always working continuations',
4245 'url': 'https://www.youtube.com/watch?v=UC6u0Tct-Fo&list=PL36D642111D65BE7C',
4246 'only_matching': True,
4247 }, {
4248 'url': 'https://www.youtube.com/course',
4249 'only_matching': True,
4250 }, {
4251 'url': 'https://www.youtube.com/zsecurity',
4252 'only_matching': True,
4253 }, {
4254 'url': 'http://www.youtube.com/NASAgovVideo/videos',
4255 'only_matching': True,
4256 }, {
4257 'url': 'https://www.youtube.com/TheYoungTurks/live',
4258 'only_matching': True,
4259 }, {
4260 'url': 'https://www.youtube.com/hashtag/cctv9',
4261 'info_dict': {
4262 'id': 'cctv9',
4263 'title': '#cctv9',
4264 },
4265 'playlist_mincount': 350,
4266 }, {
4267 'url': 'https://www.youtube.com/watch?list=PLW4dVinRY435CBE_JD3t-0SRXKfnZHS1P&feature=youtu.be&v=M9cJMXmQ_ZU',
4268 'only_matching': True,
4269 }, {
4270 'note': 'Requires Premium: should request additional YTM-info webpage (and have format 141) for videos in playlist',
4271 'url': 'https://music.youtube.com/playlist?list=PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
4272 'only_matching': True
4273 }, {
4274 'note': '/browse/ should redirect to /channel/',
4275 'url': 'https://music.youtube.com/browse/UC1a8OFewdjuLq6KlF8M_8Ng',
4276 'only_matching': True
4277 }, {
4278 'note': 'VLPL, should redirect to playlist?list=PL...',
4279 'url': 'https://music.youtube.com/browse/VLPLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
4280 'info_dict': {
4281 'id': 'PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
4282 'uploader': 'NoCopyrightSounds',
4283 'description': 'Providing you with copyright free / safe music for gaming, live streaming, studying and more!',
4284 'uploader_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',
4285 'title': 'NCS Releases',
4286 },
4287 'playlist_mincount': 166,
4288 }, {
4289 'note': 'Topic, should redirect to playlist?list=UU...',
4290 'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',
4291 'info_dict': {
4292 'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',
4293 'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
4294 'title': 'Uploads from Royalty Free Music - Topic',
4295 'uploader': 'Royalty Free Music - Topic',
4296 },
4297 'expected_warnings': [
4298 'A channel/user page was given',
4299 'The URL does not have a videos tab',
4300 ],
4301 'playlist_mincount': 101,
4302 }, {
4303 'note': 'Topic without a UU playlist',
4304 'url': 'https://www.youtube.com/channel/UCtFRv9O2AHqOZjjynzrv-xg',
4305 'info_dict': {
4306 'id': 'UCtFRv9O2AHqOZjjynzrv-xg',
4307 'title': 'UCtFRv9O2AHqOZjjynzrv-xg',
4308 },
4309 'expected_warnings': [
4310 'A channel/user page was given',
4311 'The URL does not have a videos tab',
4312 'Falling back to channel URL',
4313 ],
4314 'playlist_mincount': 9,
4315 }, {
4316 'note': 'Youtube music Album',
4317 'url': 'https://music.youtube.com/browse/MPREb_gTAcphH99wE',
4318 'info_dict': {
4319 'id': 'OLAK5uy_l1m0thk3g31NmIIz_vMIbWtyv7eZixlH0',
4320 'title': 'Album - Royalty Free Music Library V2 (50 Songs)',
4321 },
4322 'playlist_count': 50,
4323 }, {
4324 'note': 'unlisted single video playlist',
4325 'url': 'https://www.youtube.com/playlist?list=PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
4326 'info_dict': {
4327 'uploader_id': 'UC9zHu_mHU96r19o-wV5Qs1Q',
4328 'uploader': 'colethedj',
4329 'id': 'PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
4330 'title': 'yt-dlp unlisted playlist test',
4331 'availability': 'unlisted'
4332 },
4333 'playlist_count': 1,
4334 }, {
4335 'note': 'API Fallback: Recommended - redirects to home page. Requires visitorData',
4336 'url': 'https://www.youtube.com/feed/recommended',
4337 'info_dict': {
4338 'id': 'recommended',
4339 'title': 'recommended',
4340 },
4341 'playlist_mincount': 50,
4342 'params': {
4343 'skip_download': True,
4344 'extractor_args': {'youtubetab': {'skip': ['webpage']}}
4345 },
4346 }, {
4347 'note': 'API Fallback: /videos tab, sorted by oldest first',
4348 'url': 'https://www.youtube.com/user/theCodyReeder/videos?view=0&sort=da&flow=grid',
4349 'info_dict': {
4350 'id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',
4351 'title': 'Cody\'sLab - Videos',
4352 'description': 'md5:d083b7c2f0c67ee7a6c74c3e9b4243fa',
4353 'uploader': 'Cody\'sLab',
4354 'uploader_id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',
4355 },
4356 'playlist_mincount': 650,
4357 'params': {
4358 'skip_download': True,
4359 'extractor_args': {'youtubetab': {'skip': ['webpage']}}
4360 },
4361 }, {
4362 'note': 'API Fallback: Topic, should redirect to playlist?list=UU...',
4363 'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',
4364 'info_dict': {
4365 'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',
4366 'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
4367 'title': 'Uploads from Royalty Free Music - Topic',
4368 'uploader': 'Royalty Free Music - Topic',
4369 },
4370 'expected_warnings': [
4371 'A channel/user page was given',
4372 'The URL does not have a videos tab',
4373 ],
4374 'playlist_mincount': 101,
4375 'params': {
4376 'skip_download': True,
4377 'extractor_args': {'youtubetab': {'skip': ['webpage']}}
4378 },
4379 }]
4380
4381 @classmethod
4382 def suitable(cls, url):
4383 return False if YoutubeIE.suitable(url) else super(
4384 YoutubeTabIE, cls).suitable(url)
4385
4386 def _real_extract(self, url):
4387 url, smuggled_data = unsmuggle_url(url, {})
4388 if self.is_music_url(url):
4389 smuggled_data['is_music_url'] = True
4390 info_dict = self.__real_extract(url, smuggled_data)
4391 if info_dict.get('entries'):
4392 info_dict['entries'] = self._smuggle_data(info_dict['entries'], smuggled_data)
4393 return info_dict
4394
4395 _URL_RE = re.compile(rf'(?P<pre>{_VALID_URL})(?(channel_type)(?P<tab>/\w+))?(?P<post>.*)$')
4396
4397 def __real_extract(self, url, smuggled_data):
4398 item_id = self._match_id(url)
4399 url = compat_urlparse.urlunparse(
4400 compat_urlparse.urlparse(url)._replace(netloc='www.youtube.com'))
4401 compat_opts = self.get_param('compat_opts', [])
4402
4403 def get_mobj(url):
4404 mobj = self._URL_RE.match(url).groupdict()
4405 mobj.update((k, '') for k, v in mobj.items() if v is None)
4406 return mobj
4407
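# Illustrative only (group names come from _VALID_URL/_URL_RE above): for a
# URL like 'https://www.youtube.com/channel/UCxxxx/videos?view=0' the regex
# roughly splits it into pre='https://www.youtube.com/channel/UCxxxx',
# tab='/videos', post='?view=0'; unmatched groups are normalized to '' so
# that ''.join((pre, tab, post)) below can rebuild the URL without None checks.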
4408 mobj, redirect_warning = get_mobj(url), None
4409 # YouTube returns incomplete data if the tab name is not lowercase
4410 pre, tab, post, is_channel = mobj['pre'], mobj['tab'].lower(), mobj['post'], not mobj['not_channel']
4411 if is_channel:
4412 if smuggled_data.get('is_music_url'):
4413 if item_id[:2] == 'VL': # Youtube music VL channels have an equivalent playlist
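# e.g. a music channel id such as 'VLPLxxxx' (illustrative) is handled as
# the playlist 'PLxxxx': the 'VL' prefix is simply dropped below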
4414 item_id = item_id[2:]
4415 pre, tab, post, is_channel = f'https://www.youtube.com/playlist?list={item_id}', '', '', False
4416 elif item_id[:2] == 'MP': # Resolve albums (/[channel/browse]/MP...) to their equivalent playlist
4417 mdata = self._extract_tab_endpoint(
4418 f'https://music.youtube.com/channel/{item_id}', item_id, default_client='web_music')
4419 murl = traverse_obj(mdata, ('microformat', 'microformatDataRenderer', 'urlCanonical'),
4420 get_all=False, expected_type=compat_str)
4421 if not murl:
4422 raise ExtractorError('Failed to resolve album to playlist')
4423 return self.url_result(murl, ie=YoutubeTabIE.ie_key())
4424 elif mobj['channel_type'] == 'browse': # Youtube music /browse/ should be changed to /channel/
4425 pre = f'https://www.youtube.com/channel/{item_id}'
4426
4427 if is_channel and not tab and 'no-youtube-channel-redirect' not in compat_opts:
4428 # Home URLs should redirect to /videos/
4429 redirect_warning = ('A channel/user page was given. All the channel\'s videos will be downloaded. '
4430 'To download only the videos in the home page, add a "/featured" to the URL')
4431 tab = '/videos'
4432
4433 url = ''.join((pre, tab, post))
4434 mobj = get_mobj(url)
4435
4436 # Handle both video/playlist URLs
4437 qs = parse_qs(url)
4438 video_id, playlist_id = [qs.get(key, [None])[0] for key in ('v', 'list')]
4439
4440 if not video_id and mobj['not_channel'].startswith('watch'):
4441 if not playlist_id:
4442 # If there are neither video nor playlist IDs, YouTube redirects to the home page, which is undesirable
4443 raise ExtractorError('Unable to recognize tab page')
4444 # Common mistake: https://www.youtube.com/watch?list=playlist_id
4445 self.report_warning(f'A video URL was given without video ID. Trying to download playlist {playlist_id}')
4446 url = f'https://www.youtube.com/playlist?list={playlist_id}'
4447 mobj = get_mobj(url)
4448
4449 if video_id and playlist_id:
4450 if self.get_param('noplaylist'):
4451 self.to_screen(f'Downloading just video {video_id} because of --no-playlist')
4452 return self.url_result(f'https://www.youtube.com/watch?v={video_id}',
4453 ie=YoutubeIE.ie_key(), video_id=video_id)
4454 self.to_screen(f'Downloading playlist {playlist_id}; add --no-playlist to just download video {video_id}')
4455
4456 data, ytcfg = self._extract_data(url, item_id)
4457
4458 tabs = traverse_obj(data, ('contents', 'twoColumnBrowseResultsRenderer', 'tabs'), expected_type=list)
4459 if tabs:
4460 selected_tab = self._extract_selected_tab(tabs)
4461 tab_name = selected_tab.get('title', '')
4462 if 'no-youtube-channel-redirect' not in compat_opts:
4463 if mobj['tab'] == '/live':
4464 # Live tab should have redirected to the video
4465 raise ExtractorError('The channel is not currently live', expected=True)
4466 if mobj['tab'] == '/videos' and tab_name.lower() != mobj['tab'][1:]:
4467 redirect_warning = f'The URL does not have a {mobj["tab"][1:]} tab'
4468 if not mobj['not_channel'] and item_id[:2] == 'UC':
4469 # Topic channels don't have /videos. Use the equivalent playlist instead
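# e.g. channel UC9ALqqC4aIeG5iDs7i90Bfw maps to its uploads playlist
# UU9ALqqC4aIeG5iDs7i90Bfw (see the "Topic" entries in _TESTS above)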
4470 pl_id = f'UU{item_id[2:]}'
4471 pl_url = f'https://www.youtube.com/playlist?list={pl_id}'
4472 try:
4473 data, ytcfg = self._extract_data(pl_url, pl_id, ytcfg=ytcfg, fatal=True)
4474 except ExtractorError:
4475 redirect_warning += ' and the playlist redirect gave an error'
4476 else:
4477 item_id, url, tab_name = pl_id, pl_url, mobj['tab'][1:]
4478 redirect_warning += f'. Redirecting to playlist {pl_id} instead'
4479 if tab_name.lower() != mobj['tab'][1:]:
4480 redirect_warning += f'. {tab_name} tab is being downloaded instead'
4481
4482 if redirect_warning:
4483 self.report_warning(redirect_warning)
4484 self.write_debug(f'Final URL: {url}')
4485
4486 # YouTube sometimes provides a button to reload playlist with unavailable videos.
4487 if 'no-youtube-unavailable-videos' not in compat_opts:
4488 data = self._reload_with_unavailable_videos(item_id, data, ytcfg) or data
4489 self._extract_and_report_alerts(data, only_once=True)
4490 tabs = traverse_obj(data, ('contents', 'twoColumnBrowseResultsRenderer', 'tabs'), expected_type=list)
4491 if tabs:
4492 return self._extract_from_tabs(item_id, ytcfg, data, tabs)
4493
4494 playlist = traverse_obj(
4495 data, ('contents', 'twoColumnWatchNextResults', 'playlist', 'playlist'), expected_type=dict)
4496 if playlist:
4497 return self._extract_from_playlist(item_id, url, data, playlist, ytcfg)
4498
4499 video_id = traverse_obj(
4500 data, ('currentVideoEndpoint', 'watchEndpoint', 'videoId'), expected_type=str) or video_id
4501 if video_id:
4502 if mobj['tab'] != '/live': # live tab is expected to redirect to video
4503 self.report_warning(f'Unable to recognize playlist. Downloading just video {video_id}')
4504 return self.url_result(f'https://www.youtube.com/watch?v={video_id}',
4505 ie=YoutubeIE.ie_key(), video_id=video_id)
4506
4507 raise ExtractorError('Unable to recognize tab page')
4508
4509
4510 class YoutubePlaylistIE(InfoExtractor):
4511 IE_DESC = 'YouTube playlists'
4512 _VALID_URL = r'''(?x)(?:
4513 (?:https?://)?
4514 (?:\w+\.)?
4515 (?:
4516 (?:
4517 youtube(?:kids)?\.com|
4518 %(invidious)s
4519 )
4520 /.*?\?.*?\blist=
4521 )?
4522 (?P<id>%(playlist_id)s)
4523 )''' % {
4524 'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE,
4525 'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
4526 }
4527 IE_NAME = 'youtube:playlist'
4528 _TESTS = [{
4529 'note': 'issue #673',
4530 'url': 'PLBB231211A4F62143',
4531 'info_dict': {
4532 'title': '[OLD]Team Fortress 2 (Class-based LP)',
4533 'id': 'PLBB231211A4F62143',
4534 'uploader': 'Wickydoo',
4535 'uploader_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
4536 'description': 'md5:8fa6f52abb47a9552002fa3ddfc57fc2',
4537 },
4538 'playlist_mincount': 29,
4539 }, {
4540 'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
4541 'info_dict': {
4542 'title': 'YDL_safe_search',
4543 'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
4544 },
4545 'playlist_count': 2,
4546 'skip': 'This playlist is private',
4547 }, {
4548 'note': 'embedded',
4549 'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
4550 'playlist_count': 4,
4551 'info_dict': {
4552 'title': 'JODA15',
4553 'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
4554 'uploader': 'milan',
4555 'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
4556 }
4557 }, {
4558 'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
4559 'playlist_mincount': 654,
4560 'info_dict': {
4561 'title': '2018 Chinese New Singles (11/6 updated)',
4562 'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
4563 'uploader': 'LBK',
4564 'uploader_id': 'UC21nz3_MesPLqtDqwdvnoxA',
4565 'description': 'md5:da521864744d60a198e3a88af4db0d9d',
4566 }
4567 }, {
4568 'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
4569 'only_matching': True,
4570 }, {
4571 # music album playlist
4572 'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
4573 'only_matching': True,
4574 }]
4575
4576 @classmethod
4577 def suitable(cls, url):
4578 if YoutubeTabIE.suitable(url):
4579 return False
4580 from ..utils import parse_qs
4581 qs = parse_qs(url)
4582 if qs.get('v', [None])[0]:
4583 return False
4584 return super(YoutubePlaylistIE, cls).suitable(url)
4585
4586 def _real_extract(self, url):
4587 playlist_id = self._match_id(url)
4588 is_music_url = YoutubeBaseInfoExtractor.is_music_url(url)
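# A bare playlist id (e.g. 'PLBB231211A4F62143' from _TESTS above) carries no
# query string, so parse_qs(url) is falsy and the matched id itself is used to
# build the canonical https://www.youtube.com/playlist?list=... URL below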
4589 url = update_url_query(
4590 'https://www.youtube.com/playlist',
4591 parse_qs(url) or {'list': playlist_id})
4592 if is_music_url:
4593 url = smuggle_url(url, {'is_music_url': True})
4594 return self.url_result(url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
4595
4596
4597 class YoutubeYtBeIE(InfoExtractor):
4598 IE_DESC = 'youtu.be'
4599 _VALID_URL = r'https?://youtu\.be/(?P<id>[0-9A-Za-z_-]{11})/*?.*?\blist=(?P<playlist_id>%(playlist_id)s)' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
4600 _TESTS = [{
4601 'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
4602 'info_dict': {
4603 'id': 'yeWKywCrFtk',
4604 'ext': 'mp4',
4605 'title': 'Small Scale Baler and Braiding Rugs',
4606 'uploader': 'Backus-Page House Museum',
4607 'uploader_id': 'backuspagemuseum',
4608 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
4609 'upload_date': '20161008',
4610 'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
4611 'categories': ['Nonprofits & Activism'],
4612 'tags': list,
4613 'like_count': int,
4614 'dislike_count': int,
4615 },
4616 'params': {
4617 'noplaylist': True,
4618 'skip_download': True,
4619 },
4620 }, {
4621 'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
4622 'only_matching': True,
4623 }]
4624
4625 def _real_extract(self, url):
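# e.g. 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5'
# (from _TESTS above) is rewritten to roughly
# 'https://www.youtube.com/watch?v=yeWKywCrFtk&list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5&feature=youtu.be'
# and handed to YoutubeTabIE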
4626 mobj = self._match_valid_url(url)
4627 video_id = mobj.group('id')
4628 playlist_id = mobj.group('playlist_id')
4629 return self.url_result(
4630 update_url_query('https://www.youtube.com/watch', {
4631 'v': video_id,
4632 'list': playlist_id,
4633 'feature': 'youtu.be',
4634 }), ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
4635
4636
4637 class YoutubeYtUserIE(InfoExtractor):
4638 IE_DESC = 'YouTube user videos; "ytuser:" prefix'
4639 _VALID_URL = r'ytuser:(?P<id>.+)'
4640 _TESTS = [{
4641 'url': 'ytuser:phihag',
4642 'only_matching': True,
4643 }]
4644
4645 def _real_extract(self, url):
4646 user_id = self._match_id(url)
4647 return self.url_result(
4648 'https://www.youtube.com/user/%s/videos' % user_id,
4649 ie=YoutubeTabIE.ie_key(), video_id=user_id)
4650
4651
4652 class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
4653 IE_NAME = 'youtube:favorites'
4654 IE_DESC = 'YouTube liked videos; ":ytfav" keyword (requires cookies)'
4655 _VALID_URL = r':ytfav(?:ou?rite)?s?'
4656 _LOGIN_REQUIRED = True
4657 _TESTS = [{
4658 'url': ':ytfav',
4659 'only_matching': True,
4660 }, {
4661 'url': ':ytfavorites',
4662 'only_matching': True,
4663 }]
4664
4665 def _real_extract(self, url):
4666 return self.url_result(
4667 'https://www.youtube.com/playlist?list=LL',
4668 ie=YoutubeTabIE.ie_key())
4669
4670
4671 class YoutubeSearchIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor):
4672 IE_DESC = 'YouTube search'
4673 IE_NAME = 'youtube:search'
4674 _SEARCH_KEY = 'ytsearch'
4675 _SEARCH_PARAMS = 'EgIQAQ%3D%3D' # Videos only
4676 _TESTS = []
4677
4678
4679 class YoutubeSearchDateIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor):
4680 IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
4681 _SEARCH_KEY = 'ytsearchdate'
4682 IE_DESC = 'YouTube search, newest videos first'
4683 _SEARCH_PARAMS = 'CAISAhAB' # Videos only, sorted by date
4684
4685
4686 class YoutubeSearchURLIE(YoutubeTabBaseInfoExtractor):
4687 IE_DESC = 'YouTube search URLs with sorting and filter support'
4688 IE_NAME = YoutubeSearchIE.IE_NAME + '_url'
4689 _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?:[^&]+)(?:[&]|$)'
4690 _TESTS = [{
4691 'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
4692 'playlist_mincount': 5,
4693 'info_dict': {
4694 'id': 'youtube-dl test video',
4695 'title': 'youtube-dl test video',
4696 }
4697 }, {
4698 'url': 'https://www.youtube.com/results?search_query=python&sp=EgIQAg%253D%253D',
4699 'playlist_mincount': 5,
4700 'info_dict': {
4701 'id': 'python',
4702 'title': 'python',
4703 }
4704
4705 }, {
4706 'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
4707 'only_matching': True,
4708 }]
4709
4710 def _real_extract(self, url):
4711 qs = parse_qs(url)
4712 query = (qs.get('search_query') or qs.get('q'))[0]
4713 return self.playlist_result(self._search_results(query, qs.get('sp', (None,))[0]), query, query)
4714
4715
4716 class YoutubeFeedsInfoExtractor(YoutubeTabIE):
4717 """
4718 Base class for feed extractors
4719 Subclasses must define the _FEED_NAME property.
4720 """
4721 _LOGIN_REQUIRED = True
4722 _TESTS = []
4723
4724 @property
4725 def IE_NAME(self):
4726 return 'youtube:%s' % self._FEED_NAME
4727
4728 def _real_extract(self, url):
4729 return self.url_result(
4730 'https://www.youtube.com/feed/%s' % self._FEED_NAME,
4731 ie=YoutubeTabIE.ie_key())
4732
4733
4734 class YoutubeWatchLaterIE(InfoExtractor):
4735 IE_NAME = 'youtube:watchlater'
4736 IE_DESC = 'YouTube watch later list; ":ytwatchlater" keyword (requires cookies)'
4737 _VALID_URL = r':ytwatchlater'
4738 _TESTS = [{
4739 'url': ':ytwatchlater',
4740 'only_matching': True,
4741 }]
4742
4743 def _real_extract(self, url):
4744 return self.url_result(
4745 'https://www.youtube.com/playlist?list=WL', ie=YoutubeTabIE.ie_key())
4746
4747
4748 class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
4749 IE_DESC = 'YouTube recommended videos; ":ytrec" keyword'
4750 _VALID_URL = r'https?://(?:www\.)?youtube\.com/?(?:[?#]|$)|:ytrec(?:ommended)?'
4751 _FEED_NAME = 'recommended'
4752 _LOGIN_REQUIRED = False
4753 _TESTS = [{
4754 'url': ':ytrec',
4755 'only_matching': True,
4756 }, {
4757 'url': ':ytrecommended',
4758 'only_matching': True,
4759 }, {
4760 'url': 'https://youtube.com',
4761 'only_matching': True,
4762 }]
4763
4764
4765 class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
4766 IE_DESC = 'YouTube subscriptions feed; ":ytsubs" keyword (requires cookies)'
4767 _VALID_URL = r':ytsub(?:scription)?s?'
4768 _FEED_NAME = 'subscriptions'
4769 _TESTS = [{
4770 'url': ':ytsubs',
4771 'only_matching': True,
4772 }, {
4773 'url': ':ytsubscriptions',
4774 'only_matching': True,
4775 }]
4776
4777
4778 class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
4779 IE_DESC = 'YouTube watch history; ":ythis" keyword (requires cookies)'
4780 _VALID_URL = r':ythis(?:tory)?'
4781 _FEED_NAME = 'history'
4782 _TESTS = [{
4783 'url': ':ythistory',
4784 'only_matching': True,
4785 }]
4786
4787
4788 class YoutubeTruncatedURLIE(InfoExtractor):
4789 IE_NAME = 'youtube:truncated_url'
4790 IE_DESC = False # Do not list
4791 _VALID_URL = r'''(?x)
4792 (?:https?://)?
4793 (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
4794 (?:watch\?(?:
4795 feature=[a-z_]+|
4796 annotation_id=annotation_[^&]+|
4797 x-yt-cl=[0-9]+|
4798 hl=[^&]*|
4799 t=[0-9]+
4800 )?
4801 |
4802 attribution_link\?a=[^&]+
4803 )
4804 $
4805 '''
4806
4807 _TESTS = [{
4808 'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
4809 'only_matching': True,
4810 }, {
4811 'url': 'https://www.youtube.com/watch?',
4812 'only_matching': True,
4813 }, {
4814 'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
4815 'only_matching': True,
4816 }, {
4817 'url': 'https://www.youtube.com/watch?feature=foo',
4818 'only_matching': True,
4819 }, {
4820 'url': 'https://www.youtube.com/watch?hl=en-GB',
4821 'only_matching': True,
4822 }, {
4823 'url': 'https://www.youtube.com/watch?t=2372',
4824 'only_matching': True,
4825 }]
4826
4827 def _real_extract(self, url):
4828 raise ExtractorError(
4829 'Did you forget to quote the URL? Remember that & is a meta '
4830 'character in most shells, so you want to put the URL in quotes, '
4831 'like youtube-dl '
4832 '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
4833 'or simply youtube-dl BaW_jenozKc .',
4834 expected=True)
4835
4836
4837 class YoutubeClipIE(InfoExtractor):
4838 IE_NAME = 'youtube:clip'
4839 IE_DESC = False # Do not list
4840 _VALID_URL = r'https?://(?:www\.)?youtube\.com/clip/'
4841
4842 def _real_extract(self, url):
4843 self.report_warning('YouTube clips are not currently supported. The entire video will be downloaded instead')
4844 return self.url_result(url, 'Generic')
4845
4846
4847 class YoutubeTruncatedIDIE(InfoExtractor):
4848 IE_NAME = 'youtube:truncated_id'
4849 IE_DESC = False # Do not list
4850 _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'
4851
4852 _TESTS = [{
4853 'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
4854 'only_matching': True,
4855 }]
4856
4857 def _real_extract(self, url):
4858 video_id = self._match_id(url)
4859 raise ExtractorError(
4860 'Incomplete YouTube ID %s. URL %s looks truncated.' % (video_id, url),
4861 expected=True)