]> jfr.im git - yt-dlp.git/blob - yt_dlp/extractor/youtube.py
[extractor/youtube] Use device-specific user agent (#4770)
[yt-dlp.git] / yt_dlp / extractor / youtube.py
1 import base64
2 import calendar
3 import copy
4 import datetime
5 import hashlib
6 import itertools
7 import json
8 import math
9 import os.path
10 import random
11 import re
12 import sys
13 import threading
14 import time
15 import traceback
16 import urllib.error
17 import urllib.parse
18
19 from .common import InfoExtractor, SearchInfoExtractor
20 from .openload import PhantomJSwrapper
21 from ..compat import functools
22 from ..jsinterp import JSInterpreter
23 from ..utils import (
24 NO_DEFAULT,
25 ExtractorError,
26 UserNotLive,
27 bug_reports_message,
28 classproperty,
29 clean_html,
30 datetime_from_str,
31 dict_get,
32 float_or_none,
33 format_field,
34 get_first,
35 int_or_none,
36 is_html,
37 join_nonempty,
38 js_to_json,
39 mimetype2ext,
40 network_exceptions,
41 orderedSet,
42 parse_codecs,
43 parse_count,
44 parse_duration,
45 parse_iso8601,
46 parse_qs,
47 qualities,
48 remove_start,
49 smuggle_url,
50 str_or_none,
51 str_to_int,
52 strftime_or_none,
53 traverse_obj,
54 try_get,
55 unescapeHTML,
56 unified_strdate,
57 unified_timestamp,
58 unsmuggle_url,
59 update_url_query,
60 url_or_none,
61 urljoin,
62 variadic,
63 )
64
# Static InnerTube client configurations, keyed by client name.
# Every entry provides 'INNERTUBE_CONTEXT' (the request context sent to the
# API) and the numeric 'INNERTUBE_CONTEXT_CLIENT_NAME'. 'INNERTUBE_API_KEY',
# 'INNERTUBE_HOST' and 'REQUIRE_JS_PLAYER' are optional here; missing ones are
# filled in by build_innertube_clients().
# any clients starting with _ cannot be explicitly requested by the user
INNERTUBE_CLIENTS = {
    'web': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB',
                'clientVersion': '2.20220801.00.00',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 1
    },
    'web_embedded': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB_EMBEDDED_PLAYER',
                'clientVersion': '1.20220731.00.00',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 56
    },
    'web_music': {
        'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
        'INNERTUBE_HOST': 'music.youtube.com',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB_REMIX',
                'clientVersion': '1.20220727.01.00',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 67,
    },
    'web_creator': {
        'INNERTUBE_API_KEY': 'AIzaSyBUPetSUmoZL-OhlxA7wSac5XinrygCqMo',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB_CREATOR',
                'clientVersion': '1.20220726.00.00',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 62,
    },
    'android': {
        'INNERTUBE_API_KEY': 'AIzaSyA8eiZmM1FaDVjRy-df2KTyQ_vz_yYM39w',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID',
                'clientVersion': '17.31.35',
                'androidSdkVersion': 30,
                'userAgent': 'com.google.android.youtube/17.31.35 (Linux; U; Android 11) gzip'
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 3,
        'REQUIRE_JS_PLAYER': False
    },
    'android_embedded': {
        'INNERTUBE_API_KEY': 'AIzaSyCjc_pVEDi4qsv5MtC2dMXzpIaDoRFLsxw',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID_EMBEDDED_PLAYER',
                'clientVersion': '17.31.35',
                'androidSdkVersion': 30,
                'userAgent': 'com.google.android.youtube/17.31.35 (Linux; U; Android 11) gzip'
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 55,
        'REQUIRE_JS_PLAYER': False
    },
    'android_music': {
        'INNERTUBE_API_KEY': 'AIzaSyAOghZGza2MQSZkY_zfZ370N-PUdXEo8AI',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID_MUSIC',
                'clientVersion': '5.16.51',
                'androidSdkVersion': 30,
                'userAgent': 'com.google.android.apps.youtube.music/5.16.51 (Linux; U; Android 11) gzip'
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 21,
        'REQUIRE_JS_PLAYER': False
    },
    'android_creator': {
        'INNERTUBE_API_KEY': 'AIzaSyD_qjV8zaaUMehtLkrKFgVeSX_Iqbtyws8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID_CREATOR',
                'clientVersion': '22.30.100',
                'androidSdkVersion': 30,
                'userAgent': 'com.google.android.apps.youtube.creator/22.30.100 (Linux; U; Android 11) gzip'
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 14,
        'REQUIRE_JS_PLAYER': False
    },
    # iOS clients have HLS live streams. Setting device model to get 60fps formats.
    # See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/680#issuecomment-1002724558
    'ios': {
        'INNERTUBE_API_KEY': 'AIzaSyB-63vPrdThhKuerbB2N_l7Kwwcxj6yUAc',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS',
                'clientVersion': '17.30.1',
                'deviceModel': 'iPhone14,3',
                'userAgent': 'com.google.ios.youtube/17.30.1 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)'
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 5,
        'REQUIRE_JS_PLAYER': False
    },
    'ios_embedded': {
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS_MESSAGES_EXTENSION',
                'clientVersion': '17.30.1',
                'deviceModel': 'iPhone14,3',
                'userAgent': 'com.google.ios.youtube/17.30.1 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)'
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 66,
        'REQUIRE_JS_PLAYER': False
    },
    'ios_music': {
        'INNERTUBE_API_KEY': 'AIzaSyBAETezhkwP0ZWA02RsqT1zu78Fpt0bC_s',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS_MUSIC',
                'clientVersion': '5.18',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 26,
        'REQUIRE_JS_PLAYER': False
    },
    'ios_creator': {
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS_CREATOR',
                'clientVersion': '22.29.101',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 15,
        'REQUIRE_JS_PLAYER': False
    },
    # mweb has 'ultralow' formats
    # See: https://github.com/yt-dlp/yt-dlp/pull/557
    'mweb': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'MWEB',
                'clientVersion': '2.20220801.00.00',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 2
    },
    # This client can access age restricted videos (unless the uploader has disabled the 'allow embedding' option)
    # See: https://github.com/zerodytrash/YouTube-Internal-Clients
    'tv_embedded': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'TVHTML5_SIMPLY_EMBEDDED_PLAYER',
                'clientVersion': '2.0',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 85
    },
}
233
234
def _split_innertube_client(client_name):
    """Split a client name into ``(name, base, variant)``.

    Two spellings are understood:

    * ``"<variant>.<base>"`` (split at the last dot) yields
      ``(variant, base, variant)``.
    * ``"<base>_<variant>"`` (split at the first underscore) yields
      ``(client_name, base, variant)``; ``variant`` is ``None`` when the name
      contains no underscore.
    """
    dotted_variant, dot, dotted_base = client_name.rpartition('.')
    if dot:
        return dotted_variant, dotted_base, dotted_variant

    base, underscore, variant = client_name.partition('_')
    return client_name, base, variant if underscore else None
241
242
def build_innertube_clients():
    """Normalize ``INNERTUBE_CLIENTS`` in place.

    For every configured client this fills in the default API key, host,
    ``REQUIRE_JS_PLAYER`` flag and ``hl`` language, and assigns a numeric
    ``priority`` derived from the position of the base client in
    ``base_clients`` (earlier means higher), lowered for variants.
    Clients without a variant additionally get a derived
    ``<client>_embedscreen`` entry.
    """
    third_party_context = {
        'embedUrl': 'https://www.youtube.com/',  # Can be any valid URL
    }
    base_clients = ('android', 'web', 'tv', 'ios', 'mweb')
    rank_of = qualities(base_clients[::-1])
    defaults = (
        ('INNERTUBE_API_KEY', 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8'),
        ('INNERTUBE_HOST', 'www.youtube.com'),
        ('REQUIRE_JS_PLAYER', True),
    )

    # Iterate over a snapshot: "*_embedscreen" entries are inserted while looping
    for name, config in tuple(INNERTUBE_CLIENTS.items()):
        for key, value in defaults:
            config.setdefault(key, value)
        context = config['INNERTUBE_CONTEXT']
        context['client'].setdefault('hl', 'en')

        base_client, variant = _split_innertube_client(name)[1:]
        config['priority'] = 10 * rank_of(base_client)

        if variant == 'embedded':
            context['thirdParty'] = third_party_context
            config['priority'] -= 2
        elif variant:
            config['priority'] -= 3
        else:
            # Plain client: derive a copy that identifies itself as an embed screen
            embedscreen = copy.deepcopy(config)
            embedscreen['INNERTUBE_CONTEXT']['client']['clientScreen'] = 'EMBED'
            embedscreen['INNERTUBE_CONTEXT']['thirdParty'] = third_party_context
            embedscreen['priority'] -= 3
            INNERTUBE_CLIENTS[f'{name}_embedscreen'] = embedscreen
269
270
# Normalize INNERTUBE_CLIENTS once, at module import time
build_innertube_clients()
272
273
274 class YoutubeBaseInfoExtractor(InfoExtractor):
275 """Provide base functions for Youtube extractors"""
276
# Regex alternation of reserved path names.
# NOTE(review): presumably used to keep these from being treated as
# channel/user names; the usage is outside this view - confirm.
_RESERVED_NAMES = (
    r'channel|c|user|playlist|watch|w|v|embed|e|watch_popup|clip|'
    r'shorts|movies|results|search|shared|hashtag|trending|explore|feed|feeds|'
    r'browse|oembed|get_video_info|iframe_api|s/player|'
    r'storefront|oops|index|account|reporthistory|t/terms|about|upload|signin|logout')

# Matches a playlist ID: either a known prefix followed by at least 10
# ID characters, or one of the fixed IDs RDMM, WL, LL, LM.
_PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)'

# _NETRC_MACHINE = 'youtube'

# If True it will raise an error if no login info is provided
_LOGIN_REQUIRED = False
289
# Hostname regex fragments for Invidious/Piped front-ends.
# NOTE(review): presumably substituted for the %(invidious)s placeholder in
# YoutubeIE._VALID_URL - the interpolation itself is outside this view.
_INVIDIOUS_SITES = (
    # invidious-redirect websites
    r'(?:www\.)?redirect\.invidious\.io',
    r'(?:(?:www|dev)\.)?invidio\.us',
    # Invidious instances taken from https://github.com/iv-org/documentation/blob/master/docs/instances.md
    r'(?:www\.)?invidious\.pussthecat\.org',
    r'(?:www\.)?invidious\.zee\.li',
    r'(?:www\.)?invidious\.ethibox\.fr',
    r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',
    r'(?:www\.)?osbivz6guyeahrwp2lnwyjk2xos342h4ocsxyqrlaopqjuhwn2djiiyd\.onion',
    r'(?:www\.)?u2cvlit75owumwpy4dj2hsmvkq7nvrclkpht7xgyye2pyoxhpmclkrad\.onion',
    # youtube-dl invidious instances list
    r'(?:(?:www|no)\.)?invidiou\.sh',
    r'(?:(?:www|fi)\.)?invidious\.snopyta\.org',
    r'(?:www\.)?invidious\.kabi\.tk',
    r'(?:www\.)?invidious\.mastodon\.host',
    r'(?:www\.)?invidious\.zapashcanon\.fr',
    r'(?:www\.)?(?:invidious(?:-us)?|piped)\.kavin\.rocks',
    r'(?:www\.)?invidious\.tinfoil-hat\.net',
    r'(?:www\.)?invidious\.himiko\.cloud',
    r'(?:www\.)?invidious\.reallyancient\.tech',
    r'(?:www\.)?invidious\.tube',
    r'(?:www\.)?invidiou\.site',
    r'(?:www\.)?invidious\.site',
    r'(?:www\.)?invidious\.xyz',
    r'(?:www\.)?invidious\.nixnet\.xyz',
    r'(?:www\.)?invidious\.048596\.xyz',
    r'(?:www\.)?invidious\.drycat\.fr',
    r'(?:www\.)?inv\.skyn3t\.in',
    r'(?:www\.)?tube\.poal\.co',
    r'(?:www\.)?tube\.connect\.cafe',
    r'(?:www\.)?vid\.wxzm\.sx',
    r'(?:www\.)?vid\.mint\.lgbt',
    r'(?:www\.)?vid\.puffyan\.us',
    r'(?:www\.)?yewtu\.be',
    r'(?:www\.)?yt\.elukerio\.org',
    r'(?:www\.)?yt\.lelux\.fi',
    r'(?:www\.)?invidious\.ggc-project\.de',
    r'(?:www\.)?yt\.maisputain\.ovh',
    r'(?:www\.)?ytprivate\.com',
    r'(?:www\.)?invidious\.13ad\.de',
    r'(?:www\.)?invidious\.toot\.koeln',
    r'(?:www\.)?invidious\.fdn\.fr',
    r'(?:www\.)?watch\.nettohikari\.com',
    r'(?:www\.)?invidious\.namazso\.eu',
    r'(?:www\.)?invidious\.silkky\.cloud',
    r'(?:www\.)?invidious\.exonip\.de',
    r'(?:www\.)?invidious\.riverside\.rocks',
    r'(?:www\.)?invidious\.blamefran\.net',
    r'(?:www\.)?invidious\.moomoo\.de',
    r'(?:www\.)?ytb\.trom\.tf',
    r'(?:www\.)?yt\.cyberhost\.uk',
    r'(?:www\.)?kgg2m7yk5aybusll\.onion',
    r'(?:www\.)?qklhadlycap4cnod\.onion',
    r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',
    r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',
    r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',
    r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',
    r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',
    r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',
    r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',
    r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',
    r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',
    r'(?:www\.)?hpniueoejy4opn7bc4ftgazyqjoeqwlvh2uiku2xqku6zpoa4bf5ruid\.onion',
    # piped instances from https://github.com/TeamPiped/Piped/wiki/Instances
    # (piped.kavin.rocks is also matched by the combined kavin.rocks entry above)
    r'(?:www\.)?piped\.kavin\.rocks',
    r'(?:www\.)?piped\.silkky\.cloud',
    r'(?:www\.)?piped\.tokhmi\.xyz',
    r'(?:www\.)?piped\.moomoo\.me',
    r'(?:www\.)?il\.ax',
    r'(?:www\.)?piped\.syncpundit\.com',
    r'(?:www\.)?piped\.mha\.fi',
    r'(?:www\.)?piped\.mint\.lgbt',
    r'(?:www\.)?piped\.privacy\.com\.de',
)
365
def _initialize_consent(self):
    """Make sure an accepting ``CONSENT`` cookie is set for youtube.com.

    Nothing is changed when a ``__Secure-3PSID`` cookie is present or when
    the existing ``CONSENT`` cookie already contains ``YES``. Otherwise the
    numeric id of a ``PENDING+<id>`` cookie is reused if available, else a
    random three-digit id is generated.
    """
    jar = self._get_cookies('https://www.youtube.com/')
    if jar.get('__Secure-3PSID'):
        return

    pending_id = None
    consent_cookie = jar.get('CONSENT')
    if consent_cookie:
        if 'YES' in consent_cookie.value:
            return
        pending_id = self._search_regex(
            r'PENDING\+(\d+)', consent_cookie.value, 'consent', default=None)

    consent_id = pending_id or random.randint(100, 999)
    self._set_cookie('.youtube.com', 'CONSENT', 'YES+cb.20210328-17-p0.en+FX+%s' % consent_id)
380
def _initialize_pref(self):
    """Force the ``PREF`` cookie to English (``hl=en``) and UTC (``tz=UTC``).

    Any other settings found in an existing ``PREF`` cookie are kept; if the
    cookie cannot be parsed a warning is reported and it is rebuilt from
    scratch.
    """
    existing = self._get_cookies('https://www.youtube.com/').get('PREF')
    settings = {}
    if existing:
        try:
            settings = dict(urllib.parse.parse_qsl(existing.value))
        except ValueError:
            self.report_warning('Failed to parse user PREF cookie' + bug_reports_message())
    settings['hl'] = 'en'
    settings['tz'] = 'UTC'
    self._set_cookie('.youtube.com', name='PREF', value=urllib.parse.urlencode(settings))
392
def _real_initialize(self):
    """Prepare cookies (PREF, then CONSENT) and enforce the login requirement."""
    self._initialize_pref()
    self._initialize_consent()
    self._check_login_required()
397
def _check_login_required(self):
    """Raise a login-required error if this extractor needs cookies and none were passed."""
    if not self._LOGIN_REQUIRED or self._cookies_passed:
        return
    self.raise_login_required('Login details are needed to download this content', method='cookies')
401
# Regex prefixes matching the JavaScript assignments of `ytInitialData`
# (either `window["ytInitialData"] =` or `ytInitialData =`) and of
# `ytInitialPlayerResponse` in a webpage.
_YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*='
_YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*='
404
def _get_default_ytcfg(self, client='web'):
    """Return a deep copy of the built-in config for `client`, so callers may mutate it freely."""
    return copy.deepcopy(INNERTUBE_CLIENTS[client])
407
def _get_innertube_host(self, client='web'):
    """Return the 'INNERTUBE_HOST' configured for `client` in INNERTUBE_CLIENTS."""
    return INNERTUBE_CLIENTS[client]['INNERTUBE_HOST']
410
def _ytcfg_get_safe(self, ytcfg, getter, expected_type=None, default_client='web'):
    """Look up a value in `ytcfg`, falling back to the built-in client config.

    Works like ``try_get(ytcfg, getter, expected_type)``; when that yields a
    falsy result the same lookup is repeated on the default config of
    `default_client`.
    """
    value = try_get(ytcfg, getter, expected_type)
    if value:
        return value
    return try_get(self._get_default_ytcfg(default_client), getter, expected_type)
415
def _extract_client_name(self, ytcfg, default_client='web'):
    """Return the client name string from `ytcfg`, or from the default config of `default_client`.

    Looks at 'INNERTUBE_CLIENT_NAME' first, then at the 'clientName' of the
    InnerTube context.
    """
    return self._ytcfg_get_safe(
        ytcfg, (lambda x: x['INNERTUBE_CLIENT_NAME'],
                lambda x: x['INNERTUBE_CONTEXT']['client']['clientName']), str, default_client)
420
def _extract_client_version(self, ytcfg, default_client='web'):
    """Return the client version string from `ytcfg`, or from the default config of `default_client`.

    Looks at 'INNERTUBE_CLIENT_VERSION' first, then at the 'clientVersion' of
    the InnerTube context.
    """
    return self._ytcfg_get_safe(
        ytcfg, (lambda x: x['INNERTUBE_CLIENT_VERSION'],
                lambda x: x['INNERTUBE_CONTEXT']['client']['clientVersion']), str, default_client)
425
def _select_api_hostname(self, req_api_hostname, default_client=None):
    """Pick the API hostname to use.

    Precedence: the ``innertube_host`` extractor argument, then
    `req_api_hostname`, then the host configured for `default_client`
    (``'web'`` when not given).
    """
    configured_host = self._configuration_arg('innertube_host', [''], ie_key=YoutubeIE.ie_key())[0]
    if configured_host:
        return configured_host
    if req_api_hostname:
        return req_api_hostname
    return self._get_innertube_host(default_client or 'web')
429
def _extract_api_key(self, ytcfg=None, default_client='web'):
    """Return 'INNERTUBE_API_KEY' from `ytcfg`, or from the default config of `default_client`."""
    return self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_API_KEY'], str, default_client)
432
def _extract_context(self, ytcfg=None, default_client='web'):
    """Return the InnerTube context to send with API requests.

    The context is taken from `ytcfg` when it provides a dict under
    'INNERTUBE_CONTEXT', otherwise from the default config of
    `default_client`. Its 'client' section is forced to English and UTC.

    NOTE(review): the 'client' dict appears to be updated in place, so a
    context coming from the caller's `ytcfg` is modified as well - confirm.
    """
    candidates = (ytcfg, self._get_default_ytcfg(default_client))
    context = get_first(candidates, 'INNERTUBE_CONTEXT', expected_type=dict)
    # Enforce language and tz for extraction
    client_section = traverse_obj(context, 'client', expected_type=dict, default={})
    client_section['hl'] = 'en'
    client_section['timeZone'] = 'UTC'
    client_section['utcOffsetMinutes'] = 0
    return context
440
# Cached SAPISID cookie value used by _generate_sapisidhash_header():
# None = not looked up yet, False = looked up but not available
_SAPISID = None
442
def _generate_sapisidhash_header(self, origin='https://www.youtube.com'):
    """Build the ``SAPISIDHASH`` authorization header value for `origin`.

    The SAPISID value is read from the youtube.com cookies on first use and
    cached on the instance (``False`` when unavailable).

    @returns  'SAPISIDHASH <timestamp>_<sha1 hex>' or None if no SAPISID
              cookie is available
    """
    now = round(time.time())
    if self._SAPISID is None:
        jar = self._get_cookies('https://www.youtube.com')
        # Sometimes SAPISID cookie isn't present but __Secure-3PAPISID is.
        # See: https://github.com/yt-dlp/yt-dlp/issues/393
        cookie = dict_get(jar, ('__Secure-3PAPISID', 'SAPISID'))
        if not (cookie and cookie.value):
            # Remember the miss so the cookies are not inspected again
            self._SAPISID = False
        else:
            self._SAPISID = cookie.value
            self.write_debug('Extracted SAPISID cookie')
            # SAPISID cookie is required if not already present
            if not jar.get('SAPISID'):
                self.write_debug('Copying __Secure-3PAPISID cookie to SAPISID cookie')
                self._set_cookie(
                    '.youtube.com', 'SAPISID', self._SAPISID, secure=True, expire_time=now + 3600)

    if not self._SAPISID:
        return None

    # SAPISIDHASH algorithm from https://stackoverflow.com/a/32065323
    signed_text = f'{now} {self._SAPISID} {origin}'
    digest = hashlib.sha1(signed_text.encode()).hexdigest()
    return f'SAPISIDHASH {now}_{digest}'
467
def _call_api(self, ep, query, video_id, fatal=True, headers=None,
              note='Downloading API JSON', errnote='Unable to download API page',
              context=None, api_key=None, api_hostname=None, default_client='web'):
    """POST `query` to the InnerTube endpoint `ep` and return the parsed JSON.

    @param context       InnerTube context; extracted for `default_client` when falsy
    @param headers       extra headers, applied on top of the generated API headers
    @param api_key       overridden by the ``innertube_key`` extractor argument;
                         defaults to the key of `default_client`
    @param api_hostname  passed through _select_api_hostname()
    """
    payload = {'context': context or self._extract_context(default_client=default_client)}
    payload.update(query)

    request_headers = self.generate_api_headers(default_client=default_client)
    request_headers['content-type'] = 'application/json'
    if headers:
        request_headers.update(headers)

    configured_key = self._configuration_arg(
        'innertube_key', [''], ie_key=YoutubeIE.ie_key(), casesense=True)[0]
    api_key = configured_key or api_key or self._extract_api_key(default_client=default_client)
    hostname = self._select_api_hostname(api_hostname, default_client)

    return self._download_json(
        f'https://{hostname}/youtubei/v1/{ep}',
        video_id=video_id, fatal=fatal, note=note, errnote=errnote,
        data=json.dumps(payload).encode('utf8'), headers=request_headers,
        query={'key': api_key, 'prettyPrint': 'false'})
485
def extract_yt_initial_data(self, item_id, webpage, fatal=True):
    """Search `webpage` for the JSON assigned to ytInitialData (see _YT_INITIAL_DATA_RE)."""
    return self._search_json(self._YT_INITIAL_DATA_RE, webpage, 'yt initial data', item_id, fatal=fatal)
488
@staticmethod
def _extract_session_index(*data):
    """
    Index of current account in account list.
    See: https://github.com/yt-dlp/yt-dlp/pull/519

    Returns the first 'SESSION_INDEX' among the given ytcfgs that converts
    to an int, or None if there is none.
    """
    candidates = (
        int_or_none(try_get(ytcfg, lambda x: x['SESSION_INDEX']))
        for ytcfg in data)
    return next((index for index in candidates if index is not None), None)
499
# Deprecated?
def _extract_identity_token(self, ytcfg=None, webpage=None):
    """Return the identity token from `ytcfg` ('ID_TOKEN'), else from `webpage`, else None."""
    token = try_get(ytcfg, lambda x: x['ID_TOKEN'], str) if ytcfg else None
    if token:
        return token
    if not webpage:
        return None
    return self._search_regex(
        r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
        'identity token', default=None, fatal=False)
510
@staticmethod
def _extract_account_syncid(*args):
    """
    Extract syncId required to download private playlists of secondary channels
    @params response and/or ytcfg
    @returns the first sync id found, or None
    """
    datasync_getters = (
        lambda x: x['responseContext']['mainAppWebResponseContext']['datasyncId'],
        lambda x: x['DATASYNC_ID'])

    for source in args:
        # ytcfg includes channel_syncid if on secondary channel
        delegated = try_get(source, lambda x: x['DELEGATED_SESSION_ID'], str)
        if delegated:
            return delegated

        datasync_id = try_get(source, datasync_getters, str) or ''
        parts = datasync_id.split('||')
        # datasyncid is of the form "channel_syncid||user_syncid" for secondary channel
        # and just "user_syncid||" for primary channel. We only want the channel_syncid
        if len(parts) >= 2 and parts[1]:
            return parts[0]
    return None
529
@staticmethod
def _extract_visitor_data(*args):
    """
    Extracts visitorData from an API response or ytcfg
    Appears to be used to track session state

    For each argument the keys 'VISITOR_DATA',
    INNERTUBE_CONTEXT/client/visitorData and responseContext/visitorData are
    looked up; the first string found is returned.
    """
    return get_first(
        args, [('VISITOR_DATA', ('INNERTUBE_CONTEXT', 'client', 'visitorData'), ('responseContext', 'visitorData'))],
        expected_type=str)
539
@functools.cached_property
def is_authenticated(self):
    """True if a SAPISIDHASH authorization header can be generated (computed once, then cached)."""
    return bool(self._generate_sapisidhash_header())
543
def extract_ytcfg(self, video_id, webpage):
    """Parse the argument of the first ``ytcfg.set({...});`` call in `webpage`.

    @returns the parsed dict; ``{}`` if there is no webpage, no such call,
             or the JSON cannot be parsed
    """
    if not webpage:
        return {}
    raw_config = self._search_regex(
        r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg', default='{}')
    return self._parse_json(raw_config, video_id, fatal=False) or {}
551
def generate_api_headers(
        self, *, ytcfg=None, account_syncid=None, session_index=None,
        visitor_data=None, identity_token=None, api_hostname=None, default_client='web'):
    """Build the HTTP headers for an InnerTube API request.

    Explicit arguments take precedence; anything not given is looked up in
    `ytcfg` (with the default config of `default_client` as fallback where
    applicable). Authorization headers are added when a SAPISID cookie is
    available. Headers whose value is None are omitted from the result.
    """
    origin = 'https://' + (self._select_api_hostname(api_hostname, default_client))

    client_name_id = self._ytcfg_get_safe(
        ytcfg, lambda x: x['INNERTUBE_CONTEXT_CLIENT_NAME'], default_client=default_client)
    client_version = self._extract_client_version(ytcfg, default_client)
    identity_token = identity_token or self._extract_identity_token(ytcfg)
    page_id = account_syncid or self._extract_account_syncid(ytcfg)
    visitor_id = visitor_data or self._extract_visitor_data(ytcfg)
    user_agent = self._ytcfg_get_safe(
        ytcfg, lambda x: x['INNERTUBE_CONTEXT']['client']['userAgent'], default_client=default_client)

    headers = {
        'X-YouTube-Client-Name': str(client_name_id),
        'X-YouTube-Client-Version': client_version,
        'Origin': origin,
        'X-Youtube-Identity-Token': identity_token,
        'X-Goog-PageId': page_id,
        'X-Goog-Visitor-Id': visitor_id,
        'User-Agent': user_agent,
    }

    if session_index is None:
        session_index = self._extract_session_index(ytcfg)
    if account_syncid or session_index is not None:
        # Fall back to the first account when only a sync id is known
        headers['X-Goog-AuthUser'] = 0 if session_index is None else session_index

    authorization = self._generate_sapisidhash_header(origin)
    if authorization is not None:
        headers['Authorization'] = authorization
        headers['X-Origin'] = origin

    return {name: value for name, value in headers.items() if value is not None}
577
def _download_ytcfg(self, client, video_id):
    """Download the web page of `client` and return the ytcfg found in it.

    Only 'web', 'web_music' and 'web_embedded' have a page to fetch; any
    other client (or a failed download/parse) yields ``{}``.
    """
    config_pages = {
        'web': 'https://www.youtube.com',
        'web_music': 'https://music.youtube.com',
        'web_embedded': f'https://www.youtube.com/embed/{video_id}?html5=1'
    }
    page_url = config_pages.get(client)
    if not page_url:
        return {}
    progress_note = f'Downloading {client.replace("_", " ").strip()} client config'
    webpage = self._download_webpage(page_url, video_id, fatal=False, note=progress_note)
    return self.extract_ytcfg(video_id, webpage) or {}
589
@staticmethod
def _build_api_continuation_query(continuation, ctp=None):
    """Build the API query dict for a continuation token.

    'clickTracking' is only included when `ctp` (click tracking params) is truthy.
    """
    # TODO: Inconsistency with clickTrackingParams.
    # Currently we have a fixed ctp contained within context (from ytcfg)
    # and a ctp in root query for continuation.
    if not ctp:
        return {'continuation': continuation}
    return {
        'continuation': continuation,
        'clickTracking': {'clickTrackingParams': ctp},
    }
601
@classmethod
def _extract_next_continuation_data(cls, renderer):
    """Build a continuation query from `renderer`'s nextContinuationData/reloadContinuationData.

    @returns the query dict, or None if no continuation token is present
    """
    continuation_data = try_get(
        renderer, (lambda x: x['continuations'][0]['nextContinuationData'],
                   lambda x: x['continuation']['reloadContinuationData']), dict)
    token = continuation_data.get('continuation') if continuation_data else None
    if not token:
        return None
    return cls._build_api_continuation_query(token, continuation_data.get('clickTrackingParams'))
614
@classmethod
def _extract_continuation_ep_data(cls, continuation_ep: dict):
    """Build a continuation query from a continuation endpoint dict.

    @returns the query dict, or None if `continuation_ep` is not a dict or
             carries no 'continuationCommand' token
    """
    if not isinstance(continuation_ep, dict):
        return None
    token = try_get(continuation_ep, lambda x: x['continuationCommand']['token'], str)
    if not token:
        return None
    return cls._build_api_continuation_query(token, continuation_ep.get('clickTrackingParams'))
624
@classmethod
def _extract_continuation(cls, renderer):
    """Find the continuation query for `renderer`.

    The renderer's own next/reload continuation data is preferred; otherwise
    the entries under 'contents' and 'items' are scanned for the first
    continuationItemRenderer that yields a token.

    @returns the query dict, or None if nothing was found
    """
    query = cls._extract_next_continuation_data(renderer)
    if query:
        return query

    entries = [
        entry
        for key in ('contents', 'items')
        for entry in try_get(renderer, lambda x: x[key], list) or []]

    for entry in entries:
        if not isinstance(entry, dict):
            continue
        endpoint = try_get(
            entry, (lambda x: x['continuationItemRenderer']['continuationEndpoint'],
                    lambda x: x['continuationItemRenderer']['button']['buttonRenderer']['command']),
            dict)
        query = cls._extract_continuation_ep_data(endpoint)
        if query:
            return query
    return None
645
@classmethod
def _extract_alerts(cls, data):
    """Yield ``(alert_type, message)`` for every alert in ``data['alerts']``.

    Entries that are not dicts, alerts without a 'type' and alerts without
    any text are skipped. Nothing is yielded when `data` has no 'alerts' list.
    """
    for alert_dict in try_get(data, lambda x: x['alerts'], list) or []:
        if not isinstance(alert_dict, dict):
            continue
        for alert in alert_dict.values():
            # The response is untrusted: guard the renderer value the same
            # way as its container, instead of failing on `.get` below
            if not isinstance(alert, dict):
                continue
            alert_type = alert.get('type')
            if not alert_type:
                continue
            message = cls._get_text(alert, 'text')
            if message:
                yield alert_type, message
658
def _report_alerts(self, alerts, expected=True, fatal=True, only_once=False):
    """Report YouTube alerts as warnings and raise on the last error.

    @param alerts     iterable of (alert_type, message)
    @param fatal      if False, alerts of type "error" are only reported as warnings
    @param expected   passed to the raised ExtractorError
    @param only_once  passed to report_warning
    @raises ExtractorError  with the message of the last error alert (when fatal)
    """
    fatal_alerts, other_alerts = [], []
    for kind, message in alerts:
        is_fatal = kind.lower() == 'error' and fatal
        (fatal_alerts if is_fatal else other_alerts).append((kind, message))

    # All errors except the last one are demoted to warnings
    for kind, message in other_alerts + fatal_alerts[:-1]:
        self.report_warning(f'YouTube said: {kind} - {message}', only_once=only_once)

    if fatal_alerts:
        last_message = fatal_alerts[-1][1]
        raise ExtractorError('YouTube said: %s' % last_message, expected=expected)
672
def _extract_and_report_alerts(self, data, *args, **kwargs):
    """Extract the alerts of `data` and pass them to _report_alerts() with the remaining arguments."""
    return self._report_alerts(self._extract_alerts(data), *args, **kwargs)
675
def _extract_badges(self, renderer: dict):
    """Return the set of lower-cased metadataBadgeRenderer labels found in ``renderer['badges']``."""
    labels = (
        try_get(badge, lambda x: x['metadataBadgeRenderer']['label'], str)
        for badge in try_get(renderer, lambda x: x['badges'], list) or [])
    return {label.lower() for label in labels if label}
683
@staticmethod
def _get_text(data, *path_list, max_runs=None):
    """Return the first non-empty text found in `data` at any of the given paths.

    For each candidate object, 'simpleText' is preferred; otherwise the
    'text' values of its 'runs' are concatenated. A candidate that is itself
    a list is treated as the list of runs.

    @param path_list  traverse_obj paths tried in order; `data` itself is
                      used when no path is given
    @param max_runs   if set, only the first `max_runs` runs are joined
    @returns the text, or None if nothing was found
    """
    for path in path_list or [None]:
        if path is None:
            obj = [data]
        else:
            obj = traverse_obj(data, path, default=[])
            # A path without branching keys (`...` or list/tuple alternatives) gives
            # a single object rather than a list of candidates, so wrap it
            if not any(key is ... or isinstance(key, (list, tuple)) for key in variadic(path)):
                obj = [obj]
        for item in obj:
            text = try_get(item, lambda x: x['simpleText'], str)
            if text:
                return text
            runs = try_get(item, lambda x: x['runs'], list) or []
            if not runs and isinstance(item, list):
                runs = item

            # max_runs of None/0 means "no limit"
            runs = runs[:min(len(runs), max_runs or len(runs))]
            text = ''.join(traverse_obj(runs, (..., 'text'), expected_type=str, default=[]))
            if text:
                return text
705
def _get_count(self, data, *path_list):
    """Extract a numeric count from the text found at `path_list` (see _get_text).

    parse_count() is tried first; if it gives None, the leading run of digits
    and commas (after removing whitespace) is converted instead.
    """
    count_text = self._get_text(data, *path_list) or ''
    parsed = parse_count(count_text)
    if parsed is not None:
        return parsed
    leading_digits = self._search_regex(
        r'^([\d,]+)', re.sub(r'\s', '', count_text), 'count', default=None)
    return str_to_int(leading_digits)
713
@staticmethod
def _extract_thumbnails(data, *path_list):
    """
    Extract thumbnails from thumbnails dict
    @param path_list: path list to level that contains 'thumbnails' key
    @returns list of dicts with 'url', 'height' and 'width'; entries without
             a valid URL are dropped
    """
    results = []
    for path in path_list or [()]:
        entries = traverse_obj(data, (*variadic(path), 'thumbnails', ...), default=[])
        for entry in entries:
            url = url_or_none(entry.get('url'))
            if not url:
                continue
            # Sometimes youtube gives a wrong thumbnail URL. See:
            # https://github.com/yt-dlp/yt-dlp/issues/233
            # https://github.com/ytdl-org/youtube-dl/issues/28023
            if 'maxresdefault' in url:
                url = url.partition('?')[0]
            results.append({
                'url': url,
                'height': int_or_none(entry.get('height')),
                'width': int_or_none(entry.get('width')),
            })
    return results
737
@staticmethod
def extract_relative_time(relative_time_text):
    """
    Extracts a relative time from string and converts to dt object
    e.g. 'streamed 6 days ago', '5 seconds ago (edited)', 'updated today'

    @returns the result of datetime_from_str(), or None if the text contains
             no relative time or the "<n> <unit> ago" form cannot be converted
    """
    mobj = re.search(r'(?P<start>today|yesterday|now)|(?P<time>\d+)\s*(?P<unit>microsecond|second|minute|hour|day|week|month|year)s?\s*ago', relative_time_text)
    if not mobj:
        return None

    keyword = mobj.group('start')
    if keyword:
        return datetime_from_str(keyword)

    try:
        return datetime_from_str('now-%s%s' % (mobj.group('time'), mobj.group('unit')))
    except ValueError:
        return None
753
def _extract_time_text(self, renderer, *path_list):
    """@returns (timestamp, time_text)

    The text is read via _get_text(). A relative time ("3 days ago") is
    tried first, then unified_timestamp() on the whole text and finally on a
    date-like part of it. A warning is reported (once) when a non-empty text
    cannot be parsed.
    """
    text = self._get_text(renderer, *path_list) or ''
    relative = self.extract_relative_time(text)

    if isinstance(relative, datetime.datetime):
        timestamp = calendar.timegm(relative.timetuple())
    else:
        timestamp = unified_timestamp(text)
        if not timestamp:
            date_part = self._search_regex(
                (r'([a-z]+\s*\d{1,2},?\s*20\d{2})', r'(?:.+|^)(?:live|premieres|ed|ing)(?:\s*(?:on|for))?\s*(.+\d)'),
                text.lower(), 'time text', default=None)
            timestamp = unified_timestamp(date_part)

    if text and timestamp is None:
        self.report_warning(f"Cannot parse localized time text '{text}'" + bug_reports_message(), only_once=True)
    return timestamp, text
772
def _extract_response(self, item_id, query, note='Downloading API JSON', headers=None,
                      ytcfg=None, check_get_keys=None, ep='browse', fatal=True, api_hostname=None,
                      default_client='web'):
    """Call the InnerTube API with retries and return the response.

    Retried (via RetryManager): network errors that are not HTTP errors,
    HTTP errors other than 403/429, alerts containing "unknown error", and
    responses in which `check_get_keys` cannot be found.
    Anything else is handed to _error_or_warning(), i.e. raised or reported
    depending on `fatal`.

    @param check_get_keys  traverse_obj path(s) that must yield a truthy
                           value for the response to count as complete
    """
    for retry in self.RetryManager():
        try:
            # Always fatal here; failures are handled below according to `fatal`
            response = self._call_api(
                ep=ep, fatal=True, headers=headers,
                video_id=item_id, query=query, note=note,
                context=self._extract_context(ytcfg, default_client),
                api_key=self._extract_api_key(ytcfg, default_client),
                api_hostname=api_hostname, default_client=default_client)
        except ExtractorError as e:
            if not isinstance(e.cause, network_exceptions):
                return self._error_or_warning(e, fatal=fatal)
            elif not isinstance(e.cause, urllib.error.HTTPError):
                retry.error = e
                continue

            # Surface the error message from a JSON error body, if there is one
            first_bytes = e.cause.read(512)
            if not is_html(first_bytes):
                yt_error = try_get(
                    self._parse_json(
                        self._webpage_read_content(e.cause, None, item_id, prefix=first_bytes) or '{}', item_id, fatal=False),
                    lambda x: x['error']['message'], str)
                if yt_error:
                    self._report_alerts([('ERROR', yt_error)], fatal=False)
            # Downloading page may result in intermittent 5xx HTTP error
            # Sometimes a 404 is also received. See: https://github.com/ytdl-org/youtube-dl/issues/28289
            # We also want to catch all other network exceptions since errors in later pages can be troublesome
            # See https://github.com/yt-dlp/yt-dlp/issues/507#issuecomment-880188210
            if e.cause.code not in (403, 429):
                retry.error = e
                continue
            return self._error_or_warning(e, fatal=fatal)

        try:
            self._extract_and_report_alerts(response, only_once=True)
        except ExtractorError as e:
            # YouTube servers may return errors we want to retry on in a 200 OK response
            # See: https://github.com/yt-dlp/yt-dlp/issues/839
            if 'unknown error' in e.msg.lower():
                retry.error = e
                continue
            return self._error_or_warning(e, fatal=fatal)
        # Youtube sometimes sends incomplete data
        # See: https://github.com/ytdl-org/youtube-dl/issues/28194
        if not traverse_obj(response, *variadic(check_get_keys)):
            retry.error = ExtractorError('Incomplete data received', expected=True)
            continue

        return response
824
@staticmethod
def is_music_url(url):
    """Return True if `url` starts with http(s)://music.youtube.com/."""
    return bool(re.match(r'https?://music\.youtube\.com/', url))
828
def _extract_video(self, renderer):
    """Build a 'url'-type result for YoutubeIE from a video renderer dict.

    The URL points to /shorts/<id> when the renderer looks like a Short,
    otherwise to /watch?v=<id>. 'upload_date' is only filled in when the
    'approximate_date' extractor argument (youtubetab) is set.
    """
    video_id = renderer.get('videoId')
    title = self._get_text(renderer, 'title')
    description = self._get_text(renderer, 'descriptionSnippet')
    duration = parse_duration(self._get_text(
        renderer, 'lengthText', ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'text')))
    if duration is None:
        # Fall back to the duration embedded in the title's accessibility label
        duration = parse_duration(self._search_regex(
            r'(?i)(ago)(?!.*\1)\s+(?P<duration>[a-z0-9 ,]+?)(?:\s+[\d,]+\s+views)?(?:\s+-\s+play\s+short)?$',
            traverse_obj(renderer, ('title', 'accessibility', 'accessibilityData', 'label'), default='', expected_type=str),
            video_id, default=None, group='duration'))

    view_count = self._get_count(renderer, 'viewCountText')

    uploader = self._get_text(renderer, 'ownerText', 'shortBylineText')
    channel_id = traverse_obj(
        renderer, ('shortBylineText', 'runs', ..., 'navigationEndpoint', 'browseEndpoint', 'browseId'),
        expected_type=str, get_all=False)
    timestamp, time_text = self._extract_time_text(renderer, 'publishedTimeText')
    scheduled_timestamp = str_to_int(traverse_obj(renderer, ('upcomingEventData', 'startTime'), get_all=False))
    overlay_style = traverse_obj(
        renderer, ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'style'),
        get_all=False, expected_type=str)
    badges = self._extract_badges(renderer)
    thumbnails = self._extract_thumbnails(renderer, 'thumbnail')
    navigation_url = urljoin('https://www.youtube.com/', traverse_obj(
        renderer, ('navigationEndpoint', 'commandMetadata', 'webCommandMetadata', 'url'),
        expected_type=str)) or ''
    url = f'https://www.youtube.com/watch?v={video_id}'
    # Shorts are recognized by the overlay style or by the navigation URL
    if overlay_style == 'SHORTS' or '/shorts/' in navigation_url:
        url = f'https://www.youtube.com/shorts/{video_id}'

    return {
        '_type': 'url',
        'ie_key': YoutubeIE.ie_key(),
        'id': video_id,
        'url': url,
        'title': title,
        'description': description,
        'duration': duration,
        'view_count': view_count,
        'uploader': uploader,
        'channel_id': channel_id,
        'thumbnails': thumbnails,
        'upload_date': (strftime_or_none(timestamp, '%Y%m%d')
                        if self._configuration_arg('approximate_date', ie_key='youtubetab')
                        else None),
        # A scheduled start time wins over "streamed ..." text, which wins over live markers
        'live_status': ('is_upcoming' if scheduled_timestamp is not None
                        else 'was_live' if 'streamed' in time_text.lower()
                        else 'is_live' if overlay_style == 'LIVE' or 'live now' in badges
                        else None),
        'release_timestamp': scheduled_timestamp,
        'availability': self._availability(needs_premium='premium' in badges, needs_subscription='members only' in badges)
    }
883
884
885 class YoutubeIE(YoutubeBaseInfoExtractor):
886 IE_DESC = 'YouTube'
887 _VALID_URL = r"""(?x)^
888 (
889 (?:https?://|//) # http(s):// or protocol-independent URL
890 (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com|
891 (?:www\.)?deturl\.com/www\.youtube\.com|
892 (?:www\.)?pwnyoutube\.com|
893 (?:www\.)?hooktube\.com|
894 (?:www\.)?yourepeat\.com|
895 tube\.majestyc\.net|
896 %(invidious)s|
897 youtube\.googleapis\.com)/ # the various hostnames, with wildcard subdomains
898 (?:.*?\#/)? # handle anchor (#/) redirect urls
899 (?: # the various things that can precede the ID:
900 (?:(?:v|embed|e|shorts)/(?!videoseries|live_stream)) # v/ or embed/ or e/ or shorts/
901 |(?: # or the v= param in all its forms
902 (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
903 (?:\?|\#!?) # the params delimiter ? or # or #!
904 (?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
905 v=
906 )
907 ))
908 |(?:
909 youtu\.be| # just youtu.be/xxxx
910 vid\.plus| # or vid.plus/xxxx
911 zwearz\.com/watch| # or zwearz.com/watch/xxxx
912 %(invidious)s
913 )/
914 |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
915 )
916 )? # all until now is optional -> you can pass the naked ID
917 (?P<id>[0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
918 (?(1).+)? # if we found the ID, everything can follow
919 (?:\#|$)""" % {
920 'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
921 }
922 _EMBED_REGEX = [r'''(?x)
923 (?:
924 <iframe[^>]+?src=|
925 data-video-url=|
926 <embed[^>]+?src=|
927 embedSWF\(?:\s*|
928 <object[^>]+data=|
929 new\s+SWFObject\(
930 )
931 (["\'])
932 (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
933 (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
934 \1''']
935 _PLAYER_INFO_RE = (
936 r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player',
937 r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',
938 r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$',
939 )
940 _formats = {
941 '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
942 '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
943 '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
944 '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
945 '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
946 '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
947 '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
948 '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
949 # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
950 '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
951 '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
952 '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
953 '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
954 '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
955 '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
956 '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
957 '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
958 '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
959
960
961 # 3D videos
962 '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
963 '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
964 '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
965 '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
966 '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
967 '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
968 '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
969
970 # Apple HTTP Live Streaming
971 '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
972 '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
973 '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
974 '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
975 '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
976 '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
977 '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
978 '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
979
980 # DASH mp4 video
981 '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
982 '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
983 '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
984 '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
985 '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
986 '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
987 '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
988 '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
989 '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
990 '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
991 '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
992 '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},
993
994 # Dash mp4 audio
995 '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
996 '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
997 '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
998 '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
999 '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
1000 '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
1001 '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},
1002
1003 # Dash webm
1004 '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1005 '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1006 '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1007 '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1008 '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1009 '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1010 '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
1011 '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1012 '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1013 '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1014 '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1015 '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1016 '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1017 '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1018 '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1019 # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
1020 '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1021 '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
1022 '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
1023 '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
1024 '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1025 '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
1026
1027 # Dash webm audio
1028 '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
1029 '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},
1030
1031 # Dash webm audio with opus inside
1032 '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
1033 '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
1034 '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},
1035
1036 # RTMP (unnamed)
1037 '_rtmp': {'protocol': 'rtmp'},
1038
1039 # av01 video only formats sometimes served with "unknown" codecs
1040 '394': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},
1041 '395': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},
1042 '396': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'av01.0.01M.08'},
1043 '397': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'av01.0.04M.08'},
1044 '398': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'av01.0.05M.08'},
1045 '399': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'av01.0.08M.08'},
1046 '400': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},
1047 '401': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},
1048 }
1049 _SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')
1050
1051 _GEO_BYPASS = False
1052
1053 IE_NAME = 'youtube'
1054 _TESTS = [
1055 {
1056 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
1057 'info_dict': {
1058 'id': 'BaW_jenozKc',
1059 'ext': 'mp4',
1060 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
1061 'uploader': 'Philipp Hagemeister',
1062 'uploader_id': 'phihag',
1063 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
1064 'channel': 'Philipp Hagemeister',
1065 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
1066 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
1067 'upload_date': '20121002',
1068 'description': 'md5:8fb536f4877b8a7455c2ec23794dbc22',
1069 'categories': ['Science & Technology'],
1070 'tags': ['youtube-dl'],
1071 'duration': 10,
1072 'view_count': int,
1073 'like_count': int,
1074 'availability': 'public',
1075 'playable_in_embed': True,
1076 'thumbnail': 'https://i.ytimg.com/vi/BaW_jenozKc/maxresdefault.jpg',
1077 'live_status': 'not_live',
1078 'age_limit': 0,
1079 'start_time': 1,
1080 'end_time': 9,
1081 'comment_count': int,
1082 'channel_follower_count': int
1083 }
1084 },
1085 {
1086 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
1087 'note': 'Embed-only video (#1746)',
1088 'info_dict': {
1089 'id': 'yZIXLfi8CZQ',
1090 'ext': 'mp4',
1091 'upload_date': '20120608',
1092 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
1093 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
1094 'uploader': 'SET India',
1095 'uploader_id': 'setindia',
1096 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
1097 'age_limit': 18,
1098 },
1099 'skip': 'Private video',
1100 },
1101 {
1102 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=yZIXLfi8CZQ',
1103 'note': 'Use the first video ID in the URL',
1104 'info_dict': {
1105 'id': 'BaW_jenozKc',
1106 'ext': 'mp4',
1107 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
1108 'uploader': 'Philipp Hagemeister',
1109 'uploader_id': 'phihag',
1110 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
1111 'channel': 'Philipp Hagemeister',
1112 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
1113 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
1114 'upload_date': '20121002',
1115 'description': 'md5:8fb536f4877b8a7455c2ec23794dbc22',
1116 'categories': ['Science & Technology'],
1117 'tags': ['youtube-dl'],
1118 'duration': 10,
1119 'view_count': int,
1120 'like_count': int,
1121 'availability': 'public',
1122 'playable_in_embed': True,
1123 'thumbnail': 'https://i.ytimg.com/vi/BaW_jenozKc/maxresdefault.jpg',
1124 'live_status': 'not_live',
1125 'age_limit': 0,
1126 'comment_count': int,
1127 'channel_follower_count': int
1128 },
1129 'params': {
1130 'skip_download': True,
1131 },
1132 },
1133 {
1134 'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
1135 'note': '256k DASH audio (format 141) via DASH manifest',
1136 'info_dict': {
1137 'id': 'a9LDPn-MO4I',
1138 'ext': 'm4a',
1139 'upload_date': '20121002',
1140 'uploader_id': '8KVIDEO',
1141 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
1142 'description': '',
1143 'uploader': '8KVIDEO',
1144 'title': 'UHDTV TEST 8K VIDEO.mp4'
1145 },
1146 'params': {
1147 'youtube_include_dash_manifest': True,
1148 'format': '141',
1149 },
1150 'skip': 'format 141 not served anymore',
1151 },
1152 # DASH manifest with encrypted signature
1153 {
1154 'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
1155 'info_dict': {
1156 'id': 'IB3lcPjvWLA',
1157 'ext': 'm4a',
1158 'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
1159 'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
1160 'duration': 244,
1161 'uploader': 'AfrojackVEVO',
1162 'uploader_id': 'AfrojackVEVO',
1163 'upload_date': '20131011',
1164 'abr': 129.495,
1165 'like_count': int,
1166 'channel_id': 'UChuZAo1RKL85gev3Eal9_zg',
1167 'playable_in_embed': True,
1168 'channel_url': 'https://www.youtube.com/channel/UChuZAo1RKL85gev3Eal9_zg',
1169 'view_count': int,
1170 'track': 'The Spark',
1171 'live_status': 'not_live',
1172 'thumbnail': 'https://i.ytimg.com/vi_webp/IB3lcPjvWLA/maxresdefault.webp',
1173 'channel': 'Afrojack',
1174 'uploader_url': 'http://www.youtube.com/user/AfrojackVEVO',
1175 'tags': 'count:19',
1176 'availability': 'public',
1177 'categories': ['Music'],
1178 'age_limit': 0,
1179 'alt_title': 'The Spark',
1180 'channel_follower_count': int
1181 },
1182 'params': {
1183 'youtube_include_dash_manifest': True,
1184 'format': '141/bestaudio[ext=m4a]',
1185 },
1186 },
1187 # Age-gate videos. See https://github.com/yt-dlp/yt-dlp/pull/575#issuecomment-888837000
1188 {
1189 'note': 'Embed allowed age-gate video',
1190 'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
1191 'info_dict': {
1192 'id': 'HtVdAasjOgU',
1193 'ext': 'mp4',
1194 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
1195 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
1196 'duration': 142,
1197 'uploader': 'The Witcher',
1198 'uploader_id': 'WitcherGame',
1199 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
1200 'upload_date': '20140605',
1201 'age_limit': 18,
1202 'categories': ['Gaming'],
1203 'thumbnail': 'https://i.ytimg.com/vi_webp/HtVdAasjOgU/maxresdefault.webp',
1204 'availability': 'needs_auth',
1205 'channel_url': 'https://www.youtube.com/channel/UCzybXLxv08IApdjdN0mJhEg',
1206 'like_count': int,
1207 'channel': 'The Witcher',
1208 'live_status': 'not_live',
1209 'tags': 'count:17',
1210 'channel_id': 'UCzybXLxv08IApdjdN0mJhEg',
1211 'playable_in_embed': True,
1212 'view_count': int,
1213 'channel_follower_count': int
1214 },
1215 },
1216 {
1217 'note': 'Age-gate video with embed allowed in public site',
1218 'url': 'https://youtube.com/watch?v=HsUATh_Nc2U',
1219 'info_dict': {
1220 'id': 'HsUATh_Nc2U',
1221 'ext': 'mp4',
1222 'title': 'Godzilla 2 (Official Video)',
1223 'description': 'md5:bf77e03fcae5529475e500129b05668a',
1224 'upload_date': '20200408',
1225 'uploader_id': 'FlyingKitty900',
1226 'uploader': 'FlyingKitty',
1227 'age_limit': 18,
1228 'availability': 'needs_auth',
1229 'channel_id': 'UCYQT13AtrJC0gsM1far_zJg',
1230 'uploader_url': 'http://www.youtube.com/user/FlyingKitty900',
1231 'channel': 'FlyingKitty',
1232 'channel_url': 'https://www.youtube.com/channel/UCYQT13AtrJC0gsM1far_zJg',
1233 'view_count': int,
1234 'categories': ['Entertainment'],
1235 'live_status': 'not_live',
1236 'tags': ['Flyingkitty', 'godzilla 2'],
1237 'thumbnail': 'https://i.ytimg.com/vi/HsUATh_Nc2U/maxresdefault.jpg',
1238 'like_count': int,
1239 'duration': 177,
1240 'playable_in_embed': True,
1241 'channel_follower_count': int
1242 },
1243 },
1244 {
1245 'note': 'Age-gate video embedable only with clientScreen=EMBED',
1246 'url': 'https://youtube.com/watch?v=Tq92D6wQ1mg',
1247 'info_dict': {
1248 'id': 'Tq92D6wQ1mg',
1249 'title': '[MMD] Adios - EVERGLOW [+Motion DL]',
1250 'ext': 'mp4',
1251 'upload_date': '20191228',
1252 'uploader_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',
1253 'uploader': 'Projekt Melody',
1254 'description': 'md5:17eccca93a786d51bc67646756894066',
1255 'age_limit': 18,
1256 'like_count': int,
1257 'availability': 'needs_auth',
1258 'uploader_url': 'http://www.youtube.com/channel/UC1yoRdFoFJaCY-AGfD9W0wQ',
1259 'channel_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',
1260 'view_count': int,
1261 'thumbnail': 'https://i.ytimg.com/vi_webp/Tq92D6wQ1mg/sddefault.webp',
1262 'channel': 'Projekt Melody',
1263 'live_status': 'not_live',
1264 'tags': ['mmd', 'dance', 'mikumikudance', 'kpop', 'vtuber'],
1265 'playable_in_embed': True,
1266 'categories': ['Entertainment'],
1267 'duration': 106,
1268 'channel_url': 'https://www.youtube.com/channel/UC1yoRdFoFJaCY-AGfD9W0wQ',
1269 'comment_count': int,
1270 'channel_follower_count': int
1271 },
1272 },
1273 {
1274 'note': 'Non-Agegated non-embeddable video',
1275 'url': 'https://youtube.com/watch?v=MeJVWBSsPAY',
1276 'info_dict': {
1277 'id': 'MeJVWBSsPAY',
1278 'ext': 'mp4',
1279 'title': 'OOMPH! - Such Mich Find Mich (Lyrics)',
1280 'uploader': 'Herr Lurik',
1281 'uploader_id': 'st3in234',
1282 'description': 'Fan Video. Music & Lyrics by OOMPH!.',
1283 'upload_date': '20130730',
1284 'track': 'Such mich find mich',
1285 'age_limit': 0,
1286 'tags': ['oomph', 'such mich find mich', 'lyrics', 'german industrial', 'musica industrial'],
1287 'like_count': int,
1288 'playable_in_embed': False,
1289 'creator': 'OOMPH!',
1290 'thumbnail': 'https://i.ytimg.com/vi/MeJVWBSsPAY/sddefault.jpg',
1291 'view_count': int,
1292 'alt_title': 'Such mich find mich',
1293 'duration': 210,
1294 'channel': 'Herr Lurik',
1295 'channel_id': 'UCdR3RSDPqub28LjZx0v9-aA',
1296 'categories': ['Music'],
1297 'availability': 'public',
1298 'uploader_url': 'http://www.youtube.com/user/st3in234',
1299 'channel_url': 'https://www.youtube.com/channel/UCdR3RSDPqub28LjZx0v9-aA',
1300 'live_status': 'not_live',
1301 'artist': 'OOMPH!',
1302 'channel_follower_count': int
1303 },
1304 },
1305 {
1306 'note': 'Non-bypassable age-gated video',
1307 'url': 'https://youtube.com/watch?v=Cr381pDsSsA',
1308 'only_matching': True,
1309 },
1310 # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
1311 # YouTube Red ad is not captured for creator
1312 {
1313 'url': '__2ABJjxzNo',
1314 'info_dict': {
1315 'id': '__2ABJjxzNo',
1316 'ext': 'mp4',
1317 'duration': 266,
1318 'upload_date': '20100430',
1319 'uploader_id': 'deadmau5',
1320 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
1321 'creator': 'deadmau5',
1322 'description': 'md5:6cbcd3a92ce1bc676fc4d6ab4ace2336',
1323 'uploader': 'deadmau5',
1324 'title': 'Deadmau5 - Some Chords (HD)',
1325 'alt_title': 'Some Chords',
1326 'availability': 'public',
1327 'tags': 'count:14',
1328 'channel_id': 'UCYEK6xds6eo-3tr4xRdflmQ',
1329 'view_count': int,
1330 'live_status': 'not_live',
1331 'channel': 'deadmau5',
1332 'thumbnail': 'https://i.ytimg.com/vi_webp/__2ABJjxzNo/maxresdefault.webp',
1333 'like_count': int,
1334 'track': 'Some Chords',
1335 'artist': 'deadmau5',
1336 'playable_in_embed': True,
1337 'age_limit': 0,
1338 'channel_url': 'https://www.youtube.com/channel/UCYEK6xds6eo-3tr4xRdflmQ',
1339 'categories': ['Music'],
1340 'album': 'Some Chords',
1341 'channel_follower_count': int
1342 },
1343 'expected_warnings': [
1344 'DASH manifest missing',
1345 ]
1346 },
1347 # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
1348 {
1349 'url': 'lqQg6PlCWgI',
1350 'info_dict': {
1351 'id': 'lqQg6PlCWgI',
1352 'ext': 'mp4',
1353 'duration': 6085,
1354 'upload_date': '20150827',
1355 'uploader_id': 'olympic',
1356 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
1357 'description': 'md5:04bbbf3ccceb6795947572ca36f45904',
1358 'uploader': 'Olympics',
1359 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
1360 'like_count': int,
1361 'release_timestamp': 1343767800,
1362 'playable_in_embed': True,
1363 'categories': ['Sports'],
1364 'release_date': '20120731',
1365 'channel': 'Olympics',
1366 'tags': ['Hockey', '2012-07-31', '31 July 2012', 'Riverbank Arena', 'Session', 'Olympics', 'Olympic Games', 'London 2012', '2012 Summer Olympics', 'Summer Games'],
1367 'channel_id': 'UCTl3QQTvqHFjurroKxexy2Q',
1368 'thumbnail': 'https://i.ytimg.com/vi/lqQg6PlCWgI/maxresdefault.jpg',
1369 'age_limit': 0,
1370 'availability': 'public',
1371 'live_status': 'was_live',
1372 'view_count': int,
1373 'channel_url': 'https://www.youtube.com/channel/UCTl3QQTvqHFjurroKxexy2Q',
1374 'channel_follower_count': int
1375 },
1376 'params': {
1377 'skip_download': 'requires avconv',
1378 }
1379 },
1380 # Non-square pixels
1381 {
1382 'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
1383 'info_dict': {
1384 'id': '_b-2C3KPAM0',
1385 'ext': 'mp4',
1386 'stretched_ratio': 16 / 9.,
1387 'duration': 85,
1388 'upload_date': '20110310',
1389 'uploader_id': 'AllenMeow',
1390 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
1391 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
1392 'uploader': '孫ᄋᄅ',
1393 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
1394 'playable_in_embed': True,
1395 'channel': '孫ᄋᄅ',
1396 'age_limit': 0,
1397 'tags': 'count:11',
1398 'channel_url': 'https://www.youtube.com/channel/UCS-xxCmRaA6BFdmgDPA_BIw',
1399 'channel_id': 'UCS-xxCmRaA6BFdmgDPA_BIw',
1400 'thumbnail': 'https://i.ytimg.com/vi/_b-2C3KPAM0/maxresdefault.jpg',
1401 'view_count': int,
1402 'categories': ['People & Blogs'],
1403 'like_count': int,
1404 'live_status': 'not_live',
1405 'availability': 'unlisted',
1406 'comment_count': int,
1407 'channel_follower_count': int
1408 },
1409 },
1410 # url_encoded_fmt_stream_map is empty string
1411 {
1412 'url': 'qEJwOuvDf7I',
1413 'info_dict': {
1414 'id': 'qEJwOuvDf7I',
1415 'ext': 'webm',
1416 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
1417 'description': '',
1418 'upload_date': '20150404',
1419 'uploader_id': 'spbelect',
1420 'uploader': 'Наблюдатели Петербурга',
1421 },
1422 'params': {
1423 'skip_download': 'requires avconv',
1424 },
1425 'skip': 'This live event has ended.',
1426 },
1427 # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
1428 {
1429 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
1430 'info_dict': {
1431 'id': 'FIl7x6_3R5Y',
1432 'ext': 'webm',
1433 'title': 'md5:7b81415841e02ecd4313668cde88737a',
1434 'description': 'md5:116377fd2963b81ec4ce64b542173306',
1435 'duration': 220,
1436 'upload_date': '20150625',
1437 'uploader_id': 'dorappi2000',
1438 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
1439 'uploader': 'dorappi2000',
1440 'formats': 'mincount:31',
1441 },
1442 'skip': 'not actual anymore',
1443 },
1444 # DASH manifest with segment_list
1445 {
1446 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
1447 'md5': '8ce563a1d667b599d21064e982ab9e31',
1448 'info_dict': {
1449 'id': 'CsmdDsKjzN8',
1450 'ext': 'mp4',
1451 'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510
1452 'uploader': 'Airtek',
1453 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
1454 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
1455 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
1456 },
1457 'params': {
1458 'youtube_include_dash_manifest': True,
1459 'format': '135', # bestvideo
1460 },
1461 'skip': 'This live event has ended.',
1462 },
1463 {
1464 # Multifeed videos (multiple cameras), URL is for Main Camera
1465 'url': 'https://www.youtube.com/watch?v=jvGDaLqkpTg',
1466 'info_dict': {
1467 'id': 'jvGDaLqkpTg',
1468 'title': 'Tom Clancy Free Weekend Rainbow Whatever',
1469 'description': 'md5:e03b909557865076822aa169218d6a5d',
1470 },
1471 'playlist': [{
1472 'info_dict': {
1473 'id': 'jvGDaLqkpTg',
1474 'ext': 'mp4',
1475 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Main Camera)',
1476 'description': 'md5:e03b909557865076822aa169218d6a5d',
1477 'duration': 10643,
1478 'upload_date': '20161111',
1479 'uploader': 'Team PGP',
1480 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1481 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
1482 },
1483 }, {
1484 'info_dict': {
1485 'id': '3AKt1R1aDnw',
1486 'ext': 'mp4',
1487 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 2)',
1488 'description': 'md5:e03b909557865076822aa169218d6a5d',
1489 'duration': 10991,
1490 'upload_date': '20161111',
1491 'uploader': 'Team PGP',
1492 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1493 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
1494 },
1495 }, {
1496 'info_dict': {
1497 'id': 'RtAMM00gpVc',
1498 'ext': 'mp4',
1499 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 3)',
1500 'description': 'md5:e03b909557865076822aa169218d6a5d',
1501 'duration': 10995,
1502 'upload_date': '20161111',
1503 'uploader': 'Team PGP',
1504 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1505 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
1506 },
1507 }, {
1508 'info_dict': {
1509 'id': '6N2fdlP3C5U',
1510 'ext': 'mp4',
1511 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 4)',
1512 'description': 'md5:e03b909557865076822aa169218d6a5d',
1513 'duration': 10990,
1514 'upload_date': '20161111',
1515 'uploader': 'Team PGP',
1516 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1517 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
1518 },
1519 }],
1520 'params': {
1521 'skip_download': True,
1522 },
1523 'skip': 'Not multifeed anymore',
1524 },
1525 {
1526 # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
1527 'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
1528 'info_dict': {
1529 'id': 'gVfLd0zydlo',
1530 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
1531 },
1532 'playlist_count': 2,
1533 'skip': 'Not multifeed anymore',
1534 },
1535 {
1536 'url': 'https://vid.plus/FlRa-iH7PGw',
1537 'only_matching': True,
1538 },
1539 {
1540 'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
1541 'only_matching': True,
1542 },
1543 {
1544 # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
1545 # Also tests cut-off URL expansion in video description (see
1546 # https://github.com/ytdl-org/youtube-dl/issues/1892,
1547 # https://github.com/ytdl-org/youtube-dl/issues/8164)
1548 'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
1549 'info_dict': {
1550 'id': 'lsguqyKfVQg',
1551 'ext': 'mp4',
1552 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
1553 'alt_title': 'Dark Walk',
1554 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
1555 'duration': 133,
1556 'upload_date': '20151119',
1557 'uploader_id': 'IronSoulElf',
1558 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
1559 'uploader': 'IronSoulElf',
1560 'creator': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',
1561 'track': 'Dark Walk',
1562 'artist': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',
1563 'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
1564 'thumbnail': 'https://i.ytimg.com/vi_webp/lsguqyKfVQg/maxresdefault.webp',
1565 'categories': ['Film & Animation'],
1566 'view_count': int,
1567 'live_status': 'not_live',
1568 'channel_url': 'https://www.youtube.com/channel/UCTSRgz5jylBvFt_S7wnsqLQ',
1569 'channel_id': 'UCTSRgz5jylBvFt_S7wnsqLQ',
1570 'tags': 'count:13',
1571 'availability': 'public',
1572 'channel': 'IronSoulElf',
1573 'playable_in_embed': True,
1574 'like_count': int,
1575 'age_limit': 0,
1576 'channel_follower_count': int
1577 },
1578 'params': {
1579 'skip_download': True,
1580 },
1581 },
1582 {
1583 # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
1584 'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
1585 'only_matching': True,
1586 },
1587 {
1588 # Video with yt:stretch=17:0
1589 'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
1590 'info_dict': {
1591 'id': 'Q39EVAstoRM',
1592 'ext': 'mp4',
1593 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
1594 'description': 'md5:ee18a25c350637c8faff806845bddee9',
1595 'upload_date': '20151107',
1596 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
1597 'uploader': 'CH GAMER DROID',
1598 },
1599 'params': {
1600 'skip_download': True,
1601 },
1602 'skip': 'This video does not exist.',
1603 },
1604 {
1605 # Video with incomplete 'yt:stretch=16:'
1606 'url': 'https://www.youtube.com/watch?v=FRhJzUSJbGI',
1607 'only_matching': True,
1608 },
1609 {
1610 # Video licensed under Creative Commons
1611 'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
1612 'info_dict': {
1613 'id': 'M4gD1WSo5mA',
1614 'ext': 'mp4',
1615 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
1616 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
1617 'duration': 721,
1618 'upload_date': '20150128',
1619 'uploader_id': 'BerkmanCenter',
1620 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
1621 'uploader': 'The Berkman Klein Center for Internet & Society',
1622 'license': 'Creative Commons Attribution license (reuse allowed)',
1623 'channel_id': 'UCuLGmD72gJDBwmLw06X58SA',
1624 'channel_url': 'https://www.youtube.com/channel/UCuLGmD72gJDBwmLw06X58SA',
1625 'like_count': int,
1626 'age_limit': 0,
1627 'tags': ['Copyright (Legal Subject)', 'Law (Industry)', 'William W. Fisher (Author)'],
1628 'channel': 'The Berkman Klein Center for Internet & Society',
1629 'availability': 'public',
1630 'view_count': int,
1631 'categories': ['Education'],
1632 'thumbnail': 'https://i.ytimg.com/vi_webp/M4gD1WSo5mA/maxresdefault.webp',
1633 'live_status': 'not_live',
1634 'playable_in_embed': True,
1635 'comment_count': int,
1636 'channel_follower_count': int
1637 },
1638 'params': {
1639 'skip_download': True,
1640 },
1641 },
1642 {
1643 # Channel-like uploader_url
1644 'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
1645 'info_dict': {
1646 'id': 'eQcmzGIKrzg',
1647 'ext': 'mp4',
1648 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
1649 'description': 'md5:13a2503d7b5904ef4b223aa101628f39',
1650 'duration': 4060,
1651 'upload_date': '20151120',
1652 'uploader': 'Bernie Sanders',
1653 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
1654 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
1655 'license': 'Creative Commons Attribution license (reuse allowed)',
1656 'playable_in_embed': True,
1657 'tags': 'count:12',
1658 'like_count': int,
1659 'channel_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
1660 'age_limit': 0,
1661 'availability': 'public',
1662 'categories': ['News & Politics'],
1663 'channel': 'Bernie Sanders',
1664 'thumbnail': 'https://i.ytimg.com/vi_webp/eQcmzGIKrzg/maxresdefault.webp',
1665 'view_count': int,
1666 'live_status': 'not_live',
1667 'channel_url': 'https://www.youtube.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
1668 'comment_count': int,
1669 'channel_follower_count': int
1670 },
1671 'params': {
1672 'skip_download': True,
1673 },
1674 },
1675 {
1676 'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
1677 'only_matching': True,
1678 },
1679 {
1680 # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
1681 'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
1682 'only_matching': True,
1683 },
1684 {
1685 # Rental video preview
1686 'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
1687 'info_dict': {
1688 'id': 'uGpuVWrhIzE',
1689 'ext': 'mp4',
1690 'title': 'Piku - Trailer',
1691 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
1692 'upload_date': '20150811',
1693 'uploader': 'FlixMatrix',
1694 'uploader_id': 'FlixMatrixKaravan',
1695 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
1696 'license': 'Standard YouTube License',
1697 },
1698 'params': {
1699 'skip_download': True,
1700 },
1701 'skip': 'This video is not available.',
1702 },
1703 {
1704 # YouTube Red video with episode data
1705 'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
1706 'info_dict': {
1707 'id': 'iqKdEhx-dD4',
1708 'ext': 'mp4',
1709 'title': 'Isolation - Mind Field (Ep 1)',
1710 'description': 'md5:f540112edec5d09fc8cc752d3d4ba3cd',
1711 'duration': 2085,
1712 'upload_date': '20170118',
1713 'uploader': 'Vsauce',
1714 'uploader_id': 'Vsauce',
1715 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
1716 'series': 'Mind Field',
1717 'season_number': 1,
1718 'episode_number': 1,
1719 'thumbnail': 'https://i.ytimg.com/vi_webp/iqKdEhx-dD4/maxresdefault.webp',
1720 'tags': 'count:12',
1721 'view_count': int,
1722 'availability': 'public',
1723 'age_limit': 0,
1724 'channel': 'Vsauce',
1725 'episode': 'Episode 1',
1726 'categories': ['Entertainment'],
1727 'season': 'Season 1',
1728 'channel_id': 'UC6nSFpj9HTCZ5t-N3Rm3-HA',
1729 'channel_url': 'https://www.youtube.com/channel/UC6nSFpj9HTCZ5t-N3Rm3-HA',
1730 'like_count': int,
1731 'playable_in_embed': True,
1732 'live_status': 'not_live',
1733 'channel_follower_count': int
1734 },
1735 'params': {
1736 'skip_download': True,
1737 },
1738 'expected_warnings': [
1739 'Skipping DASH manifest',
1740 ],
1741 },
1742 {
1743 # The following content has been identified by the YouTube community
1744 # as inappropriate or offensive to some audiences.
1745 'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
1746 'info_dict': {
1747 'id': '6SJNVb0GnPI',
1748 'ext': 'mp4',
1749 'title': 'Race Differences in Intelligence',
1750 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
1751 'duration': 965,
1752 'upload_date': '20140124',
1753 'uploader': 'New Century Foundation',
1754 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
1755 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
1756 },
1757 'params': {
1758 'skip_download': True,
1759 },
1760 'skip': 'This video has been removed for violating YouTube\'s policy on hate speech.',
1761 },
1762 {
1763 # itag 212
1764 'url': '1t24XAntNCY',
1765 'only_matching': True,
1766 },
1767 {
1768 # geo restricted to JP
1769 'url': 'sJL6WA-aGkQ',
1770 'only_matching': True,
1771 },
1772 {
1773 'url': 'https://invidio.us/watch?v=BaW_jenozKc',
1774 'only_matching': True,
1775 },
1776 {
1777 'url': 'https://redirect.invidious.io/watch?v=BaW_jenozKc',
1778 'only_matching': True,
1779 },
1780 {
1781 # from https://nitter.pussthecat.org/YouTube/status/1360363141947944964#m
1782 'url': 'https://redirect.invidious.io/Yh0AhrY9GjA',
1783 'only_matching': True,
1784 },
1785 {
1786 # DRM protected
1787 'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
1788 'only_matching': True,
1789 },
1790 {
1791 # Video with unsupported adaptive stream type formats
1792 'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
1793 'info_dict': {
1794 'id': 'Z4Vy8R84T1U',
1795 'ext': 'mp4',
1796 'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
1797 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
1798 'duration': 433,
1799 'upload_date': '20130923',
1800 'uploader': 'Amelia Putri Harwita',
1801 'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',
1802 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',
1803 'formats': 'maxcount:10',
1804 },
1805 'params': {
1806 'skip_download': True,
1807 'youtube_include_dash_manifest': False,
1808 },
1809 'skip': 'not actual anymore',
1810 },
1811 {
1812 # Youtube Music Auto-generated description
1813 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1814 'info_dict': {
1815 'id': 'MgNrAu2pzNs',
1816 'ext': 'mp4',
1817 'title': 'Voyeur Girl',
1818 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
1819 'upload_date': '20190312',
1820 'uploader': 'Stephen - Topic',
1821 'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',
1822 'artist': 'Stephen',
1823 'track': 'Voyeur Girl',
1824 'album': 'it\'s too much love to know my dear',
1825 'release_date': '20190313',
1826 'release_year': 2019,
1827 'alt_title': 'Voyeur Girl',
1828 'view_count': int,
1829 'uploader_url': 'http://www.youtube.com/channel/UC-pWHpBjdGG69N9mM2auIAA',
1830 'playable_in_embed': True,
1831 'like_count': int,
1832 'categories': ['Music'],
1833 'channel_url': 'https://www.youtube.com/channel/UC-pWHpBjdGG69N9mM2auIAA',
1834 'channel': 'Stephen',
1835 'availability': 'public',
1836 'creator': 'Stephen',
1837 'duration': 169,
1838 'thumbnail': 'https://i.ytimg.com/vi_webp/MgNrAu2pzNs/maxresdefault.webp',
1839 'age_limit': 0,
1840 'channel_id': 'UC-pWHpBjdGG69N9mM2auIAA',
1841 'tags': 'count:11',
1842 'live_status': 'not_live',
1843 'channel_follower_count': int
1844 },
1845 'params': {
1846 'skip_download': True,
1847 },
1848 },
1849 {
1850 'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',
1851 'only_matching': True,
1852 },
1853 {
1854 # invalid -> valid video id redirection
1855 'url': 'DJztXj2GPfl',
1856 'info_dict': {
1857 'id': 'DJztXj2GPfk',
1858 'ext': 'mp4',
1859 'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',
1860 'description': 'md5:bf577a41da97918e94fa9798d9228825',
1861 'upload_date': '20090125',
1862 'uploader': 'Prochorowka',
1863 'uploader_id': 'Prochorowka',
1864 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',
1865 'artist': 'Panjabi MC',
1866 'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',
1867 'album': 'Beware of the Boys (Mundian To Bach Ke)',
1868 },
1869 'params': {
1870 'skip_download': True,
1871 },
1872 'skip': 'Video unavailable',
1873 },
1874 {
1875 # empty description results in an empty string
1876 'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',
1877 'info_dict': {
1878 'id': 'x41yOUIvK2k',
1879 'ext': 'mp4',
1880 'title': 'IMG 3456',
1881 'description': '',
1882 'upload_date': '20170613',
1883 'uploader_id': 'ElevageOrVert',
1884 'uploader': 'ElevageOrVert',
1885 'view_count': int,
1886 'thumbnail': 'https://i.ytimg.com/vi_webp/x41yOUIvK2k/maxresdefault.webp',
1887 'uploader_url': 'http://www.youtube.com/user/ElevageOrVert',
1888 'like_count': int,
1889 'channel_id': 'UCo03ZQPBW5U4UC3regpt1nw',
1890 'tags': [],
1891 'channel_url': 'https://www.youtube.com/channel/UCo03ZQPBW5U4UC3regpt1nw',
1892 'availability': 'public',
1893 'age_limit': 0,
1894 'categories': ['Pets & Animals'],
1895 'duration': 7,
1896 'playable_in_embed': True,
1897 'live_status': 'not_live',
1898 'channel': 'ElevageOrVert',
1899 'channel_follower_count': int
1900 },
1901 'params': {
1902 'skip_download': True,
1903 },
1904 },
1905 {
1906 # with '};' inside yt initial data (see [1])
1907 # see [2] for an example with '};' inside ytInitialPlayerResponse
1908 # 1. https://github.com/ytdl-org/youtube-dl/issues/27093
1909 # 2. https://github.com/ytdl-org/youtube-dl/issues/27216
1910 'url': 'https://www.youtube.com/watch?v=CHqg6qOn4no',
1911 'info_dict': {
1912 'id': 'CHqg6qOn4no',
1913 'ext': 'mp4',
1914 'title': 'Part 77 Sort a list of simple types in c#',
1915 'description': 'md5:b8746fa52e10cdbf47997903f13b20dc',
1916 'upload_date': '20130831',
1917 'uploader_id': 'kudvenkat',
1918 'uploader': 'kudvenkat',
1919 'channel_id': 'UCCTVrRB5KpIiK6V2GGVsR1Q',
1920 'like_count': int,
1921 'uploader_url': 'http://www.youtube.com/user/kudvenkat',
1922 'channel_url': 'https://www.youtube.com/channel/UCCTVrRB5KpIiK6V2GGVsR1Q',
1923 'live_status': 'not_live',
1924 'categories': ['Education'],
1925 'availability': 'public',
1926 'thumbnail': 'https://i.ytimg.com/vi/CHqg6qOn4no/sddefault.jpg',
1927 'tags': 'count:12',
1928 'playable_in_embed': True,
1929 'age_limit': 0,
1930 'view_count': int,
1931 'duration': 522,
1932 'channel': 'kudvenkat',
1933 'comment_count': int,
1934 'channel_follower_count': int
1935 },
1936 'params': {
1937 'skip_download': True,
1938 },
1939 },
1940 {
1941 # another example of '};' in ytInitialData
1942 'url': 'https://www.youtube.com/watch?v=gVfgbahppCY',
1943 'only_matching': True,
1944 },
1945 {
1946 'url': 'https://www.youtube.com/watch_popup?v=63RmMXCd_bQ',
1947 'only_matching': True,
1948 },
1949 {
1950 # https://github.com/ytdl-org/youtube-dl/pull/28094
1951 'url': 'OtqTfy26tG0',
1952 'info_dict': {
1953 'id': 'OtqTfy26tG0',
1954 'ext': 'mp4',
1955 'title': 'Burn Out',
1956 'description': 'md5:8d07b84dcbcbfb34bc12a56d968b6131',
1957 'upload_date': '20141120',
1958 'uploader': 'The Cinematic Orchestra - Topic',
1959 'uploader_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',
1960 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',
1961 'artist': 'The Cinematic Orchestra',
1962 'track': 'Burn Out',
1963 'album': 'Every Day',
1964 'like_count': int,
1965 'live_status': 'not_live',
1966 'alt_title': 'Burn Out',
1967 'duration': 614,
1968 'age_limit': 0,
1969 'view_count': int,
1970 'channel_url': 'https://www.youtube.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',
1971 'creator': 'The Cinematic Orchestra',
1972 'channel': 'The Cinematic Orchestra',
1973 'tags': ['The Cinematic Orchestra', 'Every Day', 'Burn Out'],
1974 'channel_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',
1975 'availability': 'public',
1976 'thumbnail': 'https://i.ytimg.com/vi/OtqTfy26tG0/maxresdefault.jpg',
1977 'categories': ['Music'],
1978 'playable_in_embed': True,
1979 'channel_follower_count': int
1980 },
1981 'params': {
1982 'skip_download': True,
1983 },
1984 },
1985 {
1986 # controversial video, only works with bpctr when authenticated with cookies
1987 'url': 'https://www.youtube.com/watch?v=nGC3D_FkCmg',
1988 'only_matching': True,
1989 },
1990 {
1991 # controversial video, requires bpctr/contentCheckOk
1992 'url': 'https://www.youtube.com/watch?v=SZJvDhaSDnc',
1993 'info_dict': {
1994 'id': 'SZJvDhaSDnc',
1995 'ext': 'mp4',
1996 'title': 'San Diego teen commits suicide after bullying over embarrassing video',
1997 'channel_id': 'UC-SJ6nODDmufqBzPBwCvYvQ',
1998 'uploader': 'CBS Mornings',
1999 'uploader_id': 'CBSThisMorning',
2000 'upload_date': '20140716',
2001 'description': 'md5:acde3a73d3f133fc97e837a9f76b53b7',
2002 'duration': 170,
2003 'categories': ['News & Politics'],
2004 'uploader_url': 'http://www.youtube.com/user/CBSThisMorning',
2005 'view_count': int,
2006 'channel': 'CBS Mornings',
2007 'tags': ['suicide', 'bullying', 'video', 'cbs', 'news'],
2008 'thumbnail': 'https://i.ytimg.com/vi/SZJvDhaSDnc/hqdefault.jpg',
2009 'age_limit': 18,
2010 'availability': 'needs_auth',
2011 'channel_url': 'https://www.youtube.com/channel/UC-SJ6nODDmufqBzPBwCvYvQ',
2012 'like_count': int,
2013 'live_status': 'not_live',
2014 'playable_in_embed': True,
2015 'channel_follower_count': int
2016 }
2017 },
2018 {
2019 # restricted location, https://github.com/ytdl-org/youtube-dl/issues/28685
2020 'url': 'cBvYw8_A0vQ',
2021 'info_dict': {
2022 'id': 'cBvYw8_A0vQ',
2023 'ext': 'mp4',
2024 'title': '4K Ueno Okachimachi Street Scenes 上野御徒町歩き',
2025 'description': 'md5:ea770e474b7cd6722b4c95b833c03630',
2026 'upload_date': '20201120',
2027 'uploader': 'Walk around Japan',
2028 'uploader_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',
2029 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',
2030 'duration': 1456,
2031 'categories': ['Travel & Events'],
2032 'channel_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',
2033 'view_count': int,
2034 'channel': 'Walk around Japan',
2035 'tags': ['Ueno Tokyo', 'Okachimachi Tokyo', 'Ameyoko Street', 'Tokyo attraction', 'Travel in Tokyo'],
2036 'thumbnail': 'https://i.ytimg.com/vi_webp/cBvYw8_A0vQ/hqdefault.webp',
2037 'age_limit': 0,
2038 'availability': 'public',
2039 'channel_url': 'https://www.youtube.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',
2040 'live_status': 'not_live',
2041 'playable_in_embed': True,
2042 'channel_follower_count': int
2043 },
2044 'params': {
2045 'skip_download': True,
2046 },
2047 }, {
2048 # Has multiple audio streams
2049 'url': 'WaOKSUlf4TM',
2050 'only_matching': True
2051 }, {
2052 # Requires Premium: has format 141 when requested using YTM url
2053 'url': 'https://music.youtube.com/watch?v=XclachpHxis',
2054 'only_matching': True
2055 }, {
2056 # multiple subtitles with same lang_code
2057 'url': 'https://www.youtube.com/watch?v=wsQiKKfKxug',
2058 'only_matching': True,
2059 }, {
2060 # Force use android client fallback
2061 'url': 'https://www.youtube.com/watch?v=YOelRv7fMxY',
2062 'info_dict': {
2063 'id': 'YOelRv7fMxY',
2064 'title': 'DIGGING A SECRET TUNNEL Part 1',
2065 'ext': '3gp',
2066 'upload_date': '20210624',
2067 'channel_id': 'UCp68_FLety0O-n9QU6phsgw',
2068 'uploader': 'colinfurze',
2069 'uploader_id': 'colinfurze',
2070 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCp68_FLety0O-n9QU6phsgw',
2071 'description': 'md5:5d5991195d599b56cd0c4148907eec50',
2072 'duration': 596,
2073 'categories': ['Entertainment'],
2074 'uploader_url': 'http://www.youtube.com/user/colinfurze',
2075 'view_count': int,
2076 'channel': 'colinfurze',
2077 'tags': ['Colin', 'furze', 'Terry', 'tunnel', 'underground', 'bunker'],
2078 'thumbnail': 'https://i.ytimg.com/vi/YOelRv7fMxY/maxresdefault.jpg',
2079 'age_limit': 0,
2080 'availability': 'public',
2081 'like_count': int,
2082 'live_status': 'not_live',
2083 'playable_in_embed': True,
2084 'channel_follower_count': int
2085 },
2086 'params': {
2087 'format': '17', # 3gp format available on android
2088 'extractor_args': {'youtube': {'player_client': ['android']}},
2089 },
2090 },
2091 {
2092 # Skip download of additional client configs (remix client config in this case)
2093 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
2094 'only_matching': True,
2095 'params': {
2096 'extractor_args': {'youtube': {'player_skip': ['configs']}},
2097 },
2098 }, {
2099 # shorts
2100 'url': 'https://www.youtube.com/shorts/BGQWPY4IigY',
2101 'only_matching': True,
2102 }, {
2103 'note': 'Storyboards',
2104 'url': 'https://www.youtube.com/watch?v=5KLPxDtMqe8',
2105 'info_dict': {
2106 'id': '5KLPxDtMqe8',
2107 'ext': 'mhtml',
2108 'format_id': 'sb0',
2109 'title': 'Your Brain is Plastic',
2110 'uploader_id': 'scishow',
2111 'description': 'md5:89cd86034bdb5466cd87c6ba206cd2bc',
2112 'upload_date': '20140324',
2113 'uploader': 'SciShow',
2114 'like_count': int,
2115 'channel_id': 'UCZYTClx2T1of7BRZ86-8fow',
2116 'channel_url': 'https://www.youtube.com/channel/UCZYTClx2T1of7BRZ86-8fow',
2117 'view_count': int,
2118 'thumbnail': 'https://i.ytimg.com/vi/5KLPxDtMqe8/maxresdefault.jpg',
2119 'playable_in_embed': True,
2120 'tags': 'count:12',
2121 'uploader_url': 'http://www.youtube.com/user/scishow',
2122 'availability': 'public',
2123 'channel': 'SciShow',
2124 'live_status': 'not_live',
2125 'duration': 248,
2126 'categories': ['Education'],
2127 'age_limit': 0,
2128 'channel_follower_count': int
2129 }, 'params': {'format': 'mhtml', 'skip_download': True}
2130 }, {
2131 # Ensure video upload_date is in UTC timezone (video was uploaded 1641170939)
2132 'url': 'https://www.youtube.com/watch?v=2NUZ8W2llS4',
2133 'info_dict': {
2134 'id': '2NUZ8W2llS4',
2135 'ext': 'mp4',
2136 'title': 'The NP that test your phone performance 🙂',
2137 'description': 'md5:144494b24d4f9dfacb97c1bbef5de84d',
2138 'uploader': 'Leon Nguyen',
2139 'uploader_id': 'VNSXIII',
2140 'uploader_url': 'http://www.youtube.com/user/VNSXIII',
2141 'channel_id': 'UCRqNBSOHgilHfAczlUmlWHA',
2142 'channel_url': 'https://www.youtube.com/channel/UCRqNBSOHgilHfAczlUmlWHA',
2143 'duration': 21,
2144 'view_count': int,
2145 'age_limit': 0,
2146 'categories': ['Gaming'],
2147 'tags': 'count:23',
2148 'playable_in_embed': True,
2149 'live_status': 'not_live',
2150 'upload_date': '20220103',
2151 'like_count': int,
2152 'availability': 'public',
2153 'channel': 'Leon Nguyen',
2154 'thumbnail': 'https://i.ytimg.com/vi_webp/2NUZ8W2llS4/maxresdefault.webp',
2155 'comment_count': int,
2156 'channel_follower_count': int
2157 }
2158 }, {
2159 # date text is premiered video, ensure upload date in UTC (published 1641172509)
2160 'url': 'https://www.youtube.com/watch?v=mzZzzBU6lrM',
2161 'info_dict': {
2162 'id': 'mzZzzBU6lrM',
2163 'ext': 'mp4',
2164 'title': 'I Met GeorgeNotFound In Real Life...',
2165 'description': 'md5:cca98a355c7184e750f711f3a1b22c84',
2166 'uploader': 'Quackity',
2167 'uploader_id': 'QuackityHQ',
2168 'uploader_url': 'http://www.youtube.com/user/QuackityHQ',
2169 'channel_id': 'UC_8NknAFiyhOUaZqHR3lq3Q',
2170 'channel_url': 'https://www.youtube.com/channel/UC_8NknAFiyhOUaZqHR3lq3Q',
2171 'duration': 955,
2172 'view_count': int,
2173 'age_limit': 0,
2174 'categories': ['Entertainment'],
2175 'tags': 'count:26',
2176 'playable_in_embed': True,
2177 'live_status': 'not_live',
2178 'release_timestamp': 1641172509,
2179 'release_date': '20220103',
2180 'upload_date': '20220103',
2181 'like_count': int,
2182 'availability': 'public',
2183 'channel': 'Quackity',
2184 'thumbnail': 'https://i.ytimg.com/vi/mzZzzBU6lrM/maxresdefault.jpg',
2185 'channel_follower_count': int
2186 }
2187 },
2188 { # continuous livestream. Microformat upload date should be preferred.
2189 # Upload date was 2021-06-19 (not UTC), while stream start is 2021-11-27
2190 'url': 'https://www.youtube.com/watch?v=kgx4WGK0oNU',
2191 'info_dict': {
2192 'id': 'kgx4WGK0oNU',
2193 'title': r're:jazz\/lofi hip hop radio🌱chill beats to relax\/study to \[LIVE 24\/7\] \d{4}-\d{2}-\d{2} \d{2}:\d{2}',
2194 'ext': 'mp4',
2195 'channel_id': 'UC84whx2xxsiA1gXHXXqKGOA',
2196 'availability': 'public',
2197 'age_limit': 0,
2198 'release_timestamp': 1637975704,
2199 'upload_date': '20210619',
2200 'channel_url': 'https://www.youtube.com/channel/UC84whx2xxsiA1gXHXXqKGOA',
2201 'live_status': 'is_live',
2202 'thumbnail': 'https://i.ytimg.com/vi/kgx4WGK0oNU/maxresdefault.jpg',
2203 'uploader': '阿鲍Abao',
2204 'uploader_url': 'http://www.youtube.com/channel/UC84whx2xxsiA1gXHXXqKGOA',
2205 'channel': 'Abao in Tokyo',
2206 'channel_follower_count': int,
2207 'release_date': '20211127',
2208 'tags': 'count:39',
2209 'categories': ['People & Blogs'],
2210 'like_count': int,
2211 'uploader_id': 'UC84whx2xxsiA1gXHXXqKGOA',
2212 'view_count': int,
2213 'playable_in_embed': True,
2214 'description': 'md5:2ef1d002cad520f65825346e2084e49d',
2215 },
2216 'params': {'skip_download': True}
2217 }, {
2218 # Story. Requires specific player params to work.
2219 'url': 'https://www.youtube.com/watch?v=vv8qTUWmulI',
2220 'info_dict': {
2221 'id': 'vv8qTUWmulI',
2222 'ext': 'mp4',
2223 'availability': 'unlisted',
2224 'view_count': int,
2225 'channel_id': 'UCzIZ8HrzDgc-pNQDUG6avBA',
2226 'upload_date': '20220526',
2227 'categories': ['Education'],
2228 'title': 'Story',
2229 'channel': 'IT\'S HISTORY',
2230 'description': '',
2231 'uploader_id': 'BlastfromthePast',
2232 'duration': 12,
2233 'uploader': 'IT\'S HISTORY',
2234 'playable_in_embed': True,
2235 'age_limit': 0,
2236 'live_status': 'not_live',
2237 'tags': [],
2238 'thumbnail': 'https://i.ytimg.com/vi_webp/vv8qTUWmulI/maxresdefault.webp',
2239 'uploader_url': 'http://www.youtube.com/user/BlastfromthePast',
2240 'channel_url': 'https://www.youtube.com/channel/UCzIZ8HrzDgc-pNQDUG6avBA',
2241 },
2242 'skip': 'stories get removed after some period of time',
2243 }, {
2244 'url': 'https://www.youtube.com/watch?v=tjjjtzRLHvA',
2245 'info_dict': {
2246 'id': 'tjjjtzRLHvA',
2247 'ext': 'mp4',
2248 'title': 'ハッシュタグ無し };if window.ytcsi',
2249 'upload_date': '20220323',
2250 'like_count': int,
2251 'availability': 'unlisted',
2252 'channel': 'nao20010128nao',
2253 'thumbnail': 'https://i.ytimg.com/vi_webp/tjjjtzRLHvA/maxresdefault.webp',
2254 'age_limit': 0,
2255 'uploader': 'nao20010128nao',
2256 'uploader_id': 'nao20010128nao',
2257 'categories': ['Music'],
2258 'view_count': int,
2259 'description': '',
2260 'channel_url': 'https://www.youtube.com/channel/UCdqltm_7iv1Vs6kp6Syke5A',
2261 'channel_id': 'UCdqltm_7iv1Vs6kp6Syke5A',
2262 'live_status': 'not_live',
2263 'playable_in_embed': True,
2264 'channel_follower_count': int,
2265 'duration': 6,
2266 'tags': [],
2267 'uploader_url': 'http://www.youtube.com/user/nao20010128nao',
2268 }
2269 }, {
2270 'note': '6 channel audio',
2271 'url': 'https://www.youtube.com/watch?v=zgdo7-RRjgo',
2272 'only_matching': True,
2273 }
2274 ]
2275
2276 _WEBPAGE_TESTS = [
2277 # YouTube <object> embed
2278 {
2279 'url': 'http://www.improbable.com/2017/04/03/untrained-modern-youths-and-ancient-masters-in-selfie-portraits/',
2280 'md5': '873c81d308b979f0e23ee7e620b312a3',
2281 'info_dict': {
2282 'id': 'msN87y-iEx0',
2283 'ext': 'mp4',
2284 'title': 'Feynman: Mirrors FUN TO IMAGINE 6',
2285 'upload_date': '20080526',
2286 'description': 'md5:873c81d308b979f0e23ee7e620b312a3',
2287 'uploader': 'Christopher Sykes',
2288 'uploader_id': 'ChristopherJSykes',
2289 'age_limit': 0,
2290 'tags': ['feynman', 'mirror', 'science', 'physics', 'imagination', 'fun', 'cool', 'puzzle'],
2291 'channel_id': 'UCCeo--lls1vna5YJABWAcVA',
2292 'playable_in_embed': True,
2293 'thumbnail': 'https://i.ytimg.com/vi/msN87y-iEx0/hqdefault.jpg',
2294 'like_count': int,
2295 'comment_count': int,
2296 'channel': 'Christopher Sykes',
2297 'live_status': 'not_live',
2298 'channel_url': 'https://www.youtube.com/channel/UCCeo--lls1vna5YJABWAcVA',
2299 'availability': 'public',
2300 'duration': 195,
2301 'view_count': int,
2302 'categories': ['Science & Technology'],
2303 'channel_follower_count': int,
2304 'uploader_url': 'http://www.youtube.com/user/ChristopherJSykes',
2305 },
2306 'params': {
2307 'skip_download': True,
2308 }
2309 },
2310 ]
2311
@classmethod
def suitable(cls, url):
    """
    Reject URLs whose query string carries a non-empty 'list' parameter;
    every other URL is judged by the parent class' suitable()
    """
    # NOTE(review): the function-scope import is kept exactly as found; it keeps this
    # method self-contained - confirm nothing relies on that before hoisting it
    from ..utils import parse_qs

    list_id = parse_qs(url).get('list', [None])[0]
    return False if list_id else super().suitable(url)
2320
def __init__(self, *args, **kwargs):
    """Run the parent initialiser with the given arguments, then start with two empty caches"""
    super().__init__(*args, **kwargs)
    # _code_cache is filled by _load_player, keyed by player id
    self._code_cache, self._player_cache = {}, {}
2325
2326 def _prepare_live_from_start_formats(self, formats, video_id, live_start_time, url, webpage_url, smuggled_data):
2327 lock = threading.Lock()
2328
2329 is_live = True
2330 start_time = time.time()
2331 formats = [f for f in formats if f.get('is_from_start')]
2332
2333 def refetch_manifest(format_id, delay):
2334 nonlocal formats, start_time, is_live
2335 if time.time() <= start_time + delay:
2336 return
2337
2338 _, _, prs, player_url = self._download_player_responses(url, smuggled_data, video_id, webpage_url)
2339 video_details = traverse_obj(
2340 prs, (..., 'videoDetails'), expected_type=dict, default=[])
2341 microformats = traverse_obj(
2342 prs, (..., 'microformat', 'playerMicroformatRenderer'),
2343 expected_type=dict, default=[])
2344 _, is_live, _, formats, _ = self._list_formats(video_id, microformats, video_details, prs, player_url)
2345 start_time = time.time()
2346
2347 def mpd_feed(format_id, delay):
2348 """
2349 @returns (manifest_url, manifest_stream_number, is_live) or None
2350 """
2351 with lock:
2352 refetch_manifest(format_id, delay)
2353
2354 f = next((f for f in formats if f['format_id'] == format_id), None)
2355 if not f:
2356 if not is_live:
2357 self.to_screen(f'{video_id}: Video is no longer live')
2358 else:
2359 self.report_warning(
2360 f'Cannot find refreshed manifest for format {format_id}{bug_reports_message()}')
2361 return None
2362 return f['manifest_url'], f['manifest_stream_number'], is_live
2363
2364 for f in formats:
2365 f['is_live'] = True
2366 f['protocol'] = 'http_dash_segments_generator'
2367 f['fragments'] = functools.partial(
2368 self._live_dash_fragments, f['format_id'], live_start_time, mpd_feed)
2369
2370 def _live_dash_fragments(self, format_id, live_start_time, mpd_feed, ctx):
2371 FETCH_SPAN, MAX_DURATION = 5, 432000
2372
2373 mpd_url, stream_number, is_live = None, None, True
2374
2375 begin_index = 0
2376 download_start_time = ctx.get('start') or time.time()
2377
2378 lack_early_segments = download_start_time - (live_start_time or download_start_time) > MAX_DURATION
2379 if lack_early_segments:
2380 self.report_warning(bug_reports_message(
2381 'Starting download from the last 120 hours of the live stream since '
2382 'YouTube does not have data before that. If you think this is wrong,'), only_once=True)
2383 lack_early_segments = True
2384
2385 known_idx, no_fragment_score, last_segment_url = begin_index, 0, None
2386 fragments, fragment_base_url = None, None
2387
2388 def _extract_sequence_from_mpd(refresh_sequence, immediate):
2389 nonlocal mpd_url, stream_number, is_live, no_fragment_score, fragments, fragment_base_url
2390 # Obtain from MPD's maximum seq value
2391 old_mpd_url = mpd_url
2392 last_error = ctx.pop('last_error', None)
2393 expire_fast = immediate or last_error and isinstance(last_error, urllib.error.HTTPError) and last_error.code == 403
2394 mpd_url, stream_number, is_live = (mpd_feed(format_id, 5 if expire_fast else 18000)
2395 or (mpd_url, stream_number, False))
2396 if not refresh_sequence:
2397 if expire_fast and not is_live:
2398 return False, last_seq
2399 elif old_mpd_url == mpd_url:
2400 return True, last_seq
2401 try:
2402 fmts, _ = self._extract_mpd_formats_and_subtitles(
2403 mpd_url, None, note=False, errnote=False, fatal=False)
2404 except ExtractorError:
2405 fmts = None
2406 if not fmts:
2407 no_fragment_score += 2
2408 return False, last_seq
2409 fmt_info = next(x for x in fmts if x['manifest_stream_number'] == stream_number)
2410 fragments = fmt_info['fragments']
2411 fragment_base_url = fmt_info['fragment_base_url']
2412 assert fragment_base_url
2413
2414 _last_seq = int(re.search(r'(?:/|^)sq/(\d+)', fragments[-1]['path']).group(1))
2415 return True, _last_seq
2416
2417 while is_live:
2418 fetch_time = time.time()
2419 if no_fragment_score > 30:
2420 return
2421 if last_segment_url:
2422 # Obtain from "X-Head-Seqnum" header value from each segment
2423 try:
2424 urlh = self._request_webpage(
2425 last_segment_url, None, note=False, errnote=False, fatal=False)
2426 except ExtractorError:
2427 urlh = None
2428 last_seq = try_get(urlh, lambda x: int_or_none(x.headers['X-Head-Seqnum']))
2429 if last_seq is None:
2430 no_fragment_score += 2
2431 last_segment_url = None
2432 continue
2433 else:
2434 should_continue, last_seq = _extract_sequence_from_mpd(True, no_fragment_score > 15)
2435 no_fragment_score += 2
2436 if not should_continue:
2437 continue
2438
2439 if known_idx > last_seq:
2440 last_segment_url = None
2441 continue
2442
2443 last_seq += 1
2444
2445 if begin_index < 0 and known_idx < 0:
2446 # skip from the start when it's negative value
2447 known_idx = last_seq + begin_index
2448 if lack_early_segments:
2449 known_idx = max(known_idx, last_seq - int(MAX_DURATION // fragments[-1]['duration']))
2450 try:
2451 for idx in range(known_idx, last_seq):
2452 # do not update sequence here or you'll get skipped some part of it
2453 should_continue, _ = _extract_sequence_from_mpd(False, False)
2454 if not should_continue:
2455 known_idx = idx - 1
2456 raise ExtractorError('breaking out of outer loop')
2457 last_segment_url = urljoin(fragment_base_url, 'sq/%d' % idx)
2458 yield {
2459 'url': last_segment_url,
2460 'fragment_count': last_seq,
2461 }
2462 if known_idx == last_seq:
2463 no_fragment_score += 5
2464 else:
2465 no_fragment_score = 0
2466 known_idx = last_seq
2467 except ExtractorError:
2468 continue
2469
2470 time.sleep(max(0, FETCH_SPAN + fetch_time - time.time()))
2471
def _extract_player_url(self, *ytcfgs, webpage=None):
    """
    Look up the player JS URL in the given ytcfgs ('PLAYER_JS_URL', or 'jsUrl'
    below 'WEB_PLAYER_CONTEXT_CONFIGS') and resolve it against https://www.youtube.com

    `webpage` is accepted but not used here.
    @returns the absolute player URL, or None if no URL string was found
    """
    js_path = traverse_obj(
        ytcfgs, (..., 'PLAYER_JS_URL'), (..., 'WEB_PLAYER_CONTEXT_CONFIGS', ..., 'jsUrl'),
        get_all=False, expected_type=str)
    return urljoin('https://www.youtube.com', js_path) if js_path else None
2479
def _download_player_url(self, video_id, fatal=False):
    """
    Work out the player URL from the iframe API script.

    Downloads https://www.youtube.com/iframe_api, pulls the 8-hex-digit player
    version out of it and returns the matching base.js URL. Returns None when
    the download or the version search yields nothing.
    """
    iframe_js = self._download_webpage(
        'https://www.youtube.com/iframe_api',
        note='Downloading iframe API JS', video_id=video_id, fatal=fatal)
    if not iframe_js:
        return None

    player_version = self._search_regex(
        r'player\\?/([0-9a-fA-F]{8})\\?/', iframe_js, 'player version', fatal=fatal)
    if not player_version:
        return None

    return f'https://www.youtube.com/s/player/{player_version}/player_ias.vflset/en_US/base.js'
2489
2490 def _signature_cache_id(self, example_sig):
2491 """ Return a string representation of a signature """
2492 return '.'.join(str(len(part)) for part in example_sig.split('.'))
2493
@classmethod
def _extract_player_info(cls, player_url):
    """
    Return the 'id' group of the first pattern in cls._PLAYER_INFO_RE that
    matches player_url.

    Raises ExtractorError when none of the patterns match.
    """
    # Lazy generator: patterns are tried in order and searching stops at the first hit
    attempts = (re.search(pattern, player_url) for pattern in cls._PLAYER_INFO_RE)
    id_m = next((found for found in attempts if found), None)
    if id_m is None:
        raise ExtractorError('Cannot identify player %r' % player_url)
    return id_m.group('id')
2503
def _load_player(self, video_id, player_url, fatal=True):
    """
    Return the JS player code for player_url, downloading it at most once per player id.

    Downloaded code is kept in self._code_cache keyed by player id. Returns
    None when nothing is cached and the download produced nothing.
    """
    player_id = self._extract_player_info(player_url)
    if player_id in self._code_cache:
        return self._code_cache.get(player_id)

    code = self._download_webpage(
        player_url, video_id, fatal=fatal,
        note='Downloading player ' + player_id,
        errnote='Download of %s failed' % player_url)
    if code:
        self._code_cache[player_id] = code
    return self._code_cache.get(player_id)
2514
def _extract_signature_function(self, video_id, player_url, example_sig):
    """
    Return a callable that rearranges a signature like the player's JS function does.

    The transformation is stored as a list of source indices ("cache spec") in
    the 'youtube-sigfuncs' filesystem cache, keyed by player id and signature
    shape. On a cache miss the player is loaded, the JS signature function is
    run on a probe string and the resulting index list is stored.
    """
    player_id = self._extract_player_info(player_url)

    # Key for the filesystem cache; must be a plain file name
    func_id = f'js_{player_id}_{self._signature_cache_id(example_sig)}'
    assert os.path.basename(func_id) == func_id

    self.write_debug(f'Extracting signature function {func_id}')
    cache_spec = self.cache.load('youtube-sigfuncs', func_id)

    if not cache_spec:
        code = self._load_player(video_id, player_url)
        if code:
            sig_func = self._parse_sig_js(code)
            # Probe whose i-th character has code point i, so the output
            # reveals which input position ends up at each output position
            probe = ''.join(map(chr, range(len(example_sig))))
            cache_spec = [ord(ch) for ch in sig_func(probe)]
            self.cache.store('youtube-sigfuncs', func_id, cache_spec)

    def apply_spec(s):
        return ''.join(s[i] for i in cache_spec)

    return apply_spec
2534
2535 def _print_sig_code(self, func, example_sig):
2536 if not self.get_param('youtube_print_sig_code'):
2537 return
2538
2539 def gen_sig_code(idxs):
2540 def _genslice(start, end, step):
2541 starts = '' if start == 0 else str(start)
2542 ends = (':%d' % (end + step)) if end + step >= 0 else ':'
2543 steps = '' if step == 1 else (':%d' % step)
2544 return f's[{starts}{ends}{steps}]'
2545
2546 step = None
2547 # Quelch pyflakes warnings - start will be set when step is set
2548 start = '(Never used)'
2549 for i, prev in zip(idxs[1:], idxs[:-1]):
2550 if step is not None:
2551 if i - prev == step:
2552 continue
2553 yield _genslice(start, prev, step)
2554 step = None
2555 continue
2556 if i - prev in [-1, 1]:
2557 step = i - prev
2558 start = prev
2559 continue
2560 else:
2561 yield 's[%d]' % prev
2562 if step is None:
2563 yield 's[%d]' % i
2564 else:
2565 yield _genslice(start, i, step)
2566
2567 test_string = ''.join(map(chr, range(len(example_sig))))
2568 cache_res = func(test_string)
2569 cache_spec = [ord(c) for c in cache_res]
2570 expr_code = ' + '.join(gen_sig_code(cache_spec))
2571 signature_id_tuple = '(%s)' % (
2572 ', '.join(str(len(p)) for p in example_sig.split('.')))
2573 code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
2574 ' return %s\n') % (signature_id_tuple, expr_code)
2575 self.to_screen('Extracted signature function:\n' + code)
2576
def _parse_sig_js(self, jscode):
    """
    Locate the signature function in the player JS and return a Python callable for it.

    The function name is found by trying the patterns below in order; the
    function is then extracted with JSInterpreter. The returned callable
    takes the signature string and passes it as the single JS argument.
    """
    sig_name_patterns = (
        r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bm=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(h\.s\)\)',
        r'\bc&&\(c=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(c\)\)',
        r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\);[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\)',
        r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
        r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
        # Obsolete patterns
        r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
        r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
    )
    funcname = self._search_regex(
        sig_name_patterns, jscode, 'Initial JS player signature function name', group='sig')

    interpreter = JSInterpreter(jscode)
    initial_function = interpreter.extract_function(funcname)

    def run_signature_function(s):
        return initial_function([s])

    return run_signature_function
2600
2601 def _cached(self, func, *cache_id):
2602 def inner(*args, **kwargs):
2603 if cache_id not in self._player_cache:
2604 try:
2605 self._player_cache[cache_id] = func(*args, **kwargs)
2606 except ExtractorError as e:
2607 self._player_cache[cache_id] = e
2608 except Exception as e:
2609 self._player_cache[cache_id] = ExtractorError(traceback.format_exc(), cause=e)
2610
2611 ret = self._player_cache[cache_id]
2612 if isinstance(ret, Exception):
2613 raise ret
2614 return ret
2615 return inner
2616
def _decrypt_signature(self, s, video_id, player_url):
    """Turn the encrypted s field into a working signature"""
    # One cached signature function per player URL and signature shape
    cache_key = ('sig', player_url, self._signature_cache_id(s))
    cached_extractor = self._cached(self._extract_signature_function, *cache_key)
    sig_func = cached_extractor(video_id, player_url, s)
    self._print_sig_code(sig_func, s)
    return sig_func(s)
2624
def _decrypt_nsig(self, s, video_id, player_url):
    """
    Turn the encrypted n field into its decrypted form.

    The n function code is run with the native JS interpreter first. If that
    raises JSInterpreter.Exception, the same code is executed with PhantomJS;
    when the PhantomJS wrapper cannot be created, the original interpreter
    error is raised. Raises ExtractorError if player_url is None.
    """
    if player_url is None:
        raise ExtractorError('Cannot decrypt nsig without player_url')
    player_url = urljoin('https://www.youtube.com', player_url)

    jsi, player_id, func_code = self._extract_n_function_code(video_id, player_url)
    if self.get_param('youtube_print_sig_code'):
        self.to_screen(f'Extracted nsig function from {player_id}:\n{func_code[1]}\n')

    try:
        extract_nsig = self._cached(self._extract_n_function_from_code, 'nsig func', player_url)
        decrypted = extract_nsig(jsi, func_code)(s)
    except JSInterpreter.Exception as native_error:
        try:
            jsi = PhantomJSwrapper(self, timeout=5000)
        except ExtractorError:
            raise native_error
        self.report_warning(
            f'Native nsig extraction failed: Trying with PhantomJS\n'
            f' n = {s} ; player = {player_url}', video_id)
        self.write_debug(native_error)

        args, func_body = func_code
        script = f'console.log(function({", ".join(args)}) {{ {func_body} }}({s!r}));'
        decrypted = jsi.execute(
            script, video_id=video_id, note='Executing signature code').strip()

    self.write_debug(f'Decrypted nsig {s} => {decrypted}')
    return decrypted
2655
def _extract_n_function_name(self, jscode):
    """
    Return the name of the n function used by the player JS.

    The player references the function either directly by name or as
    `name[idx]`; in the latter case the array assigned to `name` is parsed
    and the element at `idx` is returned.
    """
    funcname, idx = self._search_regex(
        r'\.get\("n"\)\)&&\(b=(?P<nfunc>[a-zA-Z0-9$]+)(?:\[(?P<idx>\d+)\])?\([a-zA-Z0-9]\)',
        jscode, 'Initial JS player n function name', group=('nfunc', 'idx'))
    if not idx:
        return funcname

    array_literal = self._search_regex(
        rf'var {re.escape(funcname)}\s*=\s*(\[.+?\]);', jscode,
        f'Initial JS player n function list ({funcname}.{idx})')
    function_names = json.loads(js_to_json(array_literal))
    return function_names[int(idx)]
2666
def _extract_n_function_code(self, video_id, player_url):
    """
    Return (JSInterpreter, player id, n function code) for the given player.

    The function code is read from the 'youtube-nsig' cache when available.
    Otherwise the player is loaded, the n function code is extracted from it
    and stored in the cache.
    """
    player_id = self._extract_player_info(player_url)
    cached_code = self.cache.load('youtube-nsig', player_id, after='2022.08.19')

    # The interpreter is built from the cached value when there is one,
    # and from the full player code otherwise
    jscode = cached_code or self._load_player(video_id, player_url)
    jsi = JSInterpreter(jscode)
    if cached_code:
        return jsi, player_id, cached_code

    func_code = jsi.extract_function_code(self._extract_n_function_name(jscode))
    self.cache.store('youtube-nsig', player_id, func_code)
    return jsi, player_id, func_code
2679
def _extract_n_function_from_code(self, jsi, func_code):
    """
    Build a Python callable for the n function described by func_code.

    The callable raises JSInterpreter.Exception when the interpreted function
    fails for any reason or when its result starts with 'enhanced_except_'.
    """
    js_func = jsi.extract_function_from_code(*func_code)

    def extract_nsig(s):
        try:
            result = js_func([s])
        except JSInterpreter.Exception:
            raise
        except Exception as err:
            # Normalize unexpected failures to the interpreter's exception type
            raise JSInterpreter.Exception(traceback.format_exc(), cause=err)
        else:
            if result.startswith('enhanced_except_'):
                raise JSInterpreter.Exception('Signature function returned an exception')
            return result

    return extract_nsig
2696
def _extract_signature_timestamp(self, video_id, player_url, ytcfg=None, fatal=False):
    """
    Extract signatureTimestamp (sts)
    Required to tell API what sig/player version is in use.

    The 'STS' value of ytcfg is used when present; otherwise the value is
    searched for in the player code. Without a usable ytcfg value and without
    a player_url, an ExtractorError is raised if fatal, else a warning is
    reported and None is returned.
    """
    sts = int_or_none(ytcfg.get('STS')) if isinstance(ytcfg, dict) else None
    if sts:
        return sts

    # Attempt to extract from player
    if player_url is None:
        error_msg = 'Cannot extract signature timestamp without player_url.'
        if fatal:
            raise ExtractorError(error_msg)
        self.report_warning(error_msg)
        return

    code = self._load_player(video_id, player_url, fatal=fatal)
    if not code:
        return sts
    return int_or_none(self._search_regex(
        r'(?:signatureTimestamp|sts)\s*:\s*(?P<sts>[0-9]{5})', code,
        'JS player signature timestamp', group='sts', fatal=fatal))
2720
def _mark_watched(self, video_id, player_responses):
    """
    Request the playback tracking URLs found in the player responses.

    'videostatsPlaybackUrl' is requested first, then 'videostatsWatchtimeUrl'
    (the "fully watched" request). If either URL is missing, a warning is
    reported and the method returns without trying the remaining one.
    """
    # cpn generation algorithm is reverse engineered from base.js.
    # In fact it works even with dummy cpn.
    CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
    tracking_keys = ('videostatsPlaybackUrl', 'videostatsWatchtimeUrl')

    for is_full, key in enumerate(tracking_keys):
        label = 'fully ' if is_full else ''
        url = get_first(player_responses, ('playbackTracking', key, 'baseUrl'),
                        expected_type=url_or_none)
        if not url:
            self.report_warning(f'Unable to mark {label}watched')
            return

        parsed_url = urllib.parse.urlparse(url)
        qs = urllib.parse.parse_qs(parsed_url.query)

        cpn = ''.join(CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(0, 16))

        # more consistent results setting it to right before the end
        reported_length = (qs.get('len') or ['1.5'])[0]
        video_length = [str(float(reported_length) - 1)]

        extra_params = {
            'ver': ['2'],
            'cpn': [cpn],
            'cmt': video_length,
            'el': 'detailpage',  # otherwise defaults to "shorts"
        }
        if is_full:
            # these seem to mark watchtime "history" in the real world
            # they're required, so send in a single value
            extra_params['st'] = video_length
            extra_params['et'] = video_length
        qs.update(extra_params)

        new_query = urllib.parse.urlencode(qs, True)
        url = urllib.parse.urlunparse(parsed_url._replace(query=new_query))

        self._download_webpage(
            url, video_id, f'Marking {label}watched',
            'Unable to mark watched', fatal=False)
2761
@classmethod
def _extract_from_webpage(cls, url, webpage):
    """
    Yield url_results for YouTube videos referenced by a third-party webpage.

    An Invidious-style alternate link short-circuits everything else: its
    target is yielded and cls.StopExtraction is raised. Otherwise results
    from the parent implementation are yielded, followed by lazyYT embeds and
    "YouTube Video Importer" Wordpress plugin embeds.
    """
    # Invidious Instances
    # https://github.com/yt-dlp/yt-dlp/issues/195
    # https://github.com/iv-org/invidious/pull/1730
    alternate_link = re.search(
        r'<link rel="alternate" href="(?P<url>https://www\.youtube\.com/watch\?v=[0-9A-Za-z_-]{11})"',
        webpage)
    if alternate_link:
        yield cls.url_result(alternate_link.group('url'), cls)
        raise cls.StopExtraction()

    yield from super()._extract_from_webpage(url, webpage)

    # lazyYT YouTube embed
    lazy_ids = re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage)
    for lazy_id in lazy_ids:
        yield cls.url_result(unescapeHTML(lazy_id), cls, lazy_id)

    # Wordpress "YouTube Video Importer" plugin
    importer_matches = re.findall(r'''(?x)<div[^>]+
        class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
        data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
    # Each match is a (q1, q2, video id) tuple; only the id is needed
    for *_, importer_id in importer_matches:
        yield cls.url_result(importer_id, cls, importer_id)
2785
@classmethod
def extract_id(cls, url):
    """Return the video id for url as given by cls.get_temp_id, raising ExtractorError when there is none"""
    video_id = cls.get_temp_id(url)
    if video_id:
        return video_id
    raise ExtractorError(f'Invalid URL: {url}')
2792
def _extract_chapters_from_json(self, data, duration):
    """
    Extract chapters from the chaptered player bar renderer in data.

    Start times come from 'timeRangeStartMillis' (scaled to seconds) and
    titles from the chapter renderer's 'simpleText' title.
    """
    chapters_path = (
        'playerOverlays', 'playerOverlayRenderer', 'decoratedPlayerBarRenderer',
        'decoratedPlayerBarRenderer', 'playerBar', 'chapteredPlayerBarRenderer', 'chapters',
    )
    chapter_list = traverse_obj(data, chapters_path, expected_type=list)

    def start_seconds(chapter):
        start_millis = traverse_obj(chapter, ('chapterRenderer', 'timeRangeStartMillis'))
        return float_or_none(start_millis, scale=1000)

    def title_text(chapter):
        return traverse_obj(
            chapter, ('chapterRenderer', 'title', 'simpleText'), expected_type=str)

    return self._extract_chapters(
        chapter_list, chapter_time=start_seconds, chapter_title=title_text, duration=duration)
2807
def _extract_chapters_from_engagement_panel(self, data, duration):
    """
    Extract chapters from the macro markers lists of the engagement panels.

    Each list is tried in order; the chapters of the first one that yields
    any are returned. Returns an empty list when none does.
    """
    content_list = traverse_obj(
        data,
        ('engagementPanels', ..., 'engagementPanelSectionListRenderer', 'content', 'macroMarkersListRenderer', 'contents'),
        expected_type=list, default=[])

    def chapter_time(chapter):
        return parse_duration(self._get_text(chapter, 'timeDescription'))

    def chapter_title(chapter):
        return self._get_text(chapter, 'title')

    for contents in content_list:
        markers = traverse_obj(contents, (..., 'macroMarkersListItemRenderer'))
        chapters = self._extract_chapters(markers, chapter_time, chapter_title, duration)
        if chapters:
            return chapters
    return []
2820
def _extract_chapters_from_description(self, description, duration):
    """
    Extract chapters from description lines of the form "<timestamp> <title>".

    The matches are passed on with strict=False, i.e. they get sorted by
    start time before validation.
    """
    timestamped_lines = re.findall(
        r'(?m)^((?:\d+:)?\d{1,2}:\d{2})\b\W*\s(.+?)\s*$', description or '')

    def chapter_time(entry):
        return parse_duration(entry[0])

    def chapter_title(entry):
        return entry[1]

    return self._extract_chapters(
        timestamped_lines, chapter_time=chapter_time, chapter_title=chapter_title,
        duration=duration, strict=False)
2826
2827 def _extract_chapters(self, chapter_list, chapter_time, chapter_title, duration, strict=True):
2828 if not duration:
2829 return
2830 chapter_list = [{
2831 'start_time': chapter_time(chapter),
2832 'title': chapter_title(chapter),
2833 } for chapter in chapter_list or []]
2834 if not strict:
2835 chapter_list.sort(key=lambda c: c['start_time'] or 0)
2836
2837 chapters = [{'start_time': 0}]
2838 for idx, chapter in enumerate(chapter_list):
2839 if chapter['start_time'] is None:
2840 self.report_warning(f'Incomplete chapter {idx}')
2841 elif chapters[-1]['start_time'] <= chapter['start_time'] <= duration:
2842 chapters.append(chapter)
2843 else:
2844 self.report_warning(f'Invalid start time for chapter "{chapter["title"]}"')
2845 return chapters[1:]
2846
def _extract_comment(self, comment_renderer, parent=None):
    """
    Convert a comment renderer into a comment dict.

    Returns None when the renderer has no 'commentId'. The 'parent' field of
    the result is the given parent id, or 'root' when there is none.
    """
    comment_id = comment_renderer.get('commentId')
    if not comment_id:
        return

    text = self._get_text(comment_renderer, 'contentText')

    # note: timestamp is an estimate calculated from the current time and time_text
    timestamp, time_text = self._extract_time_text(comment_renderer, 'publishedTimeText')
    author = self._get_text(comment_renderer, 'authorText')
    author_id = try_get(
        comment_renderer, lambda x: x['authorEndpoint']['browseEndpoint']['browseId'], str)

    vote_getters = (lambda x: x['voteCount']['simpleText'], lambda x: x['likeCount'])
    like_count = parse_count(try_get(comment_renderer, vote_getters, str)) or 0

    author_thumbnail = try_get(
        comment_renderer, lambda x: x['authorThumbnail']['thumbnails'][-1]['url'], str)
    author_is_uploader = try_get(comment_renderer, lambda x: x['authorIsChannelOwner'], bool)

    action_buttons = try_get(
        comment_renderer, lambda x: x['actionButtons']['commentActionButtonsRenderer'], dict) or {}
    is_favorited = 'creatorHeart' in action_buttons

    return {
        'id': comment_id,
        'text': text,
        'timestamp': timestamp,
        'time_text': time_text,
        'like_count': like_count,
        'is_favorited': is_favorited,
        'author': author,
        'author_id': author_id,
        'author_thumbnail': author_thumbnail,
        'author_is_uploader': author_is_uploader,
        'parent': parent or 'root'
    }
2881
def _comment_entries(self, root_continuation_data, ytcfg, video_id, parent=None, tracker=None):
    """
    Generator yielding comment dicts for a comment section or a reply thread.

    Called without `parent` for the top-level comment section; it calls itself
    with `parent` set to a comment id (and the shared `tracker`) to fetch the
    replies of that comment. `tracker` is a dict of counters shared across the
    recursive calls; it is created here when not supplied.
    """

    # First value of an extractor argument, or '' when the argument is not set
    get_single_config_arg = lambda c: self._configuration_arg(c, [''])[0]

    def extract_header(contents):
        """Record the estimated comment count and return the continuation of the requested sort order"""
        _continuation = None
        for content in contents:
            comments_header_renderer = traverse_obj(content, 'commentsHeaderRenderer')
            expected_comment_count = self._get_count(
                comments_header_renderer, 'countText', 'commentsCount')

            if expected_comment_count:
                tracker['est_total'] = expected_comment_count
                self.to_screen(f'Downloading ~{expected_comment_count} comments')
            comment_sort_index = int(get_single_config_arg('comment_sort') != 'top')  # 1 = new, 0 = top

            sort_menu_item = try_get(
                comments_header_renderer,
                lambda x: x['sortMenu']['sortFilterSubMenuRenderer']['subMenuItems'][comment_sort_index], dict) or {}
            sort_continuation_ep = sort_menu_item.get('serviceEndpoint') or {}

            _continuation = self._extract_continuation_ep_data(sort_continuation_ep) or self._extract_continuation(sort_menu_item)
            if not _continuation:
                continue

            sort_text = str_or_none(sort_menu_item.get('title'))
            if not sort_text:
                sort_text = 'top comments' if comment_sort_index == 0 else 'newest first'
            self.to_screen('Sorting comments by %s' % sort_text.lower())
            break
        return _continuation

    def extract_thread(contents):
        """Yield the comments in contents, each followed by its replies; a bare None is yielded once max_parents is reached"""
        if not parent:
            tracker['current_page_thread'] = 0
        for content in contents:
            if not parent and tracker['total_parent_comments'] >= max_parents:
                # Sentinel: the consuming loop below returns on a falsy entry
                yield
            comment_thread_renderer = try_get(content, lambda x: x['commentThreadRenderer'])
            comment_renderer = get_first(
                (comment_thread_renderer, content), [['commentRenderer', ('comment', 'commentRenderer')]],
                expected_type=dict, default={})

            comment = self._extract_comment(comment_renderer, parent)
            if not comment:
                continue

            tracker['running_total'] += 1
            tracker['total_reply_comments' if parent else 'total_parent_comments'] += 1
            yield comment

            # Attempt to get the replies
            comment_replies_renderer = try_get(
                comment_thread_renderer, lambda x: x['replies']['commentRepliesRenderer'], dict)

            if comment_replies_renderer:
                tracker['current_page_thread'] += 1
                comment_entries_iter = self._comment_entries(
                    comment_replies_renderer, ytcfg, video_id,
                    parent=comment.get('id'), tracker=tracker)
                # Cap the replies by the per-thread limit and by what is left of the overall reply limit
                yield from itertools.islice(comment_entries_iter, min(
                    max_replies_per_thread, max(0, max_replies - tracker['total_reply_comments'])))

    # Keeps track of counts across recursive calls
    if not tracker:
        tracker = dict(
            running_total=0,
            est_total=0,
            current_page_thread=0,
            total_parent_comments=0,
            total_reply_comments=0)

    # TODO: Deprecated
    # YouTube comments have a max depth of 2
    max_depth = int_or_none(get_single_config_arg('max_comment_depth'))
    if max_depth:
        self._downloader.deprecation_warning(
            '[youtube] max_comment_depth extractor argument is deprecated. Set max replies in the max-comments extractor argument instead.')
    if max_depth == 1 and parent:
        return

    # The max_comments argument holds up to four limits; missing or non-integer ones
    # become sys.maxsize. NOTE: max_comments itself is not referenced again in this function
    max_comments, max_parents, max_replies, max_replies_per_thread, *_ = map(
        lambda p: int_or_none(p, default=sys.maxsize), self._configuration_arg('max_comments', ) + [''] * 4)

    continuation = self._extract_continuation(root_continuation_data)

    response = None
    is_forced_continuation = False
    is_first_continuation = parent is None
    if is_first_continuation and not continuation:
        # Sometimes you can get comments by generating the continuation yourself,
        # even if YouTube initially reports them being disabled - e.g. stories comments.
        # Note: if the comment section is actually disabled, YouTube may return a response with
        # required check_get_keys missing. So we will disable that check initially in this case.
        continuation = self._build_api_continuation_query(self._generate_comment_continuation(video_id))
        is_forced_continuation = True

    # One API request per page, until a page provides no further continuation
    for page_num in itertools.count(0):
        if not continuation:
            break
        headers = self.generate_api_headers(ytcfg=ytcfg, visitor_data=self._extract_visitor_data(response))
        comment_prog_str = f"({tracker['running_total']}/{tracker['est_total']})"
        if page_num == 0:
            if is_first_continuation:
                note_prefix = 'Downloading comment section API JSON'
            else:
                note_prefix = ' Downloading comment API JSON reply thread %d %s' % (
                    tracker['current_page_thread'], comment_prog_str)
        else:
            note_prefix = '%sDownloading comment%s API JSON page %d %s' % (
                ' ' if parent else '', ' replies' if parent else '',
                page_num, comment_prog_str)

        response = self._extract_response(
            item_id=None, query=continuation,
            ep='next', ytcfg=ytcfg, headers=headers, note=note_prefix,
            check_get_keys='onResponseReceivedEndpoints' if not is_forced_continuation else None)
        is_forced_continuation = False
        continuation_contents = traverse_obj(
            response, 'onResponseReceivedEndpoints', expected_type=list, default=[])

        continuation = None
        for continuation_section in continuation_contents:
            continuation_items = traverse_obj(
                continuation_section,
                (('reloadContinuationItemsCommand', 'appendContinuationItemsAction'), 'continuationItems'),
                get_all=False, expected_type=list) or []
            if is_first_continuation:
                # The first section of a top-level request is only used for its header
                continuation = extract_header(continuation_items)
                is_first_continuation = False
                if continuation:
                    break
                continue

            for entry in extract_thread(continuation_items):
                if not entry:
                    # extract_thread signalled that the parent comment limit was reached
                    return
                yield entry
            continuation = self._extract_continuation({'contents': continuation_items})
            if continuation:
                break

    # Relay YouTube's message when a top-level call produced no comments at all
    message = self._get_text(root_continuation_data, ('contents', ..., 'messageRenderer', 'text'), max_runs=1)
    if message and not parent and tracker['running_total'] == 0:
        self.report_warning(f'Youtube said: {message}', video_id=video_id, only_once=True)
3027
3028 @staticmethod
3029 def _generate_comment_continuation(video_id):
3030 """
3031 Generates initial comment section continuation token from given video id
3032 """
3033 token = f'\x12\r\x12\x0b{video_id}\x18\x062\'"\x11"\x0b{video_id}0\x00x\x020\x00B\x10comments-section'
3034 return base64.b64encode(token.encode()).decode()
3035
def _get_comments(self, ytcfg, video_id, contents, webpage):
    """
    Entry for comment extraction

    Returns an iterator over the comments of the 'comment-item-section' item
    section in contents, limited to the first value of the max_comments
    extractor argument when that is an integer. `webpage` is not used here.
    """
    def _real_comment_extract(contents):
        section_renderer = None
        for item in traverse_obj(contents, (..., 'itemSectionRenderer'), default={}):
            if item.get('sectionIdentifier') == 'comment-item-section':
                section_renderer = item
                break
        yield from self._comment_entries(section_renderer, ytcfg, video_id)

    limit_arg = self._configuration_arg('max_comments', [''])[0]
    max_comments = int_or_none(limit_arg)
    return itertools.islice(_real_comment_extract(contents), 0, max_comments)
3046
3047 @staticmethod
3048 def _get_checkok_params():
3049 return {'contentCheckOk': True, 'racyCheckOk': True}
3050
@classmethod
def _generate_player_context(cls, sts=None):
    """
    Build the playback context part of a player API query.

    'signatureTimestamp' is included only when sts is not None. The params
    from cls._get_checkok_params() are merged into the top level of the result.
    """
    playback_context = {'html5Preference': 'HTML5_PREF_WANTS'}
    if sts is not None:
        playback_context['signatureTimestamp'] = sts

    player_context = {
        'playbackContext': {
            'contentPlaybackContext': playback_context
        },
    }
    player_context.update(cls._get_checkok_params())
    return player_context
3064
@staticmethod
def _is_agegated(player_response):
    """
    Tell whether the player response indicates an age gate.

    True when playabilityStatus has a 'desktopLegacyAgeGateReason', or when
    its status or reason contains one of the known age gate markers.
    """
    AGE_GATE_REASONS = (
        'confirm your age', 'age-restricted', 'inappropriate',  # reason
        'age_verification_required', 'age_check_required',  # status
    )
    if traverse_obj(player_response, ('playabilityStatus', 'desktopLegacyAgeGateReason')):
        return True

    reasons = traverse_obj(player_response, ('playabilityStatus', ('status', 'reason')), default=[])
    for expected in AGE_GATE_REASONS:
        for reason in reasons:
            if expected in reason:
                return True
    return False
3076
@staticmethod
def _is_unplayable(player_response):
    """Tell whether the playabilityStatus status of the player response is 'UNPLAYABLE'"""
    status = traverse_obj(player_response, ('playabilityStatus', 'status'))
    return status == 'UNPLAYABLE'
3080
# Value sent as the 'params' field of the player API query by _extract_player_response,
# for story URLs (smuggled 'is_story') and for the android client
_STORY_PLAYER_PARAMS = '8AEB'
3082
def _extract_player_response(self, client, video_id, master_ytcfg, player_ytcfg, player_url, initial_pr, smuggled_data):
    """
    Download the player API JSON for video_id using the given client.

    The signature timestamp is only looked up when a player_url is given.
    Story videos and the android client additionally send
    _STORY_PLAYER_PARAMS as 'params'. A falsy response is returned as None.
    """
    session_index = self._extract_session_index(player_ytcfg, master_ytcfg)
    syncid = self._extract_account_syncid(player_ytcfg, master_ytcfg, initial_pr)
    if player_url:
        sts = self._extract_signature_timestamp(video_id, player_url, master_ytcfg, fatal=False)
    else:
        sts = None
    headers = self.generate_api_headers(
        ytcfg=player_ytcfg, account_syncid=syncid, session_index=session_index, default_client=client)

    yt_query = {'videoId': video_id}
    needs_story_params = (
        smuggled_data.get('is_story') or _split_innertube_client(client)[0] == 'android')
    if needs_story_params:
        yt_query['params'] = self._STORY_PLAYER_PARAMS
    yt_query.update(self._generate_player_context(sts))

    note = 'Downloading %s player API JSON' % client.replace('_', ' ').strip()
    response = self._extract_response(
        item_id=video_id, ep='player', query=yt_query,
        ytcfg=player_ytcfg, headers=headers, fatal=True,
        default_client=client, note=note)
    return response or None
3104
def _get_requested_clients(self, url, smuggled_data):
    """
    Return the ordered, de-duplicated list of innertube clients to query.

    The clients come from the 'player_client' extractor argument: known
    client names are taken as is, 'default' expands to the default clients
    and 'all' to every client whose name does not start with '_' (sorted by
    descending priority). Unsupported names are skipped with a warning. With
    no usable request the defaults are used. For music URLs, the '_music'
    variant of each requested client is appended when such a client exists.
    """
    requested_clients = []
    default = ['android', 'web']
    allowed_clients = sorted(
        (client for client in INNERTUBE_CLIENTS.keys() if client[:1] != '_'),
        key=lambda client: INNERTUBE_CLIENTS[client]['priority'], reverse=True)
    for client in self._configuration_arg('player_client'):
        if client in allowed_clients:
            requested_clients.append(client)
        elif client == 'default':
            requested_clients.extend(default)
        elif client == 'all':
            requested_clients.extend(allowed_clients)
        else:
            self.report_warning(f'Skipping unsupported client {client}')
    if not requested_clients:
        requested_clients = default

    if smuggled_data.get('is_music_url') or self.is_music_url(url):
        # Build the additions first: extending the list with a generator that
        # iterates the same list would mutate it during iteration
        music_variants = [
            f'{client}_music' for client in requested_clients
            if f'{client}_music' in INNERTUBE_CLIENTS]
        requested_clients.extend(music_variants)

    return orderedSet(requested_clients)
3128
def _extract_player_responses(self, clients, video_id, webpage, master_ytcfg, smuggled_data):
    """
    Collect player responses for video_id from the given clients.

    Returns a tuple (list of player responses, player_url). Extra clients may
    be queued while iterating when a response is unplayable or age-gated.
    If every attempt fails and no response was collected, the last
    ExtractorError is raised; otherwise errors are reported as warnings.
    """
    initial_pr = None
    if webpage:
        initial_pr = self._search_json(
            self._YT_INITIAL_PLAYER_RESPONSE_RE, webpage, 'initial player response', video_id, fatal=False)

    all_clients = set(clients)
    # Reversed so that clients.pop() below processes them in the requested order
    clients = clients[::-1]
    prs = []

    def append_client(*client_names):
        """ Append the first client name that exists but not already used """
        for client_name in client_names:
            actual_client = _split_innertube_client(client_name)[0]
            if actual_client in INNERTUBE_CLIENTS:
                if actual_client not in all_clients:
                    clients.append(client_name)
                    all_clients.add(actual_client)
                    return

    # Android player_response does not have microFormats which are needed for
    # extraction of some data. So we return the initial_pr with formats
    # stripped out even if not requested by the user
    # See: https://github.com/yt-dlp/yt-dlp/issues/501
    if initial_pr:
        pr = dict(initial_pr)
        pr['streamingData'] = None
        prs.append(pr)

    last_error = None
    tried_iframe_fallback = False
    player_url = None
    while clients:
        client, base_client, variant = _split_innertube_client(clients.pop())
        # Only the web client reuses the master ytcfg; others start from an empty one
        player_ytcfg = master_ytcfg if client == 'web' else {}
        if 'configs' not in self._configuration_arg('player_skip') and client != 'web':
            player_ytcfg = self._download_ytcfg(client, video_id) or player_ytcfg

        # A player URL found for an earlier client is reused
        player_url = player_url or self._extract_player_url(master_ytcfg, player_ytcfg, webpage=webpage)
        require_js_player = self._get_default_ytcfg(client).get('REQUIRE_JS_PLAYER')
        if 'js' in self._configuration_arg('player_skip'):
            require_js_player = False
            player_url = None

        # The iframe API fallback is attempted at most once per call
        if not player_url and not tried_iframe_fallback and require_js_player:
            player_url = self._download_player_url(video_id)
            tried_iframe_fallback = True

        try:
            # For the web client the response embedded in the webpage is used when available
            pr = initial_pr if client == 'web' and initial_pr else self._extract_player_response(
                client, video_id, player_ytcfg or master_ytcfg, player_ytcfg, player_url if require_js_player else None, initial_pr, smuggled_data)
        except ExtractorError as e:
            # Only the most recent error is kept; the previous one is downgraded to a warning
            if last_error:
                self.report_warning(last_error)
            last_error = e
            continue

        if pr:
            # YouTube may return a different video player response than expected.
            # See: https://github.com/TeamNewPipe/NewPipe/issues/8713
            pr_video_id = traverse_obj(pr, ('videoDetails', 'videoId'))
            if pr_video_id and pr_video_id != video_id:
                self.report_warning(
                    f'Skipping player response from {client} client (got player response for video "{pr_video_id}" instead of "{video_id}")' + bug_reports_message())
            else:
                prs.append(pr)

        # creator clients can bypass AGE_VERIFICATION_REQUIRED if logged in
        if variant == 'embedded' and self._is_unplayable(pr) and self.is_authenticated:
            append_client(f'{base_client}_creator')
        elif self._is_agegated(pr):
            if variant == 'tv_embedded':
                append_client(f'{base_client}_embedded')
            elif not variant:
                append_client(f'tv_embedded.{base_client}', f'{base_client}_embedded')

    if last_error:
        # Fatal only when no player response at all was collected
        if not len(prs):
            raise last_error
        self.report_warning(last_error)
    return prs, player_url
3210
3211 def _extract_formats_and_subtitles(self, streaming_data, video_id, player_url, is_live, duration):
3212 itags, stream_ids = {}, []
3213 itag_qualities, res_qualities = {}, {0: None}
3214 q = qualities([
3215 # Normally tiny is the smallest video-only formats. But
3216 # audio-only formats with unknown quality may get tagged as tiny
3217 'tiny',
3218 'audio_quality_ultralow', 'audio_quality_low', 'audio_quality_medium', 'audio_quality_high', # Audio only formats
3219 'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres'
3220 ])
3221 streaming_formats = traverse_obj(streaming_data, (..., ('formats', 'adaptiveFormats'), ...), default=[])
3222
3223 for fmt in streaming_formats:
3224 if fmt.get('targetDurationSec'):
3225 continue
3226
3227 itag = str_or_none(fmt.get('itag'))
3228 audio_track = fmt.get('audioTrack') or {}
3229 stream_id = '%s.%s' % (itag or '', audio_track.get('id', ''))
3230 if stream_id in stream_ids:
3231 continue
3232
3233 quality = fmt.get('quality')
3234 height = int_or_none(fmt.get('height'))
3235 if quality == 'tiny' or not quality:
3236 quality = fmt.get('audioQuality', '').lower() or quality
3237 # The 3gp format (17) in android client has a quality of "small",
3238 # but is actually worse than other formats
3239 if itag == '17':
3240 quality = 'tiny'
3241 if quality:
3242 if itag:
3243 itag_qualities[itag] = quality
3244 if height:
3245 res_qualities[height] = quality
3246 # FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment
3247 # (adding `&sq=0` to the URL) and parsing emsg box to determine the
3248 # number of fragment that would subsequently requested with (`&sq=N`)
3249 if fmt.get('type') == 'FORMAT_STREAM_TYPE_OTF':
3250 continue
3251
3252 fmt_url = fmt.get('url')
3253 if not fmt_url:
3254 sc = urllib.parse.parse_qs(fmt.get('signatureCipher'))
3255 fmt_url = url_or_none(try_get(sc, lambda x: x['url'][0]))
3256 encrypted_sig = try_get(sc, lambda x: x['s'][0])
3257 if not all((sc, fmt_url, player_url, encrypted_sig)):
3258 continue
3259 try:
3260 fmt_url += '&%s=%s' % (
3261 traverse_obj(sc, ('sp', -1)) or 'signature',
3262 self._decrypt_signature(encrypted_sig, video_id, player_url)
3263 )
3264 except ExtractorError as e:
3265 self.report_warning('Signature extraction failed: Some formats may be missing',
3266 video_id=video_id, only_once=True)
3267 self.write_debug(e, only_once=True)
3268 continue
3269
3270 query = parse_qs(fmt_url)
3271 throttled = False
3272 if query.get('n'):
3273 try:
3274 decrypt_nsig = self._cached(self._decrypt_nsig, 'nsig', query['n'][0])
3275 fmt_url = update_url_query(fmt_url, {
3276 'n': decrypt_nsig(query['n'][0], video_id, player_url)
3277 })
3278 except ExtractorError as e:
3279 phantomjs_hint = ''
3280 if isinstance(e, JSInterpreter.Exception):
3281 phantomjs_hint = f' Install {self._downloader._format_err("PhantomJS", self._downloader.Styles.EMPHASIS)} to workaround the issue\n'
3282 self.report_warning(
3283 f'nsig extraction failed: You may experience throttling for some formats\n{phantomjs_hint}'
3284 f' n = {query["n"][0]} ; player = {player_url}', video_id=video_id, only_once=True)
3285 self.write_debug(e, only_once=True)
3286 throttled = True
3287
3288 if itag:
3289 itags[itag] = 'https'
3290 stream_ids.append(stream_id)
3291
3292 tbr = float_or_none(fmt.get('averageBitrate') or fmt.get('bitrate'), 1000)
3293 language_preference = (
3294 10 if audio_track.get('audioIsDefault') and 10
3295 else -10 if 'descriptive' in (audio_track.get('displayName') or '').lower() and -10
3296 else -1)
3297 # Some formats may have much smaller duration than others (possibly damaged during encoding)
3298 # E.g. 2-nOtRESiUc Ref: https://github.com/yt-dlp/yt-dlp/issues/2823
3299 # Make sure to avoid false positives with small duration differences.
3300 # E.g. __2ABJjxzNo, ySuUZEjARPY
3301 is_damaged = try_get(fmt, lambda x: float(x['approxDurationMs']) / duration < 500)
3302 if is_damaged:
3303 self.report_warning(
3304 f'{video_id}: Some formats are possibly damaged. They will be deprioritized', only_once=True)
3305 dct = {
3306 'asr': int_or_none(fmt.get('audioSampleRate')),
3307 'filesize': int_or_none(fmt.get('contentLength')),
3308 'format_id': itag,
3309 'format_note': join_nonempty(
3310 '%s%s' % (audio_track.get('displayName') or '',
3311 ' (default)' if language_preference > 0 else ''),
3312 fmt.get('qualityLabel') or quality.replace('audio_quality_', ''),
3313 try_get(fmt, lambda x: x['projectionType'].replace('RECTANGULAR', '').lower()),
3314 try_get(fmt, lambda x: x['spatialAudioType'].replace('SPATIAL_AUDIO_TYPE_', '').lower()),
3315 throttled and 'THROTTLED', is_damaged and 'DAMAGED', delim=', '),
3316 # Format 22 is likely to be damaged. See https://github.com/yt-dlp/yt-dlp/issues/3372
3317 'source_preference': -10 if throttled else -5 if itag == '22' else -1,
3318 'fps': int_or_none(fmt.get('fps')) or None,
3319 'audio_channels': fmt.get('audioChannels'),
3320 'height': height,
3321 'quality': q(quality),
3322 'has_drm': bool(fmt.get('drmFamilies')),
3323 'tbr': tbr,
3324 'url': fmt_url,
3325 'width': int_or_none(fmt.get('width')),
3326 'language': join_nonempty(audio_track.get('id', '').split('.')[0],
3327 'desc' if language_preference < -1 else ''),
3328 'language_preference': language_preference,
3329 # Strictly de-prioritize damaged and 3gp formats
3330 'preference': -10 if is_damaged else -2 if itag == '17' else None,
3331 }
3332 mime_mobj = re.match(
3333 r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', fmt.get('mimeType') or '')
3334 if mime_mobj:
3335 dct['ext'] = mimetype2ext(mime_mobj.group(1))
3336 dct.update(parse_codecs(mime_mobj.group(2)))
3337 no_audio = dct.get('acodec') == 'none'
3338 no_video = dct.get('vcodec') == 'none'
3339 if no_audio:
3340 dct['vbr'] = tbr
3341 if no_video:
3342 dct['abr'] = tbr
3343 if no_audio or no_video:
3344 dct['downloader_options'] = {
3345 # Youtube throttles chunks >~10M
3346 'http_chunk_size': 10485760,
3347 }
3348 if dct.get('ext'):
3349 dct['container'] = dct['ext'] + '_dash'
3350 yield dct
3351
3352 live_from_start = is_live and self.get_param('live_from_start')
3353 skip_manifests = self._configuration_arg('skip')
3354 if not self.get_param('youtube_include_hls_manifest', True):
3355 skip_manifests.append('hls')
3356 if not self.get_param('youtube_include_dash_manifest', True):
3357 skip_manifests.append('dash')
3358 get_dash = 'dash' not in skip_manifests and (
3359 not is_live or live_from_start or self._configuration_arg('include_live_dash'))
3360 get_hls = not live_from_start and 'hls' not in skip_manifests
3361
3362 def process_manifest_format(f, proto, itag):
3363 if itag in itags:
3364 if itags[itag] == proto or f'{itag}-{proto}' in itags:
3365 return False
3366 itag = f'{itag}-{proto}'
3367 if itag:
3368 f['format_id'] = itag
3369 itags[itag] = proto
3370
3371 f['quality'] = q(itag_qualities.get(try_get(f, lambda f: f['format_id'].split('-')[0]), -1))
3372 if f['quality'] == -1 and f.get('height'):
3373 f['quality'] = q(res_qualities[min(res_qualities, key=lambda x: abs(x - f['height']))])
3374 return True
3375
3376 subtitles = {}
3377 for sd in streaming_data:
3378 hls_manifest_url = get_hls and sd.get('hlsManifestUrl')
3379 if hls_manifest_url:
3380 fmts, subs = self._extract_m3u8_formats_and_subtitles(hls_manifest_url, video_id, 'mp4', fatal=False, live=is_live)
3381 subtitles = self._merge_subtitles(subs, subtitles)
3382 for f in fmts:
3383 if process_manifest_format(f, 'hls', self._search_regex(
3384 r'/itag/(\d+)', f['url'], 'itag', default=None)):
3385 yield f
3386
3387 dash_manifest_url = get_dash and sd.get('dashManifestUrl')
3388 if dash_manifest_url:
3389 formats, subs = self._extract_mpd_formats_and_subtitles(dash_manifest_url, video_id, fatal=False)
3390 subtitles = self._merge_subtitles(subs, subtitles) # Prioritize HLS subs over DASH
3391 for f in formats:
3392 if process_manifest_format(f, 'dash', f['format_id']):
3393 f['filesize'] = int_or_none(self._search_regex(
3394 r'/clen/(\d+)', f.get('fragment_base_url') or f['url'], 'file size', default=None))
3395 if live_from_start:
3396 f['is_from_start'] = True
3397
3398 yield f
3399 yield subtitles
3400
def _extract_storyboard(self, player_responses, duration):
    """Yield one `mhtml` storyboard format per level listed in the storyboard spec.

    The spec is a '|'-separated string. Its first element is the URL template
    (resolved against https://i.ytimg.com/); every following element describes
    one storyboard level as 8 '#'-separated fields. The first five fields are
    integers (width, height, frame count, columns, rows) and the last two are
    the `$N` replacement and the `sigh` signature appended to the URL.

    @param player_responses  Player responses to search for the storyboard spec
    @param duration          Video duration; used to derive the fps and the
                             duration of every fragment

    Nothing is yielded when no usable base URL is found or when `duration` is
    unknown, since the timing of the fragments cannot be computed then.
    Malformed levels are reported with a warning and skipped.
    """
    spec = get_first(
        player_responses, ('storyboards', 'playerStoryboardSpecRenderer', 'spec'), default='').split('|')[::-1]
    # The list is reversed, so pop() removes the URL template (the first element of the spec)
    base_url = url_or_none(urljoin('https://i.ytimg.com/', spec.pop() or None))
    if not base_url:
        return
    if not duration:
        # Both `fps` and the fragment durations below divide by/with the duration;
        # without it the divisions would raise and abort the whole extraction
        self.write_debug('Skipping storyboards since the video duration is unknown')
        return
    L = len(spec) - 1
    for i, args in enumerate(spec):
        args = args.split('#')
        counts = list(map(int_or_none, args[:5]))
        if len(args) != 8 or not all(counts):
            self.report_warning(f'Malformed storyboard {i}: {"#".join(args)}{bug_reports_message()}')
            continue
        width, height, frame_count, cols, rows = counts
        N, sigh = args[6:]

        # `$M` is left in place here; it is substituted per fragment below
        url = base_url.replace('$L', str(L - i)).replace('$N', N) + f'&sigh={sigh}'
        # Each fragment is one image holding a grid of `cols * rows` frames
        fragment_count = frame_count / (cols * rows)
        fragment_duration = duration / fragment_count
        yield {
            'format_id': f'sb{i}',
            'format_note': 'storyboard',
            'ext': 'mhtml',
            'protocol': 'mhtml',
            'acodec': 'none',
            'vcodec': 'none',
            'url': url,
            'width': width,
            'height': height,
            'fps': frame_count / duration,
            'rows': rows,
            'columns': cols,
            'fragments': [{
                'url': url.replace('$M', str(j)),
                # The last fragment may be only partially filled, hence shorter
                'duration': min(fragment_duration, duration - (j * fragment_duration)),
            } for j in range(math.ceil(fragment_count))],
        }
3438
def _download_player_responses(self, url, smuggled_data, video_id, webpage_url):
    """Download the watch page (unless skipped) and the player responses.

    The watch page is fetched non-fatally unless 'webpage' is listed in the
    `player_skip` extractor argument. The ytcfg taken from it (or the default
    ytcfg as fallback) is then used to request the player responses of all
    requested clients.

    Returns the tuple `(webpage, master_ytcfg, player_responses, player_url)`.
    """
    skip_webpage = 'webpage' in self._configuration_arg('player_skip')

    webpage = None
    if not skip_webpage:
        webpage_query = {'bpctr': '9999999999', 'has_verified': '1'}
        if smuggled_data.get('is_story'):
            webpage_query['pp'] = self._STORY_PLAYER_PARAMS
        webpage = self._download_webpage(
            webpage_url, video_id, fatal=False, query=webpage_query)

    master_ytcfg = self.extract_ytcfg(video_id, webpage) or self._get_default_ytcfg()

    requested_clients = self._get_requested_clients(url, smuggled_data)
    player_responses, player_url = self._extract_player_responses(
        requested_clients, video_id, webpage, master_ytcfg, smuggled_data)

    return webpage, master_ytcfg, player_responses, player_url
3455
def _list_formats(self, video_id, microformats, video_details, player_responses, player_url, duration=None):
    """Determine the live state of the video and extract its formats.

    `isLive` from the video details takes precedence; only when it is absent
    is `isLiveNow` from the live broadcast details consulted.

    Returns the tuple
    `(live_broadcast_details, is_live, streaming_data, formats, subtitles)`,
    where `subtitles` is the last item produced by
    `_extract_formats_and_subtitles` and `formats` is everything before it.
    """
    broadcast_details = traverse_obj(microformats, (..., 'liveBroadcastDetails'))

    live_now = get_first(video_details, 'isLive')
    if live_now is None:
        live_now = get_first(broadcast_details, 'isLiveNow')

    all_streaming_data = traverse_obj(player_responses, (..., 'streamingData'), default=[])
    extracted = self._extract_formats_and_subtitles(
        all_streaming_data, video_id, player_url, live_now, duration)
    # The generator yields every format first and the subtitles dict last
    *formats, subtitles = extracted

    return broadcast_details, live_now, all_streaming_data, formats, subtitles
3466
def _real_extract(self, url):
    """Extract the info dict of a single video.

    The (possibly smuggled) `url` is resolved to a video id, then the watch
    page and the player responses are downloaded. The return value is one of:
      - a `url_result` pointing to the trailer, when the playability status
        only offers a trailer video,
      - a playlist of `url_transparent` entries for multifeed videos (unless
        `noplaylist` is set or `force_singlefeed` was smuggled into the URL),
      - the info dict of the video itself (formats, thumbnails, subtitles,
        live state, music metadata, chapters, availability, ...).
    """
    url, smuggled_data = unsmuggle_url(url, {})
    video_id = self._match_id(url)

    base_url = self.http_scheme() + '//www.youtube.com/'
    webpage_url = base_url + 'watch?v=' + video_id

    webpage, master_ytcfg, player_responses, player_url = self._download_player_responses(url, smuggled_data, video_id, webpage_url)

    playability_statuses = traverse_obj(
        player_responses, (..., 'playabilityStatus'), expected_type=dict, default=[])

    trailer_video_id = get_first(
        playability_statuses,
        ('errorScreen', 'playerLegacyDesktopYpcTrailerRenderer', 'trailerVideoId'),
        expected_type=str)
    if trailer_video_id:
        return self.url_result(
            trailer_video_id, self.ie_key(), trailer_video_id)

    # Without a webpage there is no HTML metadata to fall back on
    search_meta = ((lambda x: self._html_search_meta(x, webpage, default=None))
                   if webpage else (lambda x: None))

    video_details = traverse_obj(
        player_responses, (..., 'videoDetails'), expected_type=dict, default=[])
    microformats = traverse_obj(
        player_responses, (..., 'microformat', 'playerMicroformatRenderer'),
        expected_type=dict, default=[])
    video_title = (
        get_first(video_details, 'title')
        or self._get_text(microformats, (..., 'title'))
        or search_meta(['og:title', 'twitter:title', 'title']))
    video_description = get_first(video_details, 'shortDescription')

    multifeed_metadata_list = get_first(
        player_responses,
        ('multicamera', 'playerLegacyMulticameraRenderer', 'metadataList'),
        expected_type=str)
    if multifeed_metadata_list and not smuggled_data.get('force_singlefeed'):
        if self.get_param('noplaylist'):
            self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
        else:
            entries = []
            feed_ids = []
            for feed in multifeed_metadata_list.split(','):
                # Unquote should take place before split on comma (,) since textual
                # fields may contain comma as well (see
                # https://github.com/ytdl-org/youtube-dl/issues/8536)
                feed_data = urllib.parse.parse_qs(
                    urllib.parse.unquote_plus(feed))

                def feed_entry(name):
                    return try_get(
                        feed_data, lambda x: x[name][0], str)

                feed_id = feed_entry('id')
                if not feed_id:
                    continue
                feed_title = feed_entry('title')
                title = video_title
                if feed_title:
                    title += ' (%s)' % feed_title
                entries.append({
                    '_type': 'url_transparent',
                    'ie_key': 'Youtube',
                    'url': smuggle_url(
                        '%swatch?v=%s' % (base_url, feed_data['id'][0]),
                        {'force_singlefeed': True}),
                    'title': title,
                })
                feed_ids.append(feed_id)
            self.to_screen(
                'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
                % (', '.join(feed_ids), video_id))
            return self.playlist_result(
                entries, video_id, video_title, video_description)

    duration = int_or_none(
        get_first(video_details, 'lengthSeconds')
        or get_first(microformats, 'lengthSeconds')
        or parse_duration(search_meta('duration'))) or None

    # The duration must be forwarded: `_list_formats` defaults it to None and
    # passes it on to the format extraction
    live_broadcast_details, is_live, streaming_data, formats, automatic_captions = \
        self._list_formats(video_id, microformats, video_details, player_responses, player_url, duration)

    if not formats:
        if not self.get_param('allow_unplayable_formats') and traverse_obj(streaming_data, (..., 'licenseInfos')):
            self.report_drm(video_id)
        pemr = get_first(
            playability_statuses,
            ('errorScreen', 'playerErrorMessageRenderer'), expected_type=dict) or {}
        reason = self._get_text(pemr, 'reason') or get_first(playability_statuses, 'reason')
        subreason = clean_html(self._get_text(pemr, 'subreason') or '')
        if subreason:
            if subreason == 'The uploader has not made this video available in your country.':
                countries = get_first(microformats, 'availableCountries')
                if not countries:
                    regions_allowed = search_meta('regionsAllowed')
                    countries = regions_allowed.split(',') if regions_allowed else None
                self.raise_geo_restricted(subreason, countries, metadata_available=True)
            # A subreason may be present without a reason; avoid `None += str`
            reason = f'{reason}. {subreason}' if reason else subreason
        if reason:
            self.raise_no_formats(reason, expected=True)

    keywords = get_first(video_details, 'keywords', expected_type=list) or []
    if not keywords and webpage:
        keywords = [
            unescapeHTML(m.group('content'))
            for m in re.finditer(self._meta_regex('og:video:tag'), webpage)]
    for keyword in keywords:
        if keyword.startswith('yt:stretch='):
            mobj = re.search(r'(\d+)\s*:\s*(\d+)', keyword)
            if mobj:
                # NB: float is intentional for forcing float division
                w, h = (float(v) for v in mobj.groups())
                if w > 0 and h > 0:
                    ratio = w / h
                    for f in formats:
                        if f.get('vcodec') != 'none':
                            f['stretched_ratio'] = ratio
                    break
    thumbnails = self._extract_thumbnails((video_details, microformats), (..., ..., 'thumbnail'))
    thumbnail_url = search_meta(['og:image', 'twitter:image'])
    if thumbnail_url:
        thumbnails.append({
            'url': thumbnail_url,
        })
    # Shallow copy: holds only the thumbnails found in the metadata, but shares
    # the dicts with `thumbnails`, so the preferences set below apply to both
    original_thumbnails = thumbnails.copy()

    # The best resolution thumbnails sometimes does not appear in the webpage
    # See: https://github.com/yt-dlp/yt-dlp/issues/340
    # List of possible thumbnails - Ref: <https://stackoverflow.com/a/20542029>
    thumbnail_names = [
        # While the *1,*2,*3 thumbnails are just below their corresponding "*default" variants
        # in resolution, these are not the custom thumbnail. So de-prioritize them
        'maxresdefault', 'hq720', 'sddefault', 'hqdefault', '0', 'mqdefault', 'default',
        'sd1', 'sd2', 'sd3', 'hq1', 'hq2', 'hq3', 'mq1', 'mq2', 'mq3', '1', '2', '3'
    ]
    n_thumbnail_names = len(thumbnail_names)
    thumbnails.extend({
        'url': 'https://i.ytimg.com/vi{webp}/{video_id}/{name}{live}.{ext}'.format(
            video_id=video_id, name=name, ext=ext,
            webp='_webp' if ext == 'webp' else '', live='_live' if is_live else ''),
    } for name in thumbnail_names for ext in ('webp', 'jpg'))
    for thumb in thumbnails:
        # Earlier names in `thumbnail_names` get a higher preference; unknown names rank last
        i = next((i for i, t in enumerate(thumbnail_names) if f'/{video_id}/{t}' in thumb['url']), n_thumbnail_names)
        thumb['preference'] = (0 if '.webp' in thumb['url'] else -1) - (2 * i)
    self._remove_duplicate_formats(thumbnails)
    self._downloader._sort_thumbnails(original_thumbnails)

    category = get_first(microformats, 'category') or search_meta('genre')
    channel_id = str_or_none(
        get_first(video_details, 'channelId')
        or get_first(microformats, 'externalChannelId')
        or search_meta('channelId'))
    owner_profile_url = get_first(microformats, 'ownerProfileUrl')

    live_content = get_first(video_details, 'isLiveContent')
    is_upcoming = get_first(video_details, 'isUpcoming')
    if is_live is None:
        if is_upcoming or live_content is False:
            is_live = False
    if is_upcoming is None and (live_content or is_live):
        is_upcoming = False
    live_start_time = parse_iso8601(get_first(live_broadcast_details, 'startTimestamp'))
    live_end_time = parse_iso8601(get_first(live_broadcast_details, 'endTimestamp'))
    if not duration and live_end_time and live_start_time:
        duration = live_end_time - live_start_time

    if is_live and self.get_param('live_from_start'):
        self._prepare_live_from_start_formats(formats, video_id, live_start_time, url, webpage_url, smuggled_data)

    formats.extend(self._extract_storyboard(player_responses, duration))

    # source_preference is lower for throttled/potentially damaged formats
    self._sort_formats(formats, (
        'quality', 'res', 'fps', 'hdr:12', 'source', 'vcodec:vp9.2', 'channels', 'acodec', 'lang', 'proto'))

    info = {
        'id': video_id,
        'title': video_title,
        'formats': formats,
        'thumbnails': thumbnails,
        # The best thumbnail that we are sure exists. Prevents unnecessary
        # URL checking if user don't care about getting the best possible thumbnail
        'thumbnail': traverse_obj(original_thumbnails, (-1, 'url')),
        'description': video_description,
        'uploader': get_first(video_details, 'author'),
        # Non-fatal: an unrecognised profile URL must not abort the extraction
        'uploader_id': self._search_regex(
            r'/(?:channel|user)/([^/?&#]+)', owner_profile_url, 'uploader id',
            default=None) if owner_profile_url else None,
        'uploader_url': owner_profile_url,
        'channel_id': channel_id,
        'channel_url': format_field(channel_id, None, 'https://www.youtube.com/channel/%s'),
        'duration': duration,
        'view_count': int_or_none(
            get_first((video_details, microformats), (..., 'viewCount'))
            or search_meta('interactionCount')),
        'average_rating': float_or_none(get_first(video_details, 'averageRating')),
        'age_limit': 18 if (
            get_first(microformats, 'isFamilySafe') is False
            or search_meta('isFamilyFriendly') == 'false'
            or search_meta('og:restrictions:age') == '18+') else 0,
        'webpage_url': webpage_url,
        'categories': [category] if category else None,
        'tags': keywords,
        'playable_in_embed': get_first(playability_statuses, 'playableInEmbed'),
        'is_live': is_live,
        'was_live': (False if is_live or is_upcoming or live_content is False
                     else None if is_live is None or is_upcoming is None
                     else live_content),
        'live_status': 'is_upcoming' if is_upcoming else None,  # rest will be set by YoutubeDL
        'release_timestamp': live_start_time,
    }

    if get_first(video_details, 'isPostLiveDvr'):
        self.write_debug('Video is in Post-Live Manifestless mode')
        info['live_status'] = 'post_live'
        if (duration or 0) > 4 * 3600:
            self.report_warning(
                'The livestream has not finished processing. Only 4 hours of the video can be currently downloaded. '
                'This is a known issue and patches are welcome')

    subtitles = {}
    pctr = traverse_obj(player_responses, (..., 'captions', 'playerCaptionsTracklistRenderer'), expected_type=dict)
    if pctr:
        def get_lang_code(track):
            return (remove_start(track.get('vssId') or '', '.').replace('.', '-')
                    or track.get('languageCode'))

        # Converted into dicts to remove duplicates
        captions = {
            get_lang_code(sub): sub
            for sub in traverse_obj(pctr, (..., 'captionTracks', ...), default=[])}
        translation_languages = {
            lang.get('languageCode'): self._get_text(lang.get('languageName'), max_runs=1)
            for lang in traverse_obj(pctr, (..., 'translationLanguages', ...), default=[])}

        def process_language(container, base_url, lang_code, sub_name, query):
            # Adds one entry per supported subtitle format to `container[lang_code]`
            lang_subs = container.setdefault(lang_code, [])
            for fmt in self._SUBTITLE_FORMATS:
                query.update({
                    'fmt': fmt,
                })
                lang_subs.append({
                    'ext': fmt,
                    'url': urljoin('https://www.youtube.com', update_url_query(base_url, query)),
                    'name': sub_name,
                })

        # NB: Constructing the full subtitle dictionary is slow
        get_translated_subs = 'translated_subs' not in self._configuration_arg('skip') and (
            self.get_param('writeautomaticsub', False) or self.get_param('listsubtitles'))
        for lang_code, caption_track in captions.items():
            base_url = caption_track.get('baseUrl')
            # Check before parsing so that a missing URL is never handed to the URL parser
            if not base_url:
                continue
            orig_lang = parse_qs(base_url).get('lang', [None])[-1]
            lang_name = self._get_text(caption_track, 'name', max_runs=1)
            if caption_track.get('kind') != 'asr':
                if not lang_code:
                    continue
                process_language(
                    subtitles, base_url, lang_code, lang_name, {})
                if not caption_track.get('isTranslatable'):
                    continue
            for trans_code, trans_name in translation_languages.items():
                if not trans_code:
                    continue
                orig_trans_code = trans_code
                if caption_track.get('kind') != 'asr':
                    if not get_translated_subs:
                        continue
                    trans_code += f'-{lang_code}'
                    trans_name += format_field(lang_name, None, ' from %s')
                # Add an "-orig" label to the original language so that it can be distinguished.
                # The subs are returned without "-orig" as well for compatibility
                if lang_code == f'a-{orig_trans_code}':
                    process_language(
                        automatic_captions, base_url, f'{trans_code}-orig', f'{trans_name} (Original)', {})
                # Setting tlang=lang returns damaged subtitles.
                process_language(automatic_captions, base_url, trans_code, trans_name,
                                 {} if orig_lang == orig_trans_code else {'tlang': trans_code})

    info['automatic_captions'] = automatic_captions
    info['subtitles'] = subtitles

    # Start/end times given in the URL; the fragment takes precedence over the query
    parsed_url = urllib.parse.urlparse(url)
    for component in [parsed_url.fragment, parsed_url.query]:
        query = urllib.parse.parse_qs(component)
        for k, v in query.items():
            for d_k, s_ks in [('start', ('start', 't')), ('end', ('end',))]:
                d_k += '_time'
                if d_k not in info and k in s_ks:
                    info[d_k] = parse_duration(v[0])

    # Youtube Music Auto-generated description
    if video_description:
        mobj = re.search(
            r'''(?xs)
                (?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+
                (?P<album>[^\n]+)
                (?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?
                (?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?
                (.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?
                .+\nAuto-generated\ by\ YouTube\.\s*$
            ''', video_description)
        if mobj:
            release_year = mobj.group('release_year')
            release_date = mobj.group('release_date')
            if release_date:
                release_date = release_date.replace('-', '')
                if not release_year:
                    release_year = release_date[:4]
            info.update({
                # strip() belongs on the matched text, not on the group name
                'album': mobj.group('album').strip(),
                'artist': mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·')),
                'track': mobj.group('track').strip(),
                'release_date': release_date,
                'release_year': int_or_none(release_year),
            })

    initial_data = None
    if webpage:
        initial_data = self.extract_yt_initial_data(video_id, webpage, fatal=False)
    if not initial_data:
        query = {'videoId': video_id}
        query.update(self._get_checkok_params())
        initial_data = self._extract_response(
            item_id=video_id, ep='next', fatal=False,
            ytcfg=master_ytcfg, query=query,
            headers=self.generate_api_headers(ytcfg=master_ytcfg),
            note='Downloading initial data API JSON')

    info['comment_count'] = traverse_obj(initial_data, (
        'contents', 'twoColumnWatchNextResults', 'results', 'results', 'contents', ..., 'itemSectionRenderer',
        'contents', ..., 'commentsEntryPointHeaderRenderer', 'commentCount', 'simpleText'
    ), (
        'engagementPanels', lambda _, v: v['engagementPanelSectionListRenderer']['panelIdentifier'] == 'comment-item-section',
        'engagementPanelSectionListRenderer', 'header', 'engagementPanelTitleHeaderRenderer', 'contextualInfo', 'runs', ..., 'text'
    ), expected_type=int_or_none, get_all=False)

    try:  # This will error if there is no livechat
        initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
    except (KeyError, IndexError, TypeError):
        pass
    else:
        info.setdefault('subtitles', {})['live_chat'] = [{
            # url is needed to set cookies
            'url': f'https://www.youtube.com/watch?v={video_id}&bpctr=9999999999&has_verified=1',
            'video_id': video_id,
            'ext': 'json',
            'protocol': 'youtube_live_chat' if is_live or is_upcoming else 'youtube_live_chat_replay',
        }]

    if initial_data:
        info['chapters'] = (
            self._extract_chapters_from_json(initial_data, duration)
            or self._extract_chapters_from_engagement_panel(initial_data, duration)
            or self._extract_chapters_from_description(video_description, duration)
            or None)

    # Computed unconditionally so that `contents` and `vpir` are always bound
    # for the upload date and comment extraction below, even without initial data
    contents = traverse_obj(
        initial_data, ('contents', 'twoColumnWatchNextResults', 'results', 'results', 'contents'),
        expected_type=list, default=[])

    vpir = get_first(contents, 'videoPrimaryInfoRenderer')
    if vpir:
        stl = vpir.get('superTitleLink')
        if stl:
            stl = self._get_text(stl)
            if try_get(
                    vpir,
                    lambda x: x['superTitleIcon']['iconType']) == 'LOCATION_PIN':
                info['location'] = stl
            else:
                mobj = re.search(r'(.+?)\s*S(\d+)\s*•?\s*E(\d+)', stl)
                if mobj:
                    info.update({
                        'series': mobj.group(1),
                        'season_number': int(mobj.group(2)),
                        'episode_number': int(mobj.group(3)),
                    })
        for tlb in (try_get(
                vpir,
                lambda x: x['videoActions']['menuRenderer']['topLevelButtons'],
                list) or []):
            tbr = tlb.get('toggleButtonRenderer') or {}
            # Two known label layouts; the first one that matches wins
            for getter, regex in [(
                    lambda x: x['defaultText']['accessibility']['accessibilityData'],
                    r'(?P<count>[\d,]+)\s*(?P<type>(?:dis)?like)'), ([
                        lambda x: x['accessibility'],
                        lambda x: x['accessibilityData']['accessibilityData'],
                    ], r'(?P<type>(?:dis)?like) this video along with (?P<count>[\d,]+) other people')]:
                label = (try_get(tbr, getter, dict) or {}).get('label')
                if label:
                    mobj = re.match(regex, label)
                    if mobj:
                        info[mobj.group('type') + '_count'] = str_to_int(mobj.group('count'))
                        break
        sbr_tooltip = try_get(
            vpir, lambda x: x['sentimentBar']['sentimentBarRenderer']['tooltip'])
        if sbr_tooltip:
            like_count, dislike_count = sbr_tooltip.split(' / ')
            info.update({
                'like_count': str_to_int(like_count),
                'dislike_count': str_to_int(dislike_count),
            })
    vsir = get_first(contents, 'videoSecondaryInfoRenderer')
    if vsir:
        vor = traverse_obj(vsir, ('owner', 'videoOwnerRenderer'))
        info.update({
            'channel': self._get_text(vor, 'title'),
            'channel_follower_count': self._get_count(vor, 'subscriberCountText')})

        rows = try_get(
            vsir,
            lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows'],
            list) or []
        # A divider line separates several songs; song metadata is then
        # ambiguous and is therefore not extracted
        multiple_songs = False
        for row in rows:
            if try_get(row, lambda x: x['metadataRowRenderer']['hasDividerLine']) is True:
                multiple_songs = True
                break
        for row in rows:
            mrr = row.get('metadataRowRenderer') or {}
            mrr_title = mrr.get('title')
            if not mrr_title:
                continue
            mrr_title = self._get_text(mrr, 'title')
            mrr_contents_text = self._get_text(mrr, ('contents', 0))
            if mrr_title == 'License':
                info['license'] = mrr_contents_text
            elif not multiple_songs:
                if mrr_title == 'Album':
                    info['album'] = mrr_contents_text
                elif mrr_title == 'Artist':
                    info['artist'] = mrr_contents_text
                elif mrr_title == 'Song':
                    info['track'] = mrr_contents_text

    fallbacks = {
        'channel': 'uploader',
        'channel_id': 'uploader_id',
        'channel_url': 'uploader_url',
    }

    # The upload date for scheduled, live and past live streams / premieres in microformats
    # may be different from the stream date. Although not in UTC, we will prefer it in this case.
    # See: https://github.com/yt-dlp/yt-dlp/pull/2223#issuecomment-1008485139
    upload_date = (
        unified_strdate(get_first(microformats, 'uploadDate'))
        or unified_strdate(search_meta('uploadDate')))
    if not upload_date or (not info.get('is_live') and not info.get('was_live') and info.get('live_status') != 'is_upcoming'):
        upload_date = strftime_or_none(self._extract_time_text(vpir, 'dateText')[0], '%Y%m%d') or upload_date
    info['upload_date'] = upload_date

    for to, frm in fallbacks.items():
        if not info.get(to):
            info[to] = info.get(frm)

    for s_k, d_k in [('artist', 'creator'), ('track', 'alt_title')]:
        v = info.get(s_k)
        if v:
            info[d_k] = v

    is_private = get_first(video_details, 'isPrivate', expected_type=bool)
    is_unlisted = get_first(microformats, 'isUnlisted', expected_type=bool)
    is_membersonly = None
    is_premium = None
    if initial_data and is_private is not None:
        is_membersonly = False
        is_premium = False
        contents = try_get(initial_data, lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'], list) or []
        badge_labels = set()
        for content in contents:
            if not isinstance(content, dict):
                continue
            badge_labels.update(self._extract_badges(content.get('videoPrimaryInfoRenderer')))
        for badge_label in badge_labels:
            if badge_label.lower() == 'members only':
                is_membersonly = True
            elif badge_label.lower() == 'premium':
                is_premium = True
            elif badge_label.lower() == 'unlisted':
                is_unlisted = True

    info['availability'] = self._availability(
        is_private=is_private,
        needs_premium=is_premium,
        needs_subscription=is_membersonly,
        needs_auth=info['age_limit'] >= 18,
        is_unlisted=None if is_private is None else is_unlisted)

    info['__post_extractor'] = self.extract_comments(master_ytcfg, video_id, contents, webpage)

    self.mark_watched(video_id, player_responses)

    return info
3964
3965
3966 class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):
3967
@staticmethod
def passthrough_smuggled_data(func):
    """Decorate an extraction method so that smuggled data reaches its entries.

    `func` is called as `func(self, url, smuggled_data)` with the unsmuggled
    URL. For music URLs `is_music_url` is added to the smuggled data. When
    there is any smuggled data and the result has entries, the entries are
    replaced by a lazy generator that smuggles the data into each entry URL.
    """
    def _resmuggle(entries, data):
        # TODO: Convert URL to music.youtube instead.
        # Do we need to passthrough any other smuggled_data?
        for item in entries:
            item['url'] = smuggle_url(item['url'], data)
            yield item

    @functools.wraps(func)
    def wrapper(self, url):
        plain_url, data = unsmuggle_url(url, {})
        if self.is_music_url(plain_url):
            data['is_music_url'] = True

        result = func(self, plain_url, data)
        if data and result.get('entries'):
            result['entries'] = _resmuggle(result['entries'], data)
        return result

    return wrapper
3987
def _extract_channel_id(self, webpage):
    """Return the channel id found in the meta tags of `webpage`.

    The `channelId` meta tag is used when present. Otherwise the id is parsed
    out of the first channel URL found in the URL meta tags listed below.
    """
    channel_id = self._html_search_meta(
        'channelId', webpage, 'channel id', default=None)
    if channel_id:
        return channel_id
    channel_url = self._html_search_meta(
        ('og:url', 'al:ios:url', 'al:android:url', 'al:web:url',
         'twitter:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad',
         'twitter:app:url:googleplay'), webpage, 'channel url')
    # The quantifier must be inside the group: a repeated group only keeps its
    # last repetition, which would capture just the final character of the id
    return self._search_regex(
        r'https?://(?:www\.)?youtube\.com/channel/([^/?#&]+)',
        channel_url, 'channel id')
4000
4001 @staticmethod
4002 def _extract_basic_item_renderer(item):
4003 # Modified from _extract_grid_item_renderer
4004 known_basic_renderers = (
4005 'playlistRenderer', 'videoRenderer', 'channelRenderer', 'showRenderer', 'reelItemRenderer'
4006 )
4007 for key, renderer in item.items():
4008 if not isinstance(renderer, dict):
4009 continue
4010 elif key in known_basic_renderers:
4011 return renderer
4012 elif key.startswith('grid') and key.endswith('Renderer'):
4013 return renderer
4014
def _grid_entries(self, grid_renderer):
    """Yield an entry for every supported item of `grid_renderer['items']`.

    Items are checked, in order, for a playlist id, a video id, a channel id
    and finally a generic navigation endpoint URL; the first match decides
    which kind of result is yielded. Items without a usable renderer, or for
    which nothing matches, are skipped.
    """
    for entry in grid_renderer['items']:
        if not isinstance(entry, dict):
            continue
        renderer = self._extract_basic_item_renderer(entry)
        if not isinstance(renderer, dict):
            continue
        title = self._get_text(renderer, 'title')

        playlist_id, video_id, channel_id = map(
            renderer.get, ('playlistId', 'videoId', 'channelId'))
        if playlist_id:
            # playlist
            yield self.url_result(
                'https://www.youtube.com/playlist?list=%s' % playlist_id,
                ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
                video_title=title)
        elif video_id:
            # video
            yield self._extract_video(renderer)
        elif channel_id:
            # channel
            yield self.url_result(
                'https://www.youtube.com/channel/%s' % channel_id,
                ie=YoutubeTabIE.ie_key(), video_title=title)
        else:
            # generic endpoint URL support
            endpoint_url = urljoin('https://www.youtube.com/', try_get(
                renderer, lambda x: x['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'],
                str))
            if not endpoint_url:
                continue
            # Only the first extractor that accepts the URL is used
            matching_ie = next(
                (ie for ie in (YoutubeTabIE, YoutubePlaylistIE, YoutubeIE) if ie.suitable(endpoint_url)),
                None)
            if matching_ie is not None:
                yield self.url_result(
                    endpoint_url, ie=matching_ie.ie_key(),
                    video_id=matching_ie._match_id(endpoint_url), video_title=title)
4054
def _music_reponsive_list_entry(self, renderer):
    """Turn a music list item renderer into a url_result, or None.

    Checked in order: a plain video (``playlistItemData.videoId``), a
    playlist reached through a watch endpoint (optionally with a video to
    start from), and finally a browse endpoint.
    """
    item_video_id = traverse_obj(renderer, ('playlistItemData', 'videoId'))
    if item_video_id:
        return self.url_result(
            f'https://music.youtube.com/watch?v={item_video_id}',
            ie=YoutubeIE.ie_key(), video_id=item_video_id)

    playlist_id = traverse_obj(renderer, ('navigationEndpoint', 'watchEndpoint', 'playlistId'))
    if playlist_id:
        watch_video_id = traverse_obj(renderer, ('navigationEndpoint', 'watchEndpoint', 'videoId'))
        if watch_video_id:
            playlist_url = f'https://music.youtube.com/watch?v={watch_video_id}&list={playlist_id}'
        else:
            playlist_url = f'https://music.youtube.com/playlist?list={playlist_id}'
        return self.url_result(playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)

    browse_id = traverse_obj(renderer, ('navigationEndpoint', 'browseEndpoint', 'browseId'))
    if not browse_id:
        return None
    return self.url_result(
        f'https://music.youtube.com/browse/{browse_id}',
        ie=YoutubeTabIE.ie_key(), video_id=browse_id)
4072
4073 def _shelf_entries_from_content(self, shelf_renderer):
4074 content = shelf_renderer.get('content')
4075 if not isinstance(content, dict):
4076 return
4077 renderer = content.get('gridRenderer') or content.get('expandedShelfContentsRenderer')
4078 if renderer:
4079 # TODO: add support for nested playlists so each shelf is processed
4080 # as separate playlist
4081 # TODO: this includes only first N items
4082 yield from self._grid_entries(renderer)
4083 renderer = content.get('horizontalListRenderer')
4084 if renderer:
4085 # TODO
4086 pass
4087
def _shelf_entries(self, shelf_renderer, skip_channels=False):
    """Yield the shelf's own URL (if any) followed by its inline content.

    @param skip_channels: when true, a shelf whose URL contains
                          ``/channels?`` yields nothing at all
    """
    endpoint_path = try_get(
        shelf_renderer, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        str)
    shelf_url = urljoin('https://www.youtube.com', endpoint_path)
    if shelf_url:
        # Skip links to other channels. Note that checking for
        # endpoint.commandMetadata.webCommandMetadata.webPageType == WEB_PAGE_TYPE_CHANNEL
        # will not work
        if skip_channels and '/channels?' in shelf_url:
            return
        shelf_title = self._get_text(shelf_renderer, 'title')
        yield self.url_result(shelf_url, video_title=shelf_title)
    # The shelf may carry no URL of its own, so also extract from its content
    yield from self._shelf_entries_from_content(shelf_renderer)
4103
4104 def _playlist_entries(self, video_list_renderer):
4105 for content in video_list_renderer['contents']:
4106 if not isinstance(content, dict):
4107 continue
4108 renderer = content.get('playlistVideoRenderer') or content.get('playlistPanelVideoRenderer')
4109 if not isinstance(renderer, dict):
4110 continue
4111 video_id = renderer.get('videoId')
4112 if not video_id:
4113 continue
4114 yield self._extract_video(renderer)
4115
def _rich_entries(self, rich_grid_renderer):
    """Yield the video held in ``content.videoRenderer``, if it has an ID."""
    video_renderer = try_get(
        rich_grid_renderer, lambda x: x['content']['videoRenderer'], dict) or {}
    if video_renderer.get('videoId'):
        yield self._extract_video(video_renderer)
4123
4124 def _video_entry(self, video_renderer):
4125 video_id = video_renderer.get('videoId')
4126 if video_id:
4127 return self._extract_video(video_renderer)
4128
def _hashtag_tile_entry(self, hashtag_tile_renderer):
    """Return a url_result for a hashtag tile, or None if it has no URL."""
    tap_url = traverse_obj(
        hashtag_tile_renderer, ('onTapCommand', 'commandMetadata', 'webCommandMetadata', 'url'))
    url = urljoin('https://youtube.com', tap_url)
    if not url:
        return None
    return self.url_result(
        url, ie=YoutubeTabIE.ie_key(), title=self._get_text(hashtag_tile_renderer, 'hashtag'))
4135
def _post_thread_entries(self, post_thread_renderer):
    """Yield everything downloadable that a community post references.

    In order: the attached video, the attached playlist, and any video URLs
    linked from the post text (excluding a link to the attached video).
    """
    post = try_get(
        post_thread_renderer, lambda x: x['post']['backstagePostRenderer'], dict)
    if not post:
        return

    # Video attachment
    attached_video = try_get(
        post, lambda x: x['backstageAttachment']['videoRenderer'], dict) or {}
    video_id = attached_video.get('videoId')
    if video_id:
        video_entry = self._extract_video(attached_video)
        if video_entry:
            yield video_entry

    # Playlist attachment
    playlist_id = try_get(
        post, lambda x: x['backstageAttachment']['playlistRenderer']['playlistId'], str)
    if playlist_id:
        yield self.url_result(
            'https://www.youtube.com/playlist?list=%s' % playlist_id,
            ie=YoutubeTabIE.ie_key(), video_id=playlist_id)

    # Inline video links
    for run in try_get(post, lambda x: x['contentText']['runs'], list) or []:
        if not isinstance(run, dict):
            continue
        linked_url = try_get(
            run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'], str)
        if not linked_url or not YoutubeIE.suitable(linked_url):
            continue
        linked_video_id = YoutubeIE._match_id(linked_url)
        # The attached video was already yielded above
        if linked_video_id != video_id:
            yield self.url_result(linked_url, ie=YoutubeIE.ie_key(), video_id=linked_video_id)
4171
4172 def _post_thread_continuation_entries(self, post_thread_continuation):
4173 contents = post_thread_continuation.get('contents')
4174 if not isinstance(contents, list):
4175 return
4176 for content in contents:
4177 renderer = content.get('backstagePostThreadRenderer')
4178 if isinstance(renderer, dict):
4179 yield from self._post_thread_entries(renderer)
4180 continue
4181 renderer = content.get('videoRenderer')
4182 if isinstance(renderer, dict):
4183 yield self._video_entry(renderer)
4184
4185 r''' # unused
4186 def _rich_grid_entries(self, contents):
4187 for content in contents:
4188 video_renderer = try_get(content, lambda x: x['richItemRenderer']['content']['videoRenderer'], dict)
4189 if video_renderer:
4190 entry = self._video_entry(video_renderer)
4191 if entry:
4192 yield entry
4193 '''
4194
def _extract_entries(self, parent_renderer, continuation_list):
    """Yield all entries below parent_renderer['contents'].

    `continuation_list` is an out-parameter: it is overwritten in place so
    that, once the generator is exhausted, ``continuation_list[0]`` holds the
    continuation found for the page (or None).
    """
    # continuation_list is modified in-place with continuation_list = [continuation_token]
    continuation_list[:] = [None]

    # Maps a renderer key to a callable returning an iterable of entries
    entry_extractors = {
        'playlistVideoListRenderer': self._playlist_entries,
        'gridRenderer': self._grid_entries,
        'reelShelfRenderer': self._grid_entries,
        'shelfRenderer': self._shelf_entries,
        'musicResponsiveListItemRenderer': lambda x: [self._music_reponsive_list_entry(x)],
        'backstagePostThreadRenderer': self._post_thread_entries,
        'videoRenderer': lambda x: [self._video_entry(x)],
        'playlistRenderer': lambda x: self._grid_entries({'items': [{'playlistRenderer': x}]}),
        'channelRenderer': lambda x: self._grid_entries({'items': [{'channelRenderer': x}]}),
        'hashtagTileRenderer': lambda x: [self._hashtag_tile_entry(x)]
    }

    for section in try_get(parent_renderer, lambda x: x['contents'], list) or []:
        if not isinstance(section, dict):
            continue
        is_renderer = traverse_obj(
            section, 'itemSectionRenderer', 'musicShelfRenderer', 'musicShelfContinuation',
            expected_type=dict)
        if not is_renderer:
            # Not a section container: the only other shape handled is a rich item
            rich_item = section.get('richItemRenderer')
            if rich_item:
                yield from self._rich_entries(rich_item)
                continuation_list[0] = self._extract_continuation(parent_renderer)
            continue

        for isr_content in try_get(is_renderer, lambda x: x['contents'], list) or []:
            if not isinstance(isr_content, dict):
                continue
            # Only the first known renderer of each content dict is used
            for key, renderer in isr_content.items():
                if key not in entry_extractors:
                    continue
                yield from filter(None, entry_extractors[key](renderer))
                continuation_list[0] = self._extract_continuation(renderer)
                break

        if not continuation_list[0]:
            continuation_list[0] = self._extract_continuation(is_renderer)

    if not continuation_list[0]:
        continuation_list[0] = self._extract_continuation(parent_renderer)
4243
def _entries(self, tab, item_id, ytcfg, account_syncid, visitor_data):
    """Yield every entry of a tab, following continuations page by page.

    The first page comes from ``tab['content']``; later pages are fetched
    with `_extract_response` for as long as a continuation is found and the
    response has a shape this method knows how to read.
    """
    continuation_list = [None]

    def extract_entries(renderer):
        # `continuation_list` is looked up at call time, so this always
        # fills whichever list the name is bound to when it runs
        return self._extract_entries(renderer, continuation_list)

    tab_content = try_get(tab, lambda x: x['content'], dict)
    if not tab_content:
        return
    parent_renderer = (
        try_get(tab_content, lambda x: x['sectionListRenderer'], dict)
        or try_get(tab_content, lambda x: x['richGridRenderer'], dict) or {})
    yield from extract_entries(parent_renderer)
    continuation = continuation_list[0]

    # response['continuationContents'][key] -> extractor for that value
    continuation_handlers = {
        'playlistVideoListContinuation': self._playlist_entries,
        'gridContinuation': self._grid_entries,
        'itemSectionContinuation': self._post_thread_continuation_entries,
        'sectionListContinuation': extract_entries,  # for feeds
    }
    # Key of the first appended item -> (extractor, name of the list key
    # that extractor reads its items from)
    item_handlers = {
        'videoRenderer': (self._grid_entries, 'items'),  # for membership tab
        'gridPlaylistRenderer': (self._grid_entries, 'items'),
        'gridVideoRenderer': (self._grid_entries, 'items'),
        'gridChannelRenderer': (self._grid_entries, 'items'),
        'playlistVideoRenderer': (self._playlist_entries, 'contents'),
        'itemSectionRenderer': (extract_entries, 'contents'),  # for feeds
        'richItemRenderer': (extract_entries, 'contents'),  # for hashtag
        'backstagePostThreadRenderer': (self._post_thread_continuation_entries, 'contents')
    }

    page_num = 0
    while continuation:
        page_num += 1
        headers = self.generate_api_headers(
            ytcfg=ytcfg, account_syncid=account_syncid, visitor_data=visitor_data)
        response = self._extract_response(
            item_id=f'{item_id} page {page_num}',
            query=continuation, headers=headers, ytcfg=ytcfg,
            check_get_keys=('continuationContents', 'onResponseReceivedActions', 'onResponseReceivedEndpoints'))
        if not response:
            break

        # Extracting updated visitor data is required to prevent an infinite extraction loop in some cases
        # See: https://github.com/ytdl-org/youtube-dl/issues/28702
        visitor_data = self._extract_visitor_data(response) or visitor_data

        # Response shape 1: continuationContents
        continuation_contents = try_get(
            response, lambda x: x['continuationContents'], dict) or {}
        continuation_renderer = None
        for key, value in continuation_contents.items():
            if key not in continuation_handlers:
                continue
            continuation_renderer = value
            continuation_list = [None]
            yield from continuation_handlers[key](continuation_renderer)
            continuation = continuation_list[0] or self._extract_continuation(continuation_renderer)
            break
        if continuation_renderer:
            continue

        # Response shape 2: appendContinuationItemsAction
        on_response_received = dict_get(response, ('onResponseReceivedActions', 'onResponseReceivedEndpoints'))
        continuation_items = try_get(
            on_response_received, lambda x: x[0]['appendContinuationItemsAction']['continuationItems'], list)
        first_item = try_get(continuation_items, lambda x: x[0], dict) or {}
        video_items_renderer = None
        for key in first_item:
            if key not in item_handlers:
                continue
            handler, list_key = item_handlers[key]
            video_items_renderer = {list_key: continuation_items}
            continuation_list = [None]
            yield from handler(video_items_renderer)
            continuation = continuation_list[0] or self._extract_continuation(video_items_renderer)
            break
        if video_items_renderer:
            continue

        # Nothing recognisable in this response
        break
4318
@staticmethod
def _extract_selected_tab(tabs, fatal=True):
    """Return the renderer of the tab flagged ``selected: true``.

    @param fatal: raise ExtractorError when no tab is selected; otherwise
                  None is returned in that case
    """
    for tab in tabs:
        tab_renderer = dict_get(tab, ('tabRenderer', 'expandableTabRenderer')) or {}
        if tab_renderer.get('selected') is True:
            return tab_renderer
    if fatal:
        raise ExtractorError('Unable to find selected tab')
    return None
4328
def _extract_uploader(self, data):
    """Return uploader, uploader_id and uploader_url from the playlist sidebar.

    Only fields that could be determined are included; the result is empty
    when the sidebar lists no video owner.
    """
    sidebar = self._extract_sidebar_info_renderer(data, 'playlistSidebarSecondaryInfoRenderer') or {}
    owner = try_get(
        sidebar, lambda x: x['videoOwner']['videoOwnerRenderer']['title']['runs'][0], dict)
    if not owner:
        return {}

    owner_text = owner.get('text')
    fields = {
        # Owner text of the form "by X and N others" is reduced to "X"
        'uploader': self._search_regex(
            r'^by (.+) and \d+ others?$', owner_text, 'uploader', default=owner_text),
        'uploader_id': try_get(
            owner, lambda x: x['navigationEndpoint']['browseEndpoint']['browseId'], str),
        'uploader_url': urljoin(
            'https://www.youtube.com/',
            try_get(owner, lambda x: x['navigationEndpoint']['browseEndpoint']['canonicalBaseUrl'], str)),
    }
    return {name: value for name, value in fields.items() if value is not None}
4344
def _extract_from_tabs(self, item_id, ytcfg, data, tabs):
    """Build the playlist result for a page made of tabs.

    Metadata is read from the channel metadata renderer when there is one,
    else from the playlist metadata renderer; thumbnails, statistics and
    uploader details are merged in, and the entries are produced lazily
    from the selected tab.
    """
    selected_tab = self._extract_selected_tab(tabs)
    primary_sidebar_renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer')

    channel_name = channel_url = channel_id = None
    renderer = try_get(
        data, lambda x: x['metadata']['channelMetadataRenderer'], dict)
    if renderer:
        channel_name = renderer.get('title')
        channel_url = renderer.get('channelUrl')
        channel_id = renderer.get('externalId')
    else:
        renderer = try_get(
            data, lambda x: x['metadata']['playlistMetadataRenderer'], dict)

    playlist_id = title = description = None
    tags = []
    if renderer:
        title = renderer.get('title')
        description = renderer.get('description', '')
        playlist_id = channel_id
        tags = renderer.get('keywords', '').split()

    # We can get the uncropped banner/avatar by replacing the crop params with '=s0'
    # See: https://github.com/yt-dlp/yt-dlp/issues/2237#issuecomment-1013694714
    def _append_uncropped(thumbnails, thumbnail_id, preference):
        if not thumbnails:
            return
        uncropped_url = url_or_none((thumbnails[0]['url'] or '').split('=')[0] + '=s0')
        if uncropped_url:
            thumbnails.append({
                'url': uncropped_url,
                'id': thumbnail_id,
                'preference': preference
            })

    avatar_thumbnails = self._extract_thumbnails(renderer, 'avatar')
    _append_uncropped(avatar_thumbnails, 'avatar_uncropped', 1)

    channel_banners = self._extract_thumbnails(
        data, ('header', ..., ['banner', 'mobileBanner', 'tvBanner']))
    for banner in channel_banners:
        banner['preference'] = -10
    _append_uncropped(channel_banners, 'banner_uncropped', -5)

    primary_thumbnails = self._extract_thumbnails(
        primary_sidebar_renderer, ('thumbnailRenderer', ('playlistVideoThumbnailRenderer', 'playlistCustomThumbnailRenderer'), 'thumbnail'))

    if playlist_id is None:
        playlist_id = item_id

    playlist_stats = traverse_obj(primary_sidebar_renderer, 'stats')
    last_updated_unix, _ = self._extract_time_text(playlist_stats, 2)

    if title is None:
        title = self._get_text(data, ('header', 'hashtagHeaderRenderer', 'hashtag')) or playlist_id
    # Append the selected tab's name, e.g. "<channel> - Playlists"
    title += format_field(selected_tab, 'title', ' - %s')
    title += format_field(selected_tab, 'expandedText', ' - %s')

    metadata = {
        'playlist_id': playlist_id,
        'playlist_title': title,
        'playlist_description': description,
        'uploader': channel_name,
        'uploader_id': channel_id,
        'uploader_url': channel_url,
        'thumbnails': primary_thumbnails + avatar_thumbnails + channel_banners,
        'tags': tags,
        'view_count': self._get_count(playlist_stats, 1),
        'availability': self._extract_availability(data),
        'modified_date': strftime_or_none(last_updated_unix, '%Y%m%d'),
        'playlist_count': self._get_count(playlist_stats, 0),
        'channel_follower_count': self._get_count(data, ('header', ..., 'subscriberCountText')),
    }
    if not channel_id:
        # No channel metadata: use the owner shown in the playlist sidebar
        metadata.update(self._extract_uploader(data))
    # The channel_* fields mirror the (possibly just updated) uploader_* ones
    metadata['channel'] = metadata['uploader']
    metadata['channel_id'] = metadata['uploader_id']
    metadata['channel_url'] = metadata['uploader_url']

    entries = self._entries(
        selected_tab, playlist_id, ytcfg,
        self._extract_account_syncid(ytcfg, data),
        self._extract_visitor_data(data, ytcfg))
    return self.playlist_result(entries, **metadata)
4436
def _extract_inline_playlist(self, playlist, playlist_id, data, ytcfg):
    """Yield the videos of a playlist shown inline on a watch page.

    The first page is `playlist` itself.  Each following page is requested
    from the ``next`` endpoint, starting at the last video seen; videos up
    to and including that one are dropped from the new page.  Iteration
    stops when a page is missing, empty, or brings nothing new.

    @param playlist:    playlist panel renderer of the first page
    @param playlist_id: ID sent with each follow-up request
    @param data:        initial page data, used for account/visitor headers
    @param ytcfg:       ytcfg passed through to the API helpers
    """
    last_id = response = None
    for page_num in itertools.count(1):
        videos = list(self._playlist_entries(playlist))
        if not videos:
            return
        # Resume right after the last video that was already yielded
        start = next((i for i, v in enumerate(videos) if v['id'] == last_id), -1) + 1
        if start >= len(videos):
            return
        yield from videos[start:]
        last_id = videos[-1]['id']
        # The last item may not be a playlistPanelVideoRenderer with a
        # watch endpoint; fall back to the defaults below in that case
        watch_endpoint = try_get(
            playlist, lambda x: x['contents'][-1]['playlistPanelVideoRenderer']['navigationEndpoint']['watchEndpoint'],
            dict) or {}
        headers = self.generate_api_headers(
            ytcfg=ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
            visitor_data=self._extract_visitor_data(response, data, ytcfg))
        query = {
            'playlistId': playlist_id,
            'videoId': watch_endpoint.get('videoId') or last_id,
            'index': watch_endpoint.get('index') or len(videos),
            'params': watch_endpoint.get('params') or 'OAE%3D'
        }
        response = self._extract_response(
            item_id='%s page %d' % (playlist_id, page_num),
            query=query, ep='next', headers=headers, ytcfg=ytcfg,
            check_get_keys='contents'
        )
        playlist = try_get(
            response, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
        if not playlist:
            # The response carries no playlist panel, so there is nothing
            # further to page through
            return
4467
def _extract_from_playlist(self, item_id, url, data, playlist, ytcfg):
    """Return a result for a playlist embedded in a watch page.

    If the playlist links to a page of its own (different from `url`) it is
    delegated to the tab extractor; otherwise its videos are extracted
    inline.
    """
    title = playlist.get('title') or try_get(
        data, lambda x: x['titleText']['simpleText'], str)
    playlist_id = playlist.get('playlistId') or item_id

    # Delegating everything except mix playlists to regular tab-based playlist URL
    playlist_path = try_get(
        playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        str)
    playlist_url = urljoin(url, playlist_path)

    # Some playlists are unviewable but YouTube still provides a link to the (broken) playlist page [1]
    # [1] MLCT, RLTDwFCb4jeqaKWnciAYM-ZVHg
    is_known_unviewable = re.fullmatch(r'MLCT|RLTD[\w-]{22}', playlist_id)

    can_delegate = playlist_url and playlist_url != url and not is_known_unviewable
    if not can_delegate:
        return self.playlist_result(
            self._extract_inline_playlist(playlist, playlist_id, data, ytcfg),
            playlist_id=playlist_id, playlist_title=title)

    return self.url_result(
        playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
        video_title=title)
4490
def _extract_availability(self, data):
    """
    Determine the availability of a playlist/tab from its sidebar.
    Privacy is taken from the sidebar badges and, if present, from the
    selected entry of the privacy dropdown.
    Note: Unless YouTube tells us explicitly, we do not assume it is public
    @param data: response
    """
    sidebar = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer') or {}
    labels = self._extract_badges(sidebar)

    # Personal playlists, when authenticated, have a dropdown visibility selector instead of a badge
    dropdown_entries = try_get(
        sidebar, lambda x: x['privacyForm']['dropdownFormFieldRenderer']['dropdown']['dropdownRenderer']['entries'], list) or []
    for dropdown_entry in dropdown_entries:
        if not try_get(dropdown_entry, lambda x: x['privacyDropdownItemRenderer']['isSelected'], bool):
            continue
        selected_label = self._get_text(dropdown_entry, ('privacyDropdownItemRenderer', 'label'))
        if selected_label:
            labels.add(selected_label.lower())
            break

    is_private = is_unlisted = None
    for label in labels:
        if label == 'unlisted':
            is_unlisted = True
        elif label == 'private':
            is_private = True
        elif label == 'public':
            is_unlisted = is_private = False
    return self._availability(is_private, False, False, False, is_unlisted)
4522
@staticmethod
def _extract_sidebar_info_renderer(data, info_renderer, expected_type=dict):
    """Return the first truthy `info_renderer` value among the playlist
    sidebar items, or None if no item has one of the expected type."""
    sidebar_items = try_get(
        data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list) or []
    candidates = (
        try_get(sidebar_item, lambda x: x[info_renderer], expected_type)
        for sidebar_item in sidebar_items)
    return next(filter(None, candidates), None)
4531
def _reload_with_unavailable_videos(self, item_id, data, ytcfg):
    """
    Re-request the playlist so that unavailable videos are included.
    The browse ID and params come from the 'show unavailable videos' menu
    button if it exists; built-in defaults are used otherwise.
    Returns None when the page has no primary playlist sidebar.
    """
    sidebar = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer')
    if not sidebar:
        return

    browse_endpoint = {}
    for menu_item in try_get(sidebar, lambda x: x['menu']['menuRenderer']['items'], list) or []:
        if not isinstance(menu_item, dict):
            continue
        nav_item_renderer = menu_item.get('menuNavigationItemRenderer')
        label = try_get(
            nav_item_renderer, lambda x: x['text']['simpleText'], str)
        if label and label.lower() == 'show unavailable videos':
            browse_endpoint = try_get(
                nav_item_renderer, lambda x: x['navigationEndpoint']['browseEndpoint'], dict) or {}
            break

    headers = self.generate_api_headers(
        ytcfg=ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
        visitor_data=self._extract_visitor_data(data, ytcfg))
    query = {
        'params': browse_endpoint.get('params') or 'wgYCCAA=',
        'browseId': browse_endpoint.get('browseId') or 'VL%s' % item_id
    }
    return self._extract_response(
        item_id=item_id, headers=headers, query=query,
        check_get_keys='contents', fatal=False, ytcfg=ytcfg,
        note='Downloading API JSON with unavailable videos')
4567
@functools.cached_property
def skip_webpage(self):
    """Whether 'webpage' is listed in the youtube:tab ``skip`` extractor argument."""
    skipped = self._configuration_arg('skip', ie_key=YoutubeTabIE.ie_key())
    return 'webpage' in skipped
4571
def _extract_webpage(self, url, item_id, fatal=True):
    """Download a page and parse its initial data, retrying where sensible.

    A retry is requested for network errors other than HTTP 403/429 and for
    pages whose initial data is incomplete.  Any other extractor error is
    passed to `_error_or_warning` and ends the attempts.

    @returns: (webpage, data); either may be None if nothing was obtained
    """
    webpage = data = None
    for retry in self.RetryManager(fatal=fatal):
        try:
            webpage = self._download_webpage(url, item_id, note='Downloading webpage')
            data = self.extract_yt_initial_data(item_id, webpage or '', fatal=fatal) or {}
        except ExtractorError as err:
            cause = err.cause
            should_retry = isinstance(cause, network_exceptions) and (
                not isinstance(cause, urllib.error.HTTPError) or cause.code not in (403, 429))
            if should_retry:
                retry.error = err
                continue
            self._error_or_warning(err, fatal=fatal)
            break

        try:
            self._extract_and_report_alerts(data)
        except ExtractorError as err:
            self._error_or_warning(err, fatal=fatal)
            break

        # Sometimes youtube returns a webpage with incomplete ytInitialData
        # See: https://github.com/yt-dlp/yt-dlp/issues/116
        has_content = traverse_obj(data, 'contents', 'currentVideoEndpoint', 'onResponseReceivedActions')
        if not has_content:
            retry.error = ExtractorError('Incomplete yt initial data received')
            continue

    return webpage, data
4599
def _report_playlist_authcheck(self, ytcfg, fatal=True):
    """Use if failed to extract ytcfg (and data) from initial webpage

    Does nothing unless `ytcfg` is empty while the session is
    authenticated.  Then it raises if `fatal` is set and the 'authcheck'
    skip argument was not given, and warns otherwise.
    """
    if ytcfg or not self.is_authenticated:
        return
    msg = 'Playlists that require authentication may not extract correctly without a successful webpage download'
    skipped = self._configuration_arg('skip', ie_key=YoutubeTabIE.ie_key())
    if 'authcheck' not in skipped and fatal:
        raise ExtractorError(
            f'{msg}. If you are not downloading private content, or '
            'your cookies are only for the first account and channel,'
            ' pass "--extractor-args youtubetab:skip=authcheck" to skip this check',
            expected=True)
    self.report_warning(msg, only_once=True)
4611
def _extract_data(self, url, item_id, ytcfg=None, fatal=True, webpage_fatal=False, default_client='web'):
    """Return (data, ytcfg) for a tab URL.

    The webpage is tried first unless it is configured to be skipped; if
    that gives no data, the API is queried through `_extract_tab_endpoint`.
    A page that silently landed on the YouTube home tab is reported as a
    missing channel/playlist (an error when `fatal`, a warning otherwise).
    """
    data = None
    if not self.skip_webpage:
        webpage, data = self._extract_webpage(url, item_id, fatal=webpage_fatal)
        ytcfg = ytcfg or self.extract_ytcfg(item_id, webpage)

        # Reject webpage data if redirected to home page without explicitly requesting
        tabs = traverse_obj(
            data, ('contents', 'twoColumnBrowseResultsRenderer', 'tabs'), expected_type=list, default=[])
        selected_tab = self._extract_selected_tab(tabs, fatal=False) or {}
        redirected_to_home = (
            url != 'https://www.youtube.com/feed/recommended'
            and selected_tab.get('tabIdentifier') == 'FEwhat_to_watch'  # Home page
            and 'no-youtube-channel-redirect' not in self.get_param('compat_opts', []))
        if redirected_to_home:
            msg = 'The channel/playlist does not exist and the URL redirected to youtube.com home page'
            if fatal:
                raise ExtractorError(msg, expected=True)
            self.report_warning(msg, only_once=True)

    if not data:
        self._report_playlist_authcheck(ytcfg, fatal=fatal)
        data = self._extract_tab_endpoint(url, item_id, ytcfg, fatal=fatal, default_client=default_client)
    return data, ytcfg
4631
def _extract_tab_endpoint(self, url, item_id, ytcfg=None, fatal=True, default_client='web'):
    """Resolve `url` through the API and fetch the endpoint it maps to.

    A browse endpoint is requested from ``browse`` and a watch endpoint from
    ``next``.  If the URL resolves to neither, an error is raised when
    `fatal`, else a warning is reported and None is returned.
    """
    headers = self.generate_api_headers(ytcfg=ytcfg, default_client=default_client)
    resolve_response = self._extract_response(
        item_id=item_id, query={'url': url}, check_get_keys='endpoint', headers=headers, ytcfg=ytcfg, fatal=fatal,
        ep='navigation/resolve_url', note='Downloading API parameters API JSON', default_client=default_client)

    # (key in the resolve response, API endpoint that serves it)
    for endpoint_key, api_endpoint in (('browseEndpoint', 'browse'), ('watchEndpoint', 'next')):
        endpoint_params = try_get(resolve_response, lambda x: x['endpoint'][endpoint_key], dict)
        if not endpoint_params:
            continue
        return self._extract_response(
            item_id=item_id, query=endpoint_params, ep=api_endpoint, headers=headers,
            ytcfg=ytcfg, fatal=fatal, default_client=default_client,
            check_get_keys=('contents', 'currentVideoEndpoint', 'onResponseReceivedActions'))

    err_note = 'Failed to resolve url (does the playlist exist?)'
    if fatal:
        raise ExtractorError(err_note, expected=True)
    self.report_warning(err_note, item_id)
4649
# Search params used by _search_results when the caller passes none
_SEARCH_PARAMS = None

def _search_results(self, query, params=NO_DEFAULT, default_client='web'):
    """Yield search results for `query`, following continuations.

    @param params:         value for the request's ``params`` field;
                           defaults to `_SEARCH_PARAMS`, and is left out of
                           the request when falsy
    @param default_client: client used for ytcfg, headers and requests
    """
    if params is NO_DEFAULT:
        params = self._SEARCH_PARAMS
    data = {'query': query}
    if params:
        data['params'] = params

    # Paths at which the result list may be found in a response
    content_keys = (
        ('contents', 'twoColumnSearchResultsRenderer', 'primaryContents', 'sectionListRenderer', 'contents'),
        ('onResponseReceivedCommands', 0, 'appendContinuationItemsAction', 'continuationItems'),
        # ytmusic search
        ('contents', 'tabbedSearchResultsRenderer', 'tabs', 0, 'tabRenderer', 'content', 'sectionListRenderer', 'contents'),
        ('continuationContents', ),
    )
    display_id = f'query "{query}"'
    check_get_keys = tuple({keys[0] for keys in content_keys})
    ytcfg = {} if self.skip_webpage else self._download_ytcfg(default_client, display_id)
    self._report_playlist_authcheck(ytcfg, fatal=False)

    continuation_list = [None]
    search = None
    page_num = 0
    while True:
        page_num += 1
        # From the second page on this merges the continuation into the query
        data.update(continuation_list[0] or {})
        headers = self.generate_api_headers(
            ytcfg=ytcfg, visitor_data=self._extract_visitor_data(search), default_client=default_client)
        search = self._extract_response(
            item_id=f'{display_id} page {page_num}', ep='search', query=data,
            default_client=default_client, check_get_keys=check_get_keys, ytcfg=ytcfg, headers=headers)
        slr_contents = traverse_obj(search, *content_keys)
        yield from self._extract_entries({'contents': list(variadic(slr_contents))}, continuation_list)
        if not continuation_list[0]:
            break
4684
4685
4686 class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
4687 IE_DESC = 'YouTube Tabs'
4688 _VALID_URL = r'''(?x:
4689 https?://
4690 (?:\w+\.)?
4691 (?:
4692 youtube(?:kids)?\.com|
4693 %(invidious)s
4694 )/
4695 (?:
4696 (?P<channel_type>channel|c|user|browse)/|
4697 (?P<not_channel>
4698 feed/|hashtag/|
4699 (?:playlist|watch)\?.*?\blist=
4700 )|
4701 (?!(?:%(reserved_names)s)\b) # Direct URLs
4702 )
4703 (?P<id>[^/?\#&]+)
4704 )''' % {
4705 'reserved_names': YoutubeBaseInfoExtractor._RESERVED_NAMES,
4706 'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
4707 }
4708 IE_NAME = 'youtube:tab'
4709
4710 _TESTS = [{
4711 'note': 'playlists, multipage',
4712 'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid',
4713 'playlist_mincount': 94,
4714 'info_dict': {
4715 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
4716 'title': 'Igor Kleiner - Playlists',
4717 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
4718 'uploader': 'Igor Kleiner',
4719 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
4720 'channel': 'Igor Kleiner',
4721 'channel_id': 'UCqj7Cz7revf5maW9g5pgNcg',
4722 'tags': ['"критическое', 'мышление"', '"наука', 'просто"', 'математика', '"анализ', 'данных"'],
4723 'channel_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',
4724 'uploader_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',
4725 'channel_follower_count': int
4726 },
4727 }, {
4728 'note': 'playlists, multipage, different order',
4729 'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
4730 'playlist_mincount': 94,
4731 'info_dict': {
4732 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
4733 'title': 'Igor Kleiner - Playlists',
4734 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
4735 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
4736 'uploader': 'Igor Kleiner',
4737 'uploader_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',
4738 'tags': ['"критическое', 'мышление"', '"наука', 'просто"', 'математика', '"анализ', 'данных"'],
4739 'channel_id': 'UCqj7Cz7revf5maW9g5pgNcg',
4740 'channel': 'Igor Kleiner',
4741 'channel_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',
4742 'channel_follower_count': int
4743 },
4744 }, {
4745 'note': 'playlists, series',
4746 'url': 'https://www.youtube.com/c/3blue1brown/playlists?view=50&sort=dd&shelf_id=3',
4747 'playlist_mincount': 5,
4748 'info_dict': {
4749 'id': 'UCYO_jab_esuFRV4b17AJtAw',
4750 'title': '3Blue1Brown - Playlists',
4751 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
4752 'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
4753 'uploader': '3Blue1Brown',
4754 'channel_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',
4755 'uploader_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',
4756 'channel': '3Blue1Brown',
4757 'channel_id': 'UCYO_jab_esuFRV4b17AJtAw',
4758 'tags': ['Mathematics'],
4759 'channel_follower_count': int
4760 },
4761 }, {
4762 'note': 'playlists, singlepage',
4763 'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
4764 'playlist_mincount': 4,
4765 'info_dict': {
4766 'id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
4767 'title': 'ThirstForScience - Playlists',
4768 'description': 'md5:609399d937ea957b0f53cbffb747a14c',
4769 'uploader': 'ThirstForScience',
4770 'uploader_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
4771 'uploader_url': 'https://www.youtube.com/channel/UCAEtajcuhQ6an9WEzY9LEMQ',
4772 'channel_url': 'https://www.youtube.com/channel/UCAEtajcuhQ6an9WEzY9LEMQ',
4773 'channel_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
4774 'tags': 'count:13',
4775 'channel': 'ThirstForScience',
4776 'channel_follower_count': int
4777 }
4778 }, {
4779 'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',
4780 'only_matching': True,
4781 }, {
4782 'note': 'basic, single video playlist',
4783 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
4784 'info_dict': {
4785 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
4786 'uploader': 'Sergey M.',
4787 'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
4788 'title': 'youtube-dl public playlist',
4789 'description': '',
4790 'tags': [],
4791 'view_count': int,
4792 'modified_date': '20201130',
4793 'channel': 'Sergey M.',
4794 'channel_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
4795 'uploader_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
4796 'channel_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
4797 },
4798 'playlist_count': 1,
4799 }, {
4800 'note': 'empty playlist',
4801 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
4802 'info_dict': {
4803 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
4804 'uploader': 'Sergey M.',
4805 'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
4806 'title': 'youtube-dl empty playlist',
4807 'tags': [],
4808 'channel': 'Sergey M.',
4809 'description': '',
4810 'modified_date': '20160902',
4811 'channel_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
4812 'channel_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
4813 'uploader_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
4814 },
4815 'playlist_count': 0,
4816 }, {
4817 'note': 'Home tab',
4818 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/featured',
4819 'info_dict': {
4820 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4821 'title': 'lex will - Home',
4822 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
4823 'uploader': 'lex will',
4824 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4825 'channel': 'lex will',
4826 'tags': ['bible', 'history', 'prophesy'],
4827 'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
4828 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
4829 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4830 'channel_follower_count': int
4831 },
4832 'playlist_mincount': 2,
4833 }, {
4834 'note': 'Videos tab',
4835 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos',
4836 'info_dict': {
4837 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4838 'title': 'lex will - Videos',
4839 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
4840 'uploader': 'lex will',
4841 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4842 'tags': ['bible', 'history', 'prophesy'],
4843 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
4844 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4845 'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
4846 'channel': 'lex will',
4847 'channel_follower_count': int
4848 },
4849 'playlist_mincount': 975,
4850 }, {
4851 'note': 'Videos tab, sorted by popular',
4852 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos?view=0&sort=p&flow=grid',
4853 'info_dict': {
4854 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4855 'title': 'lex will - Videos',
4856 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
4857 'uploader': 'lex will',
4858 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4859 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4860 'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
4861 'channel': 'lex will',
4862 'tags': ['bible', 'history', 'prophesy'],
4863 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
4864 'channel_follower_count': int
4865 },
4866 'playlist_mincount': 199,
4867 }, {
4868 'note': 'Playlists tab',
4869 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/playlists',
4870 'info_dict': {
4871 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4872 'title': 'lex will - Playlists',
4873 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
4874 'uploader': 'lex will',
4875 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4876 'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
4877 'channel': 'lex will',
4878 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
4879 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4880 'tags': ['bible', 'history', 'prophesy'],
4881 'channel_follower_count': int
4882 },
4883 'playlist_mincount': 17,
4884 }, {
4885 'note': 'Community tab',
4886 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community',
4887 'info_dict': {
4888 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4889 'title': 'lex will - Community',
4890 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
4891 'uploader': 'lex will',
4892 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4893 'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
4894 'channel': 'lex will',
4895 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
4896 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4897 'tags': ['bible', 'history', 'prophesy'],
4898 'channel_follower_count': int
4899 },
4900 'playlist_mincount': 18,
4901 }, {
4902 'note': 'Channels tab',
4903 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels',
4904 'info_dict': {
4905 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4906 'title': 'lex will - Channels',
4907 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
4908 'uploader': 'lex will',
4909 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4910 'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
4911 'channel': 'lex will',
4912 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
4913 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4914 'tags': ['bible', 'history', 'prophesy'],
4915 'channel_follower_count': int
4916 },
4917 'playlist_mincount': 12,
4918 }, {
4919 'note': 'Search tab',
4920 'url': 'https://www.youtube.com/c/3blue1brown/search?query=linear%20algebra',
4921 'playlist_mincount': 40,
4922 'info_dict': {
4923 'id': 'UCYO_jab_esuFRV4b17AJtAw',
4924 'title': '3Blue1Brown - Search - linear algebra',
4925 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
4926 'uploader': '3Blue1Brown',
4927 'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
4928 'channel_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',
4929 'uploader_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',
4930 'tags': ['Mathematics'],
4931 'channel': '3Blue1Brown',
4932 'channel_id': 'UCYO_jab_esuFRV4b17AJtAw',
4933 'channel_follower_count': int
4934 },
4935 }, {
4936 'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',
4937 'only_matching': True,
4938 }, {
4939 'url': 'https://www.youtubekids.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
4940 'only_matching': True,
4941 }, {
4942 'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
4943 'only_matching': True,
4944 }, {
4945 'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
4946 'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
4947 'info_dict': {
4948 'title': '29C3: Not my department',
4949 'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
4950 'uploader': 'Christiaan008',
4951 'uploader_id': 'UCEPzS1rYsrkqzSLNp76nrcg',
4952 'description': 'md5:a14dc1a8ef8307a9807fe136a0660268',
4953 'tags': [],
4954 'uploader_url': 'https://www.youtube.com/c/ChRiStIaAn008',
4955 'view_count': int,
4956 'modified_date': '20150605',
4957 'channel_id': 'UCEPzS1rYsrkqzSLNp76nrcg',
4958 'channel_url': 'https://www.youtube.com/c/ChRiStIaAn008',
4959 'channel': 'Christiaan008',
4960 },
4961 'playlist_count': 96,
4962 }, {
4963 'note': 'Large playlist',
4964 'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
4965 'info_dict': {
4966 'title': 'Uploads from Cauchemar',
4967 'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
4968 'uploader': 'Cauchemar',
4969 'uploader_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',
4970 'channel_url': 'https://www.youtube.com/c/Cauchemar89',
4971 'tags': [],
4972 'modified_date': r're:\d{8}',
4973 'channel': 'Cauchemar',
4974 'uploader_url': 'https://www.youtube.com/c/Cauchemar89',
4975 'view_count': int,
4976 'description': '',
4977 'channel_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',
4978 },
4979 'playlist_mincount': 1123,
4980 'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
4981 }, {
4982 'note': 'even larger playlist, 8832 videos',
4983 'url': 'http://www.youtube.com/user/NASAgovVideo/videos',
4984 'only_matching': True,
4985 }, {
4986 'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
4987 'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
4988 'info_dict': {
4989 'title': 'Uploads from Interstellar Movie',
4990 'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
4991 'uploader': 'Interstellar Movie',
4992 'uploader_id': 'UCXw-G3eDE9trcvY2sBMM_aA',
4993 'uploader_url': 'https://www.youtube.com/c/InterstellarMovie',
4994 'tags': [],
4995 'view_count': int,
4996 'channel_id': 'UCXw-G3eDE9trcvY2sBMM_aA',
4997 'channel_url': 'https://www.youtube.com/c/InterstellarMovie',
4998 'channel': 'Interstellar Movie',
4999 'description': '',
5000 'modified_date': r're:\d{8}',
5001 },
5002 'playlist_mincount': 21,
5003 }, {
5004 'note': 'Playlist with "show unavailable videos" button',
5005 'url': 'https://www.youtube.com/playlist?list=UUTYLiWFZy8xtPwxFwX9rV7Q',
5006 'info_dict': {
5007 'title': 'Uploads from Phim Siêu Nhân Nhật Bản',
5008 'id': 'UUTYLiWFZy8xtPwxFwX9rV7Q',
5009 'uploader': 'Phim Siêu Nhân Nhật Bản',
5010 'uploader_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',
5011 'view_count': int,
5012 'channel': 'Phim Siêu Nhân Nhật Bản',
5013 'tags': [],
5014 'uploader_url': 'https://www.youtube.com/channel/UCTYLiWFZy8xtPwxFwX9rV7Q',
5015 'description': '',
5016 'channel_url': 'https://www.youtube.com/channel/UCTYLiWFZy8xtPwxFwX9rV7Q',
5017 'channel_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',
5018 'modified_date': r're:\d{8}',
5019 },
5020 'playlist_mincount': 200,
5021 'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
5022 }, {
5023 'note': 'Playlist with unavailable videos in page 7',
5024 'url': 'https://www.youtube.com/playlist?list=UU8l9frL61Yl5KFOl87nIm2w',
5025 'info_dict': {
5026 'title': 'Uploads from BlankTV',
5027 'id': 'UU8l9frL61Yl5KFOl87nIm2w',
5028 'uploader': 'BlankTV',
5029 'uploader_id': 'UC8l9frL61Yl5KFOl87nIm2w',
5030 'channel': 'BlankTV',
5031 'channel_url': 'https://www.youtube.com/c/blanktv',
5032 'channel_id': 'UC8l9frL61Yl5KFOl87nIm2w',
5033 'view_count': int,
5034 'tags': [],
5035 'uploader_url': 'https://www.youtube.com/c/blanktv',
5036 'modified_date': r're:\d{8}',
5037 'description': '',
5038 },
5039 'playlist_mincount': 1000,
5040 'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
5041 }, {
5042 'note': 'https://github.com/ytdl-org/youtube-dl/issues/21844',
5043 'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
5044 'info_dict': {
5045 'title': 'Data Analysis with Dr Mike Pound',
5046 'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
5047 'uploader_id': 'UC9-y-6csu5WGm29I7JiwpnA',
5048 'uploader': 'Computerphile',
5049 'description': 'md5:7f567c574d13d3f8c0954d9ffee4e487',
5050 'uploader_url': 'https://www.youtube.com/user/Computerphile',
5051 'tags': [],
5052 'view_count': int,
5053 'channel_id': 'UC9-y-6csu5WGm29I7JiwpnA',
5054 'channel_url': 'https://www.youtube.com/user/Computerphile',
5055 'channel': 'Computerphile',
5056 },
5057 'playlist_mincount': 11,
5058 }, {
5059 'url': 'https://invidio.us/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
5060 'only_matching': True,
5061 }, {
5062 'note': 'Playlist URL that does not actually serve a playlist',
5063 'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
5064 'info_dict': {
5065 'id': 'FqZTN594JQw',
5066 'ext': 'webm',
5067 'title': "Smiley's People 01 detective, Adventure Series, Action",
5068 'uploader': 'STREEM',
5069 'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
5070 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
5071 'upload_date': '20150526',
5072 'license': 'Standard YouTube License',
5073 'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
5074 'categories': ['People & Blogs'],
5075 'tags': list,
5076 'view_count': int,
5077 'like_count': int,
5078 },
5079 'params': {
5080 'skip_download': True,
5081 },
5082 'skip': 'This video is not available.',
5083 'add_ie': [YoutubeIE.ie_key()],
5084 }, {
5085 'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',
5086 'only_matching': True,
5087 }, {
5088 'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
5089 'only_matching': True,
5090 }, {
5091 'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',
5092 'info_dict': {
5093 'id': 'Wq15eF5vCbI', # This will keep changing
5094 'ext': 'mp4',
5095 'title': str,
5096 'uploader': 'Sky News',
5097 'uploader_id': 'skynews',
5098 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/skynews',
5099 'upload_date': r're:\d{8}',
5100 'description': str,
5101 'categories': ['News & Politics'],
5102 'tags': list,
5103 'like_count': int,
5104 'release_timestamp': 1642502819,
5105 'channel': 'Sky News',
5106 'channel_id': 'UCoMdktPbSTixAyNGwb-UYkQ',
5107 'age_limit': 0,
5108 'view_count': int,
5109 'thumbnail': 'https://i.ytimg.com/vi/GgL890LIznQ/maxresdefault_live.jpg',
5110 'playable_in_embed': True,
5111 'release_date': '20220118',
5112 'availability': 'public',
5113 'live_status': 'is_live',
5114 'channel_url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ',
5115 'channel_follower_count': int
5116 },
5117 'params': {
5118 'skip_download': True,
5119 },
5120 'expected_warnings': ['Ignoring subtitle tracks found in '],
5121 }, {
5122 'url': 'https://www.youtube.com/user/TheYoungTurks/live',
5123 'info_dict': {
5124 'id': 'a48o2S1cPoo',
5125 'ext': 'mp4',
5126 'title': 'The Young Turks - Live Main Show',
5127 'uploader': 'The Young Turks',
5128 'uploader_id': 'TheYoungTurks',
5129 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
5130 'upload_date': '20150715',
5131 'license': 'Standard YouTube License',
5132 'description': 'md5:438179573adcdff3c97ebb1ee632b891',
5133 'categories': ['News & Politics'],
5134 'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
5135 'like_count': int,
5136 },
5137 'params': {
5138 'skip_download': True,
5139 },
5140 'only_matching': True,
5141 }, {
5142 'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
5143 'only_matching': True,
5144 }, {
5145 'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
5146 'only_matching': True,
5147 }, {
5148 'note': 'A channel that is not live. Should raise error',
5149 'url': 'https://www.youtube.com/user/numberphile/live',
5150 'only_matching': True,
5151 }, {
5152 'url': 'https://www.youtube.com/feed/trending',
5153 'only_matching': True,
5154 }, {
5155 'url': 'https://www.youtube.com/feed/library',
5156 'only_matching': True,
5157 }, {
5158 'url': 'https://www.youtube.com/feed/history',
5159 'only_matching': True,
5160 }, {
5161 'url': 'https://www.youtube.com/feed/subscriptions',
5162 'only_matching': True,
5163 }, {
5164 'url': 'https://www.youtube.com/feed/watch_later',
5165 'only_matching': True,
5166 }, {
5167 'note': 'Recommended - redirects to home page.',
5168 'url': 'https://www.youtube.com/feed/recommended',
5169 'only_matching': True,
5170 }, {
5171 'note': 'inline playlist with not always working continuations',
5172 'url': 'https://www.youtube.com/watch?v=UC6u0Tct-Fo&list=PL36D642111D65BE7C',
5173 'only_matching': True,
5174 }, {
5175 'url': 'https://www.youtube.com/course',
5176 'only_matching': True,
5177 }, {
5178 'url': 'https://www.youtube.com/zsecurity',
5179 'only_matching': True,
5180 }, {
5181 'url': 'http://www.youtube.com/NASAgovVideo/videos',
5182 'only_matching': True,
5183 }, {
5184 'url': 'https://www.youtube.com/TheYoungTurks/live',
5185 'only_matching': True,
5186 }, {
5187 'url': 'https://www.youtube.com/hashtag/cctv9',
5188 'info_dict': {
5189 'id': 'cctv9',
5190 'title': '#cctv9',
5191 'tags': [],
5192 },
5193 'playlist_mincount': 350,
5194 }, {
5195 'url': 'https://www.youtube.com/watch?list=PLW4dVinRY435CBE_JD3t-0SRXKfnZHS1P&feature=youtu.be&v=M9cJMXmQ_ZU',
5196 'only_matching': True,
5197 }, {
5198 'note': 'Requires Premium: should request additional YTM-info webpage (and have format 141) for videos in playlist',
5199 'url': 'https://music.youtube.com/playlist?list=PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
5200 'only_matching': True
5201 }, {
5202 'note': '/browse/ should redirect to /channel/',
5203 'url': 'https://music.youtube.com/browse/UC1a8OFewdjuLq6KlF8M_8Ng',
5204 'only_matching': True
5205 }, {
5206 'note': 'VLPL, should redirect to playlist?list=PL...',
5207 'url': 'https://music.youtube.com/browse/VLPLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
5208 'info_dict': {
5209 'id': 'PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
5210 'uploader': 'NoCopyrightSounds',
5211 'description': 'Providing you with copyright free / safe music for gaming, live streaming, studying and more!',
5212 'uploader_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',
5213 'title': 'NCS : All Releases 💿',
5214 'uploader_url': 'https://www.youtube.com/c/NoCopyrightSounds',
5215 'channel_url': 'https://www.youtube.com/c/NoCopyrightSounds',
5216 'modified_date': r're:\d{8}',
5217 'view_count': int,
5218 'channel_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',
5219 'tags': [],
5220 'channel': 'NoCopyrightSounds',
5221 },
5222 'playlist_mincount': 166,
5223 'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
5224 }, {
5225 'note': 'Topic, should redirect to playlist?list=UU...',
5226 'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',
5227 'info_dict': {
5228 'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',
5229 'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
5230 'title': 'Uploads from Royalty Free Music - Topic',
5231 'uploader': 'Royalty Free Music - Topic',
5232 'tags': [],
5233 'channel_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
5234 'channel': 'Royalty Free Music - Topic',
5235 'view_count': int,
5236 'channel_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',
5237 'channel_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',
5238 'modified_date': r're:\d{8}',
5239 'uploader_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',
5240 'description': '',
5241 },
5242 'expected_warnings': [
5243 'The URL does not have a videos tab',
5244 r'[Uu]navailable videos (are|will be) hidden',
5245 ],
5246 'playlist_mincount': 101,
5247 }, {
5248 'note': 'Topic without a UU playlist',
5249 'url': 'https://www.youtube.com/channel/UCtFRv9O2AHqOZjjynzrv-xg',
5250 'info_dict': {
5251 'id': 'UCtFRv9O2AHqOZjjynzrv-xg',
5252 'title': 'UCtFRv9O2AHqOZjjynzrv-xg',
5253 'tags': [],
5254 },
5255 'expected_warnings': [
5256 'the playlist redirect gave error',
5257 ],
5258 'playlist_mincount': 9,
5259 }, {
5260 'note': 'Youtube music Album',
5261 'url': 'https://music.youtube.com/browse/MPREb_gTAcphH99wE',
5262 'info_dict': {
5263 'id': 'OLAK5uy_l1m0thk3g31NmIIz_vMIbWtyv7eZixlH0',
5264 'title': 'Album - Royalty Free Music Library V2 (50 Songs)',
5265 'tags': [],
5266 'view_count': int,
5267 'description': '',
5268 'availability': 'unlisted',
5269 'modified_date': r're:\d{8}',
5270 },
5271 'playlist_count': 50,
5272 }, {
5273 'note': 'unlisted single video playlist',
5274 'url': 'https://www.youtube.com/playlist?list=PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
5275 'info_dict': {
5276 'uploader_id': 'UC9zHu_mHU96r19o-wV5Qs1Q',
5277 'uploader': 'colethedj',
5278 'id': 'PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
5279 'title': 'yt-dlp unlisted playlist test',
5280 'availability': 'unlisted',
5281 'tags': [],
5282 'modified_date': '20220418',
5283 'channel': 'colethedj',
5284 'view_count': int,
5285 'description': '',
5286 'uploader_url': 'https://www.youtube.com/channel/UC9zHu_mHU96r19o-wV5Qs1Q',
5287 'channel_id': 'UC9zHu_mHU96r19o-wV5Qs1Q',
5288 'channel_url': 'https://www.youtube.com/channel/UC9zHu_mHU96r19o-wV5Qs1Q',
5289 },
5290 'playlist_count': 1,
5291 }, {
5292 'note': 'API Fallback: Recommended - redirects to home page. Requires visitorData',
5293 'url': 'https://www.youtube.com/feed/recommended',
5294 'info_dict': {
5295 'id': 'recommended',
5296 'title': 'recommended',
5297 'tags': [],
5298 },
5299 'playlist_mincount': 50,
5300 'params': {
5301 'skip_download': True,
5302 'extractor_args': {'youtubetab': {'skip': ['webpage']}}
5303 },
5304 }, {
5305 'note': 'API Fallback: /videos tab, sorted by oldest first',
5306 'url': 'https://www.youtube.com/user/theCodyReeder/videos?view=0&sort=da&flow=grid',
5307 'info_dict': {
5308 'id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',
5309 'title': 'Cody\'sLab - Videos',
5310 'description': 'md5:d083b7c2f0c67ee7a6c74c3e9b4243fa',
5311 'uploader': 'Cody\'sLab',
5312 'uploader_id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',
5313 'channel': 'Cody\'sLab',
5314 'channel_id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',
5315 'tags': [],
5316 'channel_url': 'https://www.youtube.com/channel/UCu6mSoMNzHQiBIOCkHUa2Aw',
5317 'uploader_url': 'https://www.youtube.com/channel/UCu6mSoMNzHQiBIOCkHUa2Aw',
5318 'channel_follower_count': int
5319 },
5320 'playlist_mincount': 650,
5321 'params': {
5322 'skip_download': True,
5323 'extractor_args': {'youtubetab': {'skip': ['webpage']}}
5324 },
5325 }, {
5326 'note': 'API Fallback: Topic, should redirect to playlist?list=UU...',
5327 'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',
5328 'info_dict': {
5329 'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',
5330 'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
5331 'title': 'Uploads from Royalty Free Music - Topic',
5332 'uploader': 'Royalty Free Music - Topic',
5333 'modified_date': r're:\d{8}',
5334 'channel_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
5335 'description': '',
5336 'channel_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',
5337 'tags': [],
5338 'channel': 'Royalty Free Music - Topic',
5339 'view_count': int,
5340 'uploader_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',
5341 },
5342 'expected_warnings': [
5343 'does not have a videos tab',
5344 r'[Uu]navailable videos (are|will be) hidden',
5345 ],
5346 'playlist_mincount': 101,
5347 'params': {
5348 'skip_download': True,
5349 'extractor_args': {'youtubetab': {'skip': ['webpage']}}
5350 },
5351 }, {
5352 'note': 'non-standard redirect to regional channel',
5353 'url': 'https://www.youtube.com/channel/UCwVVpHQ2Cs9iGJfpdFngePQ',
5354 'only_matching': True
5355 }, {
5356 'note': 'collaborative playlist (uploader name in the form "by <uploader> and x other(s)")',
5357 'url': 'https://www.youtube.com/playlist?list=PLx-_-Kk4c89oOHEDQAojOXzEzemXxoqx6',
5358 'info_dict': {
5359 'id': 'PLx-_-Kk4c89oOHEDQAojOXzEzemXxoqx6',
5360 'modified_date': '20220407',
5361 'channel_url': 'https://www.youtube.com/channel/UCKcqXmCcyqnhgpA5P0oHH_Q',
5362 'tags': [],
5363 'uploader_id': 'UCKcqXmCcyqnhgpA5P0oHH_Q',
5364 'uploader': 'pukkandan',
5365 'availability': 'unlisted',
5366 'channel_id': 'UCKcqXmCcyqnhgpA5P0oHH_Q',
5367 'channel': 'pukkandan',
5368 'description': 'Test for collaborative playlist',
5369 'title': 'yt-dlp test - collaborative playlist',
5370 'view_count': int,
5371 'uploader_url': 'https://www.youtube.com/channel/UCKcqXmCcyqnhgpA5P0oHH_Q',
5372 },
5373 'playlist_mincount': 2
5374 }]
5375
@classmethod
def suitable(cls, url):
    """Return whether this extractor should handle *url*.

    Any URL that ``YoutubeIE`` accepts is rejected here; every other URL
    is judged by the parent class's ``suitable``.
    """
    if YoutubeIE.suitable(url):
        return False
    return super().suitable(url)
5379
# Splits a URL into three named parts:
#   pre  - everything matched by _VALID_URL
#   tab  - an optional "/<word>" segment, only attempted when the
#          `not_channel` group (referenced here, defined in _VALID_URL)
#          did not take part in the match
#   post - whatever remains up to the end of the string
_URL_RE = re.compile(
    rf'(?P<pre>{_VALID_URL})'
    r'(?(not_channel)|(?P<tab>/\w+))?'
    r'(?P<post>.*)$')
5381
@YoutubeTabBaseInfoExtractor.passthrough_smuggled_data
def _real_extract(self, url, smuggled_data):
    """Extract a tab/playlist page, or hand the URL off to another extractor.

    The URL's host is first forced to ``www.youtube.com`` and the URL is
    split with ``_URL_RE`` into a prefix, a tab and a remainder. After that:

    * For music URLs (``smuggled_data['is_music_url']``), ``VL...`` ids are
      rewritten to the matching playlist URL, ``MP...`` ids are resolved
      through the page's canonical URL and delegated back to this
      extractor, and ``browse`` URLs are rewritten to ``/channel/``.
    * A channel URL without a tab is pointed at ``/videos`` unless the
      ``no-youtube-channel-redirect`` compat option is set.
    * A ``watch`` URL without ``v`` falls back to its ``list`` playlist.
    * With both ``v`` and ``list`` present and ``noplaylist`` set, only the
      video is returned (via ``YoutubeIE``).

    Returns the result of ``_extract_from_tabs``, ``_extract_from_playlist``
    or ``url_result``, depending on what the downloaded data contains.

    Raises ``UserNotLive`` when a ``live`` tab was requested but a tab page
    was served, and ``ExtractorError`` when an album cannot be resolved, an
    explicitly requested tab does not exist, or the page is not recognised.
    """
    item_id = self._match_id(url)
    parsed_url = urllib.parse.urlparse(url)
    url = urllib.parse.urlunparse(parsed_url._replace(netloc='www.youtube.com'))
    compat_opts = self.get_param('compat_opts', [])

    def redirects_enabled():
        # Evaluated lazily, at the same points the option is consulted below
        return 'no-youtube-channel-redirect' not in compat_opts

    def split_url(target):
        # Groups that did not participate in the match become empty strings
        groups = self._URL_RE.match(target).groupdict()
        return {name: '' if value is None else value for name, value in groups.items()}

    def video_result(vid):
        return self.url_result(f'https://www.youtube.com/watch?v={vid}',
                               ie=YoutubeIE.ie_key(), video_id=vid)

    mobj, redirect_warning = split_url(url), None
    # Youtube returns incomplete data if tabname is not lower case
    pre, tab, post = mobj['pre'], mobj['tab'].lower(), mobj['post']
    is_channel = not mobj['not_channel']

    if is_channel and smuggled_data.get('is_music_url'):
        id_prefix = item_id[:2]
        if id_prefix == 'VL':  # Youtube music VL channels have an equivalent playlist
            item_id = item_id[2:]
            pre, tab, post, is_channel = f'https://www.youtube.com/playlist?list={item_id}', '', '', False
        elif id_prefix == 'MP':  # Resolve albums (/[channel/browse]/MP...) to their equivalent playlist
            album_data = self._extract_tab_endpoint(
                f'https://music.youtube.com/channel/{item_id}', item_id, default_client='web_music')
            album_url = traverse_obj(
                album_data, ('microformat', 'microformatDataRenderer', 'urlCanonical'),
                get_all=False, expected_type=str)
            if not album_url:
                raise ExtractorError('Failed to resolve album to playlist')
            return self.url_result(album_url, ie=YoutubeTabIE.ie_key())
        elif mobj['channel_type'] == 'browse':  # Youtube music /browse/ should be changed to /channel/
            pre = f'https://www.youtube.com/channel/{item_id}'

    original_tab_name = tab
    if is_channel and not tab and redirects_enabled():
        # Home URLs should redirect to /videos/
        redirect_warning = ('A channel/user page was given. All the channel\'s videos will be downloaded. '
                            'To download only the videos in the home page, add a "/featured" to the URL')
        tab = '/videos'

    url = ''.join((pre, tab, post))
    mobj = split_url(url)

    # Handle both video/playlist URLs
    query = parse_qs(url)
    video_id = query.get('v', [None])[0]
    playlist_id = query.get('list', [None])[0]

    if not video_id and mobj['not_channel'].startswith('watch'):
        if not playlist_id:
            # If there is neither video or playlist ids, youtube redirects to home page, which is undesirable
            raise ExtractorError('Unable to recognize tab page')
        # Common mistake: https://www.youtube.com/watch?list=playlist_id
        self.report_warning(f'A video URL was given without video ID. Trying to download playlist {playlist_id}')
        url = f'https://www.youtube.com/playlist?list={playlist_id}'
        mobj = split_url(url)

    if video_id and playlist_id:
        if self.get_param('noplaylist'):
            self.to_screen(f'Downloading just video {video_id} because of --no-playlist')
            return video_result(video_id)
        self.to_screen(f'Downloading playlist {playlist_id}; add --no-playlist to just download video {video_id}')

    data, ytcfg = self._extract_data(url, item_id)

    # YouTube may provide a non-standard redirect to the regional channel
    # See: https://github.com/yt-dlp/yt-dlp/issues/2694
    redirect_url = traverse_obj(
        data, ('onResponseReceivedActions', ..., 'navigateAction', 'endpoint', 'commandMetadata', 'webCommandMetadata', 'url'), get_all=False)
    if redirect_url and redirects_enabled():
        redirect_url = ''.join((
            urljoin('https://www.youtube.com', redirect_url), mobj['tab'], mobj['post']))
        self.to_screen(f'This playlist is likely not available in your region. Following redirect to regional playlist {redirect_url}')
        return self.url_result(redirect_url, ie=YoutubeTabIE.ie_key())

    tabs = traverse_obj(data, ('contents', 'twoColumnBrowseResultsRenderer', 'tabs'), expected_type=list)
    if tabs:
        selected_tab = self._extract_selected_tab(tabs)
        selected_tab_name = selected_tab.get('title', '').lower()
        if selected_tab_name == 'home':
            selected_tab_name = 'featured'
        requested_tab_name = mobj['tab'][1:]
        if redirects_enabled():
            if requested_tab_name == 'live':  # Live tab should have redirected to the video
                raise UserNotLive(video_id=mobj['id'])
            if requested_tab_name not in ('', selected_tab_name):
                redirect_warning = f'The channel does not have a {requested_tab_name} tab'
                if original_tab_name:
                    # The tab was named explicitly in the URL, so its absence is an error
                    raise ExtractorError(redirect_warning, expected=True)
                if item_id[:2] == 'UC':
                    # Topic channels don't have /videos. Use the equivalent playlist instead
                    uploads_id = f'UU{item_id[2:]}'
                    uploads_url = f'https://www.youtube.com/playlist?list={uploads_id}'
                    try:
                        data, ytcfg = self._extract_data(
                            uploads_url, uploads_id, ytcfg=ytcfg, fatal=True, webpage_fatal=True)
                    except ExtractorError:
                        redirect_warning += ' and the playlist redirect gave error'
                    else:
                        item_id, url, selected_tab_name = uploads_id, uploads_url, requested_tab_name
                        redirect_warning += f'. Redirecting to playlist {uploads_id} instead'
                if selected_tab_name and selected_tab_name != requested_tab_name:
                    redirect_warning += f'. {selected_tab_name} tab is being downloaded instead'

    if redirect_warning:
        self.to_screen(redirect_warning)
    self.write_debug(f'Final URL: {url}')

    # YouTube sometimes provides a button to reload playlist with unavailable videos.
    if 'no-youtube-unavailable-videos' not in compat_opts:
        data = self._reload_with_unavailable_videos(item_id, data, ytcfg) or data
    self._extract_and_report_alerts(data, only_once=True)

    # `data` may have been replaced above, so the tabs are looked up again
    tabs = traverse_obj(data, ('contents', 'twoColumnBrowseResultsRenderer', 'tabs'), expected_type=list)
    if tabs:
        return self._extract_from_tabs(item_id, ytcfg, data, tabs)

    playlist = traverse_obj(
        data, ('contents', 'twoColumnWatchNextResults', 'playlist', 'playlist'), expected_type=dict)
    if playlist:
        return self._extract_from_playlist(item_id, url, data, playlist, ytcfg)

    video_id = traverse_obj(
        data, ('currentVideoEndpoint', 'watchEndpoint', 'videoId'), expected_type=str) or video_id
    if video_id:
        if mobj['tab'] != '/live':  # live tab is expected to redirect to video
            self.report_warning(f'Unable to recognize playlist. Downloading just video {video_id}')
        return video_result(video_id)

    raise ExtractorError('Unable to recognize tab page')
5510
5511
class YoutubePlaylistIE(InfoExtractor):
    """Matches playlist URLs and bare playlist ids and forwards them to YoutubeTabIE."""

    IE_DESC = 'YouTube playlists'
    # Either a bare playlist id, or a youtube/youtubekids/invidious URL whose
    # query string carries the id in `list=`
    _VALID_URL = r'''(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        (?:
                            (?:
                                youtube(?:kids)?\.com|
                                %(invidious)s
                            )
                            /.*?\?.*?\blist=
                        )?
                        (?P<id>%(playlist_id)s)
                     )''' % {
        'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE,
        'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
    }
    IE_NAME = 'youtube:playlist'
    _TESTS = [{
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
            'uploader': 'Wickman',
            'uploader_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
            'description': 'md5:8fa6f52abb47a9552002fa3ddfc57fc2',
            'view_count': int,
            'uploader_url': 'https://www.youtube.com/user/Wickydoo',
            'modified_date': r're:\d{8}',
            'channel_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
            'channel': 'Wickman',
            'tags': [],
            'channel_url': 'https://www.youtube.com/user/Wickydoo',
        },
        'playlist_mincount': 29,
    }, {
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        },
        'playlist_count': 2,
        'skip': 'This playlist is private',
    }, {
        'note': 'embedded',
        'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA15',
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
            'uploader': 'milan',
            'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
            'description': '',
            'channel_url': 'https://www.youtube.com/channel/UCEI1-PVPcYXjB73Hfelbmaw',
            'tags': [],
            'modified_date': '20140919',
            'view_count': int,
            'channel': 'milan',
            'channel_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
            'uploader_url': 'https://www.youtube.com/channel/UCEI1-PVPcYXjB73Hfelbmaw',
        },
        'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
    }, {
        'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
        'playlist_mincount': 455,
        'info_dict': {
            'title': '2018 Chinese New Singles (11/6 updated)',
            'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
            'uploader': 'LBK',
            'uploader_id': 'UC21nz3_MesPLqtDqwdvnoxA',
            'description': 'md5:da521864744d60a198e3a88af4db0d9d',
            'channel': 'LBK',
            'view_count': int,
            'channel_url': 'https://www.youtube.com/c/愛低音的國王',
            'tags': [],
            'uploader_url': 'https://www.youtube.com/c/愛低音的國王',
            'channel_id': 'UC21nz3_MesPLqtDqwdvnoxA',
            'modified_date': r're:\d{8}',
        },
        'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
    }, {
        'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
        'only_matching': True,
    }, {
        # music album playlist
        'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Return whether *url* should be handled by this extractor.

        URLs accepted by ``YoutubeTabIE`` and URLs whose query string has a
        non-empty ``v`` parameter are rejected; everything else is decided
        by the parent class's ``suitable``.
        """
        if YoutubeTabIE.suitable(url):
            return False
        # NOTE(review): this function-level import looks deliberate even though
        # the module already imports parse_qs (presumably so the method still
        # works if its source is copied outside this module) - confirm before
        # hoisting it
        from ..utils import parse_qs
        has_video_id = bool(parse_qs(url).get('v', [None])[0])
        return False if has_video_id else super().suitable(url)

    def _real_extract(self, url):
        """Rewrite *url* as a www.youtube.com/playlist URL and delegate to YoutubeTabIE.

        The original query parameters are carried over; when there are none,
        ``list=<playlist id>`` is used instead. If the original URL is a music
        URL, that fact is smuggled along as ``is_music_url``.
        """
        playlist_id = self._match_id(url)
        from_music = YoutubeBaseInfoExtractor.is_music_url(url)
        query = parse_qs(url) or {'list': playlist_id}
        tab_url = update_url_query('https://www.youtube.com/playlist', query)
        if from_music:
            tab_url = smuggle_url(tab_url, {'is_music_url': True})
        return self.url_result(tab_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
5621
5622
class YoutubeYtBeIE(InfoExtractor):
    """Handle youtu.be short links that also carry a ``list=`` playlist ID."""

    IE_DESC = 'youtu.be'
    _VALID_URL = r'https?://youtu\.be/(?P<id>[0-9A-Za-z_-]{11})/*?.*?\blist=(?P<playlist_id>%(playlist_id)s)' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TESTS = [{
        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
        'info_dict': {
            'id': 'yeWKywCrFtk',
            'ext': 'mp4',
            'title': 'Small Scale Baler and Braiding Rugs',
            'uploader': 'Backus-Page House Museum',
            'uploader_id': 'backuspagemuseum',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
            'upload_date': '20161008',
            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
            'categories': ['Nonprofits & Activism'],
            'tags': list,
            'like_count': int,
            'age_limit': 0,
            'playable_in_embed': True,
            'thumbnail': 'https://i.ytimg.com/vi_webp/yeWKywCrFtk/maxresdefault.webp',
            'channel': 'Backus-Page House Museum',
            'channel_id': 'UCEfMCQ9bs3tjvjy1s451zaw',
            'live_status': 'not_live',
            'view_count': int,
            'channel_url': 'https://www.youtube.com/channel/UCEfMCQ9bs3tjvjy1s451zaw',
            'availability': 'public',
            'duration': 59,
            'comment_count': int,
            'channel_follower_count': int
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
    }, {
        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Expand the short link to a full watch URL (video + playlist) for YoutubeTabIE."""
        video_id, playlist_id = self._match_valid_url(url).group('id', 'playlist_id')
        watch_url = update_url_query('https://www.youtube.com/watch', {
            'v': video_id,
            'list': playlist_id,
            'feature': 'youtu.be',
        })
        return self.url_result(watch_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
5672
5673
class YoutubeLivestreamEmbedIE(InfoExtractor):
    """Turn ``/embed/live_stream?channel=<id>`` URLs into the channel's ``/live`` URL."""

    IE_DESC = 'YouTube livestream embeds'
    _VALID_URL = r'https?://(?:\w+\.)?youtube\.com/embed/live_stream/?\?(?:[^#]+&)?channel=(?P<id>[^&#]+)'
    _TESTS = [{
        'url': 'https://www.youtube.com/embed/live_stream?channel=UC2_KI6RB__jGdlnK6dvFEZA',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE using the channel ID taken from the query string."""
        channel_id = self._match_id(url)
        live_url = f'https://www.youtube.com/channel/{channel_id}/live'
        return self.url_result(live_url, ie=YoutubeTabIE.ie_key(), video_id=channel_id)
5687
5688
class YoutubeYtUserIE(InfoExtractor):
    """Resolve the ``ytuser:<name>`` shorthand to that user's /videos page."""

    IE_NAME = 'youtube:user'
    IE_DESC = 'YouTube user videos; "ytuser:" prefix'
    _VALID_URL = r'ytuser:(?P<id>.+)'
    _TESTS = [{
        'url': 'ytuser:phihag',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the user's videos URL."""
        user_id = self._match_id(url)
        videos_url = f'https://www.youtube.com/user/{user_id}/videos'
        return self.url_result(videos_url, ie=YoutubeTabIE.ie_key(), video_id=user_id)
5703
5704
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    """Resolve the ``:ytfav`` keyword (and its spelling variants) to the ``LL`` playlist."""

    IE_DESC = 'YouTube liked videos; ":ytfav" keyword (requires cookies)'
    IE_NAME = 'youtube:favorites'
    _LOGIN_REQUIRED = True
    _VALID_URL = r':ytfav(?:ou?rite)?s?'
    _TESTS = [{
        'url': ':ytfav',
        'only_matching': True,
    }, {
        'url': ':ytfavorites',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the fixed liked-videos playlist URL."""
        liked_url = 'https://www.youtube.com/playlist?list=LL'
        return self.url_result(liked_url, ie=YoutubeTabIE.ie_key())
5722
5723
class YoutubeNotificationsIE(YoutubeTabBaseInfoExtractor):
    """Expose the notification menu as a playlist of video / community-post URL entries."""

    IE_DESC = 'YouTube notifications; ":ytnotif" keyword (requires cookies)'
    IE_NAME = 'youtube:notif'
    _LOGIN_REQUIRED = True
    _VALID_URL = r':ytnotif(?:ication)?s?'
    _TESTS = [{
        'url': ':ytnotif',
        'only_matching': True,
    }, {
        'url': ':ytnotifications',
        'only_matching': True,
    }]

    def _extract_notification_menu(self, response, continuation_list):
        """Yield one entry per usable notification in a menu response.

        ``continuation_list`` is a one-element list used as an out-parameter:
        slot 0 is reset to None once iteration starts and is then set to the
        last ``continuationItemRenderer`` encountered, if any.
        """
        # The first path is the initial popup response, the second a continuation response.
        items = traverse_obj(
            response,
            ('actions', 0, 'openPopupAction', 'popup', 'multiPageMenuRenderer', 'sections', 0, 'multiPageMenuNotificationSectionRenderer', 'items'),
            ('actions', 0, 'appendContinuationItemsAction', 'continuationItems'),
            expected_type=list) or []
        continuation_list[0] = None
        for menu_item in items:
            parsed = self._extract_notification_renderer(menu_item.get('notificationRenderer'))
            if parsed:
                yield parsed
            next_page = menu_item.get('continuationItemRenderer')
            if next_page:
                continuation_list[0] = next_page

    def _extract_notification_renderer(self, notification):
        """Build a ``url`` entry for a single notification renderer.

        Returns None when the notification points neither to a video nor to a
        community post with both a channel ID and a post ID.
        """
        video_id = traverse_obj(
            notification, ('navigationEndpoint', 'watchEndpoint', 'videoId'), expected_type=str)
        if video_id:
            channel_id = None
            url = f'https://www.youtube.com/watch?v={video_id}'
        else:
            browse_ep = traverse_obj(
                notification, ('navigationEndpoint', 'browseEndpoint'), expected_type=dict)
            channel_id = traverse_obj(browse_ep, 'browseId', expected_type=str)
            post_id = self._search_regex(
                r'/post/(.+)', traverse_obj(browse_ep, 'canonicalBaseUrl', expected_type=str),
                'post id', default=None)
            if not (channel_id and post_id):
                return
            # The direct /post url redirects to this in the browser
            url = f'https://www.youtube.com/channel/{channel_id}/community?lb={post_id}'

        channel = traverse_obj(
            notification, ('contextualMenu', 'menuRenderer', 'items', 1, 'menuServiceItemRenderer', 'text', 'runs', 1, 'text'),
            expected_type=str)
        message = self._get_text(notification, 'shortMessage')
        if message:
            message = message.replace('\xad', '')  # remove soft hyphens
        # TODO: handle recommended videos
        title = self._search_regex(
            rf'{re.escape(channel or "")}[^:]+: (.+)', message,
            'video title', default=None)

        # The date is only derived from the "sent" text when explicitly requested.
        upload_date = None
        if self._configuration_arg('approximate_date', ie_key=YoutubeTabIE.ie_key()):
            upload_date = strftime_or_none(
                self._extract_time_text(notification, 'sentTimeText')[0], '%Y%m%d')

        target_ie = YoutubeIE if video_id else YoutubeTabIE
        return {
            '_type': 'url',
            'url': url,
            'ie_key': target_ie.ie_key(),
            'video_id': video_id,
            'title': title,
            'channel_id': channel_id,
            'channel': channel,
            'thumbnails': self._extract_thumbnails(notification, 'videoThumbnail'),
            'upload_date': upload_date,
        }

    def _notification_menu_entries(self, ytcfg):
        """Yield entries from every notification-menu page, following continuations."""
        continuation_list = [None]
        response = None
        for page_num in itertools.count(1):
            ctoken = traverse_obj(
                continuation_list, (0, 'continuationEndpoint', 'getNotificationMenuEndpoint', 'ctoken'), expected_type=str)
            query = {'ctoken': ctoken} if ctoken else {}
            # Visitor data is taken from the previous page's response (None on the first page).
            headers = self.generate_api_headers(
                ytcfg=ytcfg, visitor_data=self._extract_visitor_data(response))
            response = self._extract_response(
                item_id=f'page {page_num}', query=query, ytcfg=ytcfg,
                ep='notification/get_notification_menu', check_get_keys='actions',
                headers=headers)
            yield from self._extract_notification_menu(response, continuation_list)
            if not continuation_list[0]:
                break

    def _real_extract(self, url):
        """Return the notifications as a playlist whose id and title are 'notifications'."""
        display_id = 'notifications'
        ytcfg = {} if self.skip_webpage else self._download_ytcfg('web', display_id)
        self._report_playlist_authcheck(ytcfg)
        entries = self._notification_menu_entries(ytcfg)
        return self.playlist_result(entries, display_id, display_id)
5813
5814
class YoutubeSearchIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor):
    """Search extractor for the ``ytsearch`` key; only declares configuration."""

    IE_NAME = 'youtube:search'
    IE_DESC = 'YouTube search'
    _SEARCH_KEY = 'ytsearch'
    _SEARCH_PARAMS = 'EgIQAQ%3D%3D'  # Videos only
    _TESTS = [{
        'url': 'ytsearch5:youtube-dl test video',
        'playlist_count': 5,
        'info_dict': {
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
        },
    }]
5828
5829
class YoutubeSearchDateIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor):
    """Search extractor for the ``ytsearchdate`` key; only declares configuration."""

    IE_NAME = f'{YoutubeSearchIE.IE_NAME}:date'
    IE_DESC = 'YouTube search, newest videos first'
    _SEARCH_KEY = 'ytsearchdate'
    _SEARCH_PARAMS = 'CAISAhAB'  # Videos only, sorted by date
    _TESTS = [{
        'url': 'ytsearchdate5:youtube-dl test video',
        'playlist_count': 5,
        'info_dict': {
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
        },
    }]
5843
5844
class YoutubeSearchURLIE(YoutubeTabBaseInfoExtractor):
    """Handle ``/results`` and ``/search`` URLs, passing the ``sp`` parameter through."""

    IE_NAME = f'{YoutubeSearchIE.IE_NAME}_url'
    IE_DESC = 'YouTube search URLs with sorting and filter support'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/(?:results|search)\?([^#]+&)?(?:search_query|q)=(?:[^&]+)(?:[&#]|$)'
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
        }
    }, {
        'url': 'https://www.youtube.com/results?search_query=python&sp=EgIQAg%253D%253D',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'python',
            'title': 'python',
        }
    }, {
        'url': 'https://www.youtube.com/results?search_query=%23cats',
        'playlist_mincount': 1,
        'info_dict': {
            'id': '#cats',
            'title': '#cats',
            # The test suite does not have support for nested playlists
            # 'entries': [{
            #     'url': r're:https://(www\.)?youtube\.com/hashtag/cats',
            #     'title': '#cats',
            # }],
        },
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return the search results as a playlist named after the query text."""
        qs = parse_qs(url)
        # ``search_query`` takes precedence over ``q`` when both are present.
        query = (qs.get('search_query') or qs.get('q'))[0]
        search_params = qs.get('sp', (None,))[0]
        results = self._search_results(query, search_params)
        return self.playlist_result(results, query, query)
5884
5885
class YoutubeMusicSearchURLIE(YoutubeTabBaseInfoExtractor):
    """Handle music.youtube.com search URLs, optionally limited to one result section."""

    IE_NAME = 'youtube:music:search_url'
    IE_DESC = 'YouTube music search URLs with selectable sections, e.g. #songs'
    _VALID_URL = r'https?://music\.youtube\.com/search\?([^#]+&)?(?:search_query|q)=(?:[^&]+)(?:[&#]|$)'
    _TESTS = [{
        'url': 'https://music.youtube.com/search?q=royalty+free+music',
        'playlist_count': 16,
        'info_dict': {
            'id': 'royalty free music',
            'title': 'royalty free music',
        }
    }, {
        'url': 'https://music.youtube.com/search?q=royalty+free+music&sp=EgWKAQIIAWoKEAoQAxAEEAkQBQ%3D%3D',
        'playlist_mincount': 30,
        'info_dict': {
            'id': 'royalty free music - songs',
            'title': 'royalty free music - songs',
        },
        'params': {'extract_flat': 'in_playlist'}
    }, {
        'url': 'https://music.youtube.com/search?q=royalty+free+music#community+playlists',
        'playlist_mincount': 30,
        'info_dict': {
            'id': 'royalty free music - community playlists',
            'title': 'royalty free music - community playlists',
        },
        'params': {'extract_flat': 'in_playlist'}
    }]

    # Section name (as written in the URL fragment) -> value of the ``sp`` search parameter
    _SECTIONS = {
        'albums': 'EgWKAQIYAWoKEAoQAxAEEAkQBQ==',
        'artists': 'EgWKAQIgAWoKEAoQAxAEEAkQBQ==',
        'community playlists': 'EgeKAQQoAEABagoQChADEAQQCRAF',
        'featured playlists': 'EgeKAQQoADgBagwQAxAJEAQQDhAKEAU==',
        'songs': 'EgWKAQIIAWoKEAoQAxAEEAkQBQ==',
        'videos': 'EgWKAQIQAWoKEAoQAxAEEAkQBQ==',
    }

    def _real_extract(self, url):
        """Return the search results as a playlist titled ``<query>[ - <section>]``.

        An explicit ``sp`` parameter wins; otherwise the URL fragment is looked
        up in ``_SECTIONS``. An unknown fragment means an unfiltered search.
        """
        qs = parse_qs(url)
        query = (qs.get('search_query') or qs.get('q'))[0]
        params = qs.get('sp', (None,))[0]
        if params:
            # Label with the known section name, or with the raw ``sp`` value if unknown.
            section = next(
                (name for name, value in self._SECTIONS.items() if value == params), params)
        else:
            fragment = (url.split('#') + [''])[1]
            section = urllib.parse.unquote_plus(fragment).lower()
            params = self._SECTIONS.get(section)
            if not params:
                section = None
        title = join_nonempty(query, section, delim=' - ')
        results = self._search_results(query, params, default_client='web_music')
        return self.playlist_result(results, title, title)
5937
5938
class YoutubeFeedsInfoExtractor(InfoExtractor):
    """
    Base class for feed extractors
    Subclasses must re-define the _FEED_NAME property.
    """
    _FEED_NAME = 'feeds'
    _LOGIN_REQUIRED = True

    @classproperty
    def IE_NAME(self):
        """Extractor name derived from the feed name: ``youtube:<_FEED_NAME>``."""
        return f'youtube:{self._FEED_NAME}'

    def _real_initialize(self):
        """Run YoutubeBaseInfoExtractor's login check on this instance."""
        # This class does not inherit from YoutubeBaseInfoExtractor, so the
        # method is called through that class with ``self`` passed explicitly.
        YoutubeBaseInfoExtractor._check_login_required(self)

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the ``/feed/<_FEED_NAME>`` URL."""
        feed_url = f'https://www.youtube.com/feed/{self._FEED_NAME}'
        return self.url_result(feed_url, ie=YoutubeTabIE.ie_key())
5957
5958
class YoutubeWatchLaterIE(InfoExtractor):
    """Resolve the ``:ytwatchlater`` keyword to the ``WL`` playlist."""

    IE_DESC = 'Youtube watch later list; ":ytwatchlater" keyword (requires cookies)'
    IE_NAME = 'youtube:watchlater'
    _VALID_URL = r':ytwatchlater'
    _TESTS = [{
        'url': ':ytwatchlater',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the fixed watch-later playlist URL."""
        watch_later_url = 'https://www.youtube.com/playlist?list=WL'
        return self.url_result(watch_later_url, ie=YoutubeTabIE.ie_key())
5971
5972
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for 'recommended'; matches the bare youtube.com home URL and ``:ytrec``."""

    IE_DESC = 'YouTube recommended videos; ":ytrec" keyword'
    _FEED_NAME = 'recommended'
    # Overrides the base class default of True.
    _LOGIN_REQUIRED = False
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/?(?:[?#]|$)|:ytrec(?:ommended)?'
    _TESTS = [{
        'url': ':ytrec',
        'only_matching': True,
    }, {
        'url': ':ytrecommended',
        'only_matching': True,
    }, {
        'url': 'https://youtube.com',
        'only_matching': True,
    }]
5988
5989
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for 'subscriptions'; matches ``:ytsub``, ``:ytsubs`` and longer forms."""

    IE_DESC = 'YouTube subscriptions feed; ":ytsubs" keyword (requires cookies)'
    _FEED_NAME = 'subscriptions'
    _VALID_URL = r':ytsub(?:scription)?s?'
    _TESTS = [{
        'url': ':ytsubs',
        'only_matching': True,
    }, {
        'url': ':ytsubscriptions',
        'only_matching': True,
    }]
6001
6002
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for 'history'; matches ``:ythis`` and ``:ythistory``."""

    IE_DESC = 'Youtube watch history; ":ythis" keyword (requires cookies)'
    _FEED_NAME = 'history'
    _VALID_URL = r':ythis(?:tory)?'
    _TESTS = [{
        'url': ':ythistory',
        'only_matching': True,
    }]
6011
6012
class YoutubeStoriesIE(InfoExtractor):
    """Resolve ``ytstories:UC<channel id>`` to the channel's ``RLTD...`` playlist."""

    IE_NAME = 'youtube:stories'
    IE_DESC = 'YouTube channel stories; "ytstories:" prefix'
    # The captured ``id`` is the channel ID without its leading "UC".
    _VALID_URL = r'ytstories:UC(?P<id>[A-Za-z0-9_-]{21}[AQgw])$'
    _TESTS = [{
        'url': 'ytstories:UCwFCb4jeqaKWnciAYM-ZVHg',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the playlist URL, smuggling an ``is_story`` flag."""
        playlist_id = f'RLTD{self._match_id(url)}'
        story_url = smuggle_url(
            f'https://www.youtube.com/playlist?list={playlist_id}&playnext=1',
            {'is_story': True})
        return self.url_result(story_url, ie=YoutubeTabIE, video_id=playlist_id)
6027
6028
class YoutubeTruncatedURLIE(InfoExtractor):
    """Catch watch/attribution URLs that end before any video ID.

    Such URLs are typically what is left when an unquoted ``&`` makes the shell
    cut the URL short, so extraction always fails with a hint on quoting.
    """

    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    # Matches ``watch?`` followed by at most one of the listed non-video
    # parameters, or an ``attribution_link?a=...`` URL, anchored at end of string.
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError explaining how to quote the URL."""
        # The example command names yt-dlp, the program this file belongs to.
        raise ExtractorError(
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like yt-dlp '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply yt-dlp BaW_jenozKc .',
            expected=True)
6076
6077
class YoutubeClipIE(YoutubeTabBaseInfoExtractor):
    """Resolve a ``/clip/<id>`` URL to its base video plus the clip's start/end times."""

    IE_NAME = 'youtube:clip'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/clip/(?P<id>[^/?#]+)'
    _TESTS = [{
        # FIXME: Other metadata should be extracted from the clip, not from the base video
        'url': 'https://www.youtube.com/clip/UgytZKpehg-hEMBSn3F4AaABCQ',
        'info_dict': {
            'id': 'UgytZKpehg-hEMBSn3F4AaABCQ',
            'ext': 'mp4',
            'section_start': 29.0,
            'section_end': 39.7,
            'duration': 10.7,
            'age_limit': 0,
            'availability': 'public',
            'categories': ['Gaming'],
            'channel': 'Scott The Woz',
            'channel_id': 'UC4rqhyiTs7XyuODcECvuiiQ',
            'channel_url': 'https://www.youtube.com/channel/UC4rqhyiTs7XyuODcECvuiiQ',
            'description': 'md5:7a4517a17ea9b4bd98996399d8bb36e7',
            'like_count': int,
            'playable_in_embed': True,
            'tags': 'count:17',
            'thumbnail': 'https://i.ytimg.com/vi_webp/ScPX26pdQik/maxresdefault.webp',
            'title': 'Mobile Games on Console - Scott The Woz',
            'upload_date': '20210920',
            'uploader': 'Scott The Woz',
            'uploader_id': 'scottthewoz',
            'uploader_url': 'http://www.youtube.com/user/scottthewoz',
            'view_count': int,
            'live_status': 'not_live',
            'channel_follower_count': int
        }
    }]

    def _real_extract(self, url):
        """Return a ``url_transparent`` result pointing at the clip's base video.

        The result keeps the clip ID as ``id`` and carries ``section_start`` /
        ``section_end`` in seconds, converted from the millisecond values of
        the page's ``loopCommand``.

        Raises ExtractorError if either the video ID or the clip timing data
        cannot be found in the page data.
        """
        clip_id = self._match_id(url)
        _, data = self._extract_webpage(url, clip_id)

        video_id = traverse_obj(data, ('currentVideoEndpoint', 'watchEndpoint', 'videoId'))
        if not video_id:
            raise ExtractorError('Unable to find video ID')

        # First ``loopCommand`` found under the clip section's engagement panel.
        clip_data = traverse_obj(data, (
            'engagementPanels', ..., 'engagementPanelSectionListRenderer', 'content', 'clipSectionRenderer',
            'contents', ..., 'clipAttributionRenderer', 'onScrubExit', 'commandExecutorCommand', 'commands', ...,
            'openPopupAction', 'popup', 'notificationActionRenderer', 'actionButton', 'buttonRenderer', 'command',
            'commandExecutorCommand', 'commands', ..., 'loopCommand'), get_all=False)
        if not clip_data:
            # Without this guard the subscripts below fail with a bare
            # TypeError on None instead of a meaningful extraction error.
            raise ExtractorError('Unable to find clip data')

        return {
            '_type': 'url_transparent',
            'url': f'https://www.youtube.com/watch?v={video_id}',
            'ie_key': YoutubeIE.ie_key(),
            'id': clip_id,
            'section_start': int(clip_data['startTimeMs']) / 1000,
            'section_end': int(clip_data['endTimeMs']) / 1000,
        }
6134
6135
class YoutubeTruncatedIDIE(InfoExtractor):
    """Catch watch URLs whose ``v`` value is only 1-10 characters long."""

    IE_DESC = False  # Do not list
    IE_NAME = 'youtube:truncated_id'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError reporting the incomplete ID."""
        video_id = self._match_id(url)
        message = f'Incomplete YouTube ID {video_id}. URL {url} looks truncated.'
        raise ExtractorError(message, expected=True)