]> jfr.im git - yt-dlp.git/blame_incremental - yt_dlp/extractor/youtube.py
[ExtractAudio] Support `alac`
[yt-dlp.git] / yt_dlp / extractor / youtube.py
... / ...
CommitLineData
1# coding: utf-8
2
3from __future__ import unicode_literals
4
5import base64
6import calendar
7import copy
8import datetime
9import hashlib
10import itertools
11import json
12import math
13import os.path
14import random
15import re
16import time
17import traceback
18
19from .common import InfoExtractor, SearchInfoExtractor
20from ..compat import (
21 compat_chr,
22 compat_HTTPError,
23 compat_parse_qs,
24 compat_str,
25 compat_urllib_parse_unquote_plus,
26 compat_urllib_parse_urlencode,
27 compat_urllib_parse_urlparse,
28 compat_urlparse,
29)
30from ..jsinterp import JSInterpreter
31from ..utils import (
32 bug_reports_message,
33 bytes_to_intlist,
34 clean_html,
35 datetime_from_str,
36 dict_get,
37 error_to_compat_str,
38 ExtractorError,
39 float_or_none,
40 format_field,
41 int_or_none,
42 intlist_to_bytes,
43 is_html,
44 join_nonempty,
45 mimetype2ext,
46 network_exceptions,
47 orderedSet,
48 parse_codecs,
49 parse_count,
50 parse_duration,
51 parse_iso8601,
52 parse_qs,
53 qualities,
54 remove_end,
55 remove_start,
56 smuggle_url,
57 str_or_none,
58 str_to_int,
59 traverse_obj,
60 try_get,
61 unescapeHTML,
62 unified_strdate,
63 unsmuggle_url,
64 update_url_query,
65 url_or_none,
66 urljoin,
67 variadic,
68)
69
70
71def get_first(obj, keys, **kwargs):
72 return traverse_obj(obj, (..., *variadic(keys)), **kwargs, get_all=False)
73
74
75# any clients starting with _ cannot be explicity requested by the user
76INNERTUBE_CLIENTS = {
77 'web': {
78 'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
79 'INNERTUBE_CONTEXT': {
80 'client': {
81 'clientName': 'WEB',
82 'clientVersion': '2.20210622.10.00',
83 }
84 },
85 'INNERTUBE_CONTEXT_CLIENT_NAME': 1
86 },
87 'web_embedded': {
88 'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
89 'INNERTUBE_CONTEXT': {
90 'client': {
91 'clientName': 'WEB_EMBEDDED_PLAYER',
92 'clientVersion': '1.20210620.0.1',
93 },
94 },
95 'INNERTUBE_CONTEXT_CLIENT_NAME': 56
96 },
97 'web_music': {
98 'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
99 'INNERTUBE_HOST': 'music.youtube.com',
100 'INNERTUBE_CONTEXT': {
101 'client': {
102 'clientName': 'WEB_REMIX',
103 'clientVersion': '1.20210621.00.00',
104 }
105 },
106 'INNERTUBE_CONTEXT_CLIENT_NAME': 67,
107 },
108 'web_creator': {
109 'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
110 'INNERTUBE_CONTEXT': {
111 'client': {
112 'clientName': 'WEB_CREATOR',
113 'clientVersion': '1.20210621.00.00',
114 }
115 },
116 'INNERTUBE_CONTEXT_CLIENT_NAME': 62,
117 },
118 'android': {
119 'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
120 'INNERTUBE_CONTEXT': {
121 'client': {
122 'clientName': 'ANDROID',
123 'clientVersion': '16.20',
124 }
125 },
126 'INNERTUBE_CONTEXT_CLIENT_NAME': 3,
127 'REQUIRE_JS_PLAYER': False
128 },
129 'android_embedded': {
130 'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
131 'INNERTUBE_CONTEXT': {
132 'client': {
133 'clientName': 'ANDROID_EMBEDDED_PLAYER',
134 'clientVersion': '16.20',
135 },
136 },
137 'INNERTUBE_CONTEXT_CLIENT_NAME': 55,
138 'REQUIRE_JS_PLAYER': False
139 },
140 'android_music': {
141 'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
142 'INNERTUBE_HOST': 'music.youtube.com',
143 'INNERTUBE_CONTEXT': {
144 'client': {
145 'clientName': 'ANDROID_MUSIC',
146 'clientVersion': '4.32',
147 }
148 },
149 'INNERTUBE_CONTEXT_CLIENT_NAME': 21,
150 'REQUIRE_JS_PLAYER': False
151 },
152 'android_creator': {
153 'INNERTUBE_CONTEXT': {
154 'client': {
155 'clientName': 'ANDROID_CREATOR',
156 'clientVersion': '21.24.100',
157 },
158 },
159 'INNERTUBE_CONTEXT_CLIENT_NAME': 14,
160 'REQUIRE_JS_PLAYER': False
161 },
162 # ios has HLS live streams
163 # See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/680
164 'ios': {
165 'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
166 'INNERTUBE_CONTEXT': {
167 'client': {
168 'clientName': 'IOS',
169 'clientVersion': '16.20',
170 }
171 },
172 'INNERTUBE_CONTEXT_CLIENT_NAME': 5,
173 'REQUIRE_JS_PLAYER': False
174 },
175 'ios_embedded': {
176 'INNERTUBE_API_KEY': 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8',
177 'INNERTUBE_CONTEXT': {
178 'client': {
179 'clientName': 'IOS_MESSAGES_EXTENSION',
180 'clientVersion': '16.20',
181 },
182 },
183 'INNERTUBE_CONTEXT_CLIENT_NAME': 66,
184 'REQUIRE_JS_PLAYER': False
185 },
186 'ios_music': {
187 'INNERTUBE_API_KEY': 'AIzaSyDK3iBpDP9nHVTk2qL73FLJICfOC3c51Og',
188 'INNERTUBE_HOST': 'music.youtube.com',
189 'INNERTUBE_CONTEXT': {
190 'client': {
191 'clientName': 'IOS_MUSIC',
192 'clientVersion': '4.32',
193 },
194 },
195 'INNERTUBE_CONTEXT_CLIENT_NAME': 26,
196 'REQUIRE_JS_PLAYER': False
197 },
198 'ios_creator': {
199 'INNERTUBE_CONTEXT': {
200 'client': {
201 'clientName': 'IOS_CREATOR',
202 'clientVersion': '21.24.100',
203 },
204 },
205 'INNERTUBE_CONTEXT_CLIENT_NAME': 15,
206 'REQUIRE_JS_PLAYER': False
207 },
208 # mweb has 'ultralow' formats
209 # See: https://github.com/yt-dlp/yt-dlp/pull/557
210 'mweb': {
211 'INNERTUBE_API_KEY': 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8',
212 'INNERTUBE_CONTEXT': {
213 'client': {
214 'clientName': 'MWEB',
215 'clientVersion': '2.20210721.07.00',
216 }
217 },
218 'INNERTUBE_CONTEXT_CLIENT_NAME': 2
219 },
220}
221
222
223def build_innertube_clients():
224 third_party = {
225 'embedUrl': 'https://google.com', # Can be any valid URL
226 }
227 base_clients = ('android', 'web', 'ios', 'mweb')
228 priority = qualities(base_clients[::-1])
229
230 for client, ytcfg in tuple(INNERTUBE_CLIENTS.items()):
231 ytcfg.setdefault('INNERTUBE_API_KEY', 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8')
232 ytcfg.setdefault('INNERTUBE_HOST', 'www.youtube.com')
233 ytcfg.setdefault('REQUIRE_JS_PLAYER', True)
234 ytcfg['INNERTUBE_CONTEXT']['client'].setdefault('hl', 'en')
235 ytcfg['priority'] = 10 * priority(client.split('_', 1)[0])
236
237 if client in base_clients:
238 INNERTUBE_CLIENTS[f'{client}_agegate'] = agegate_ytcfg = copy.deepcopy(ytcfg)
239 agegate_ytcfg['INNERTUBE_CONTEXT']['client']['clientScreen'] = 'EMBED'
240 agegate_ytcfg['INNERTUBE_CONTEXT']['thirdParty'] = third_party
241 agegate_ytcfg['priority'] -= 1
242 elif client.endswith('_embedded'):
243 ytcfg['INNERTUBE_CONTEXT']['thirdParty'] = third_party
244 ytcfg['priority'] -= 2
245 else:
246 ytcfg['priority'] -= 3
247
248
249build_innertube_clients()
250
251
252class YoutubeBaseInfoExtractor(InfoExtractor):
253 """Provide base functions for Youtube extractors"""
254
255 _RESERVED_NAMES = (
256 r'channel|c|user|playlist|watch|w|v|embed|e|watch_popup|clip|'
257 r'shorts|movies|results|shared|hashtag|trending|feed|feeds|'
258 r'browse|oembed|get_video_info|iframe_api|s/player|'
259 r'storefront|oops|index|account|reporthistory|t/terms|about|upload|signin|logout')
260
261 _PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)'
262
263 _NETRC_MACHINE = 'youtube'
264
265 # If True it will raise an error if no login info is provided
266 _LOGIN_REQUIRED = False
267
268 _INVIDIOUS_SITES = (
269 # invidious-redirect websites
270 r'(?:www\.)?redirect\.invidious\.io',
271 r'(?:(?:www|dev)\.)?invidio\.us',
272 # Invidious instances taken from https://github.com/iv-org/documentation/blob/master/Invidious-Instances.md
273 r'(?:www\.)?invidious\.pussthecat\.org',
274 r'(?:www\.)?invidious\.zee\.li',
275 r'(?:www\.)?invidious\.ethibox\.fr',
276 r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',
277 # youtube-dl invidious instances list
278 r'(?:(?:www|no)\.)?invidiou\.sh',
279 r'(?:(?:www|fi)\.)?invidious\.snopyta\.org',
280 r'(?:www\.)?invidious\.kabi\.tk',
281 r'(?:www\.)?invidious\.mastodon\.host',
282 r'(?:www\.)?invidious\.zapashcanon\.fr',
283 r'(?:www\.)?(?:invidious(?:-us)?|piped)\.kavin\.rocks',
284 r'(?:www\.)?invidious\.tinfoil-hat\.net',
285 r'(?:www\.)?invidious\.himiko\.cloud',
286 r'(?:www\.)?invidious\.reallyancient\.tech',
287 r'(?:www\.)?invidious\.tube',
288 r'(?:www\.)?invidiou\.site',
289 r'(?:www\.)?invidious\.site',
290 r'(?:www\.)?invidious\.xyz',
291 r'(?:www\.)?invidious\.nixnet\.xyz',
292 r'(?:www\.)?invidious\.048596\.xyz',
293 r'(?:www\.)?invidious\.drycat\.fr',
294 r'(?:www\.)?inv\.skyn3t\.in',
295 r'(?:www\.)?tube\.poal\.co',
296 r'(?:www\.)?tube\.connect\.cafe',
297 r'(?:www\.)?vid\.wxzm\.sx',
298 r'(?:www\.)?vid\.mint\.lgbt',
299 r'(?:www\.)?vid\.puffyan\.us',
300 r'(?:www\.)?yewtu\.be',
301 r'(?:www\.)?yt\.elukerio\.org',
302 r'(?:www\.)?yt\.lelux\.fi',
303 r'(?:www\.)?invidious\.ggc-project\.de',
304 r'(?:www\.)?yt\.maisputain\.ovh',
305 r'(?:www\.)?ytprivate\.com',
306 r'(?:www\.)?invidious\.13ad\.de',
307 r'(?:www\.)?invidious\.toot\.koeln',
308 r'(?:www\.)?invidious\.fdn\.fr',
309 r'(?:www\.)?watch\.nettohikari\.com',
310 r'(?:www\.)?invidious\.namazso\.eu',
311 r'(?:www\.)?invidious\.silkky\.cloud',
312 r'(?:www\.)?invidious\.exonip\.de',
313 r'(?:www\.)?invidious\.riverside\.rocks',
314 r'(?:www\.)?invidious\.blamefran\.net',
315 r'(?:www\.)?invidious\.moomoo\.de',
316 r'(?:www\.)?ytb\.trom\.tf',
317 r'(?:www\.)?yt\.cyberhost\.uk',
318 r'(?:www\.)?kgg2m7yk5aybusll\.onion',
319 r'(?:www\.)?qklhadlycap4cnod\.onion',
320 r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',
321 r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',
322 r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',
323 r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',
324 r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',
325 r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',
326 r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',
327 r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',
328 r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',
329 r'(?:www\.)?hpniueoejy4opn7bc4ftgazyqjoeqwlvh2uiku2xqku6zpoa4bf5ruid\.onion',
330 )
331
332 def _login(self):
333 """
334 Attempt to log in to YouTube.
335 If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.
336 """
337
338 if (self._LOGIN_REQUIRED
339 and self.get_param('cookiefile') is None
340 and self.get_param('cookiesfrombrowser') is None):
341 self.raise_login_required(
342 'Login details are needed to download this content', method='cookies')
343 username, password = self._get_login_info()
344 if username:
345 self.report_warning(f'Cannot login to YouTube using username and password. {self._LOGIN_HINTS["cookies"]}')
346
347 def _initialize_consent(self):
348 cookies = self._get_cookies('https://www.youtube.com/')
349 if cookies.get('__Secure-3PSID'):
350 return
351 consent_id = None
352 consent = cookies.get('CONSENT')
353 if consent:
354 if 'YES' in consent.value:
355 return
356 consent_id = self._search_regex(
357 r'PENDING\+(\d+)', consent.value, 'consent', default=None)
358 if not consent_id:
359 consent_id = random.randint(100, 999)
360 self._set_cookie('.youtube.com', 'CONSENT', 'YES+cb.20210328-17-p0.en+FX+%s' % consent_id)
361
362 def _real_initialize(self):
363 self._initialize_consent()
364 self._login()
365
366 _YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+?})\s*;'
367 _YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*=\s*({.+?})\s*;'
368 _YT_INITIAL_BOUNDARY_RE = r'(?:var\s+meta|</script|\n)'
369
370 def _get_default_ytcfg(self, client='web'):
371 return copy.deepcopy(INNERTUBE_CLIENTS[client])
372
373 def _get_innertube_host(self, client='web'):
374 return INNERTUBE_CLIENTS[client]['INNERTUBE_HOST']
375
376 def _ytcfg_get_safe(self, ytcfg, getter, expected_type=None, default_client='web'):
377 # try_get but with fallback to default ytcfg client values when present
378 _func = lambda y: try_get(y, getter, expected_type)
379 return _func(ytcfg) or _func(self._get_default_ytcfg(default_client))
380
381 def _extract_client_name(self, ytcfg, default_client='web'):
382 return self._ytcfg_get_safe(
383 ytcfg, (lambda x: x['INNERTUBE_CLIENT_NAME'],
384 lambda x: x['INNERTUBE_CONTEXT']['client']['clientName']), compat_str, default_client)
385
386 def _extract_client_version(self, ytcfg, default_client='web'):
387 return self._ytcfg_get_safe(
388 ytcfg, (lambda x: x['INNERTUBE_CLIENT_VERSION'],
389 lambda x: x['INNERTUBE_CONTEXT']['client']['clientVersion']), compat_str, default_client)
390
391 def _extract_api_key(self, ytcfg=None, default_client='web'):
392 return self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_API_KEY'], compat_str, default_client)
393
394 def _extract_context(self, ytcfg=None, default_client='web'):
395 _get_context = lambda y: try_get(y, lambda x: x['INNERTUBE_CONTEXT'], dict)
396 context = _get_context(ytcfg)
397 if context:
398 return context
399
400 context = _get_context(self._get_default_ytcfg(default_client))
401 if not ytcfg:
402 return context
403
404 # Recreate the client context (required)
405 context['client'].update({
406 'clientVersion': self._extract_client_version(ytcfg, default_client),
407 'clientName': self._extract_client_name(ytcfg, default_client),
408 })
409 visitor_data = try_get(ytcfg, lambda x: x['VISITOR_DATA'], compat_str)
410 if visitor_data:
411 context['client']['visitorData'] = visitor_data
412 return context
413
414 _SAPISID = None
415
416 def _generate_sapisidhash_header(self, origin='https://www.youtube.com'):
417 time_now = round(time.time())
418 if self._SAPISID is None:
419 yt_cookies = self._get_cookies('https://www.youtube.com')
420 # Sometimes SAPISID cookie isn't present but __Secure-3PAPISID is.
421 # See: https://github.com/yt-dlp/yt-dlp/issues/393
422 sapisid_cookie = dict_get(
423 yt_cookies, ('__Secure-3PAPISID', 'SAPISID'))
424 if sapisid_cookie and sapisid_cookie.value:
425 self._SAPISID = sapisid_cookie.value
426 self.write_debug('Extracted SAPISID cookie')
427 # SAPISID cookie is required if not already present
428 if not yt_cookies.get('SAPISID'):
429 self.write_debug('Copying __Secure-3PAPISID cookie to SAPISID cookie')
430 self._set_cookie(
431 '.youtube.com', 'SAPISID', self._SAPISID, secure=True, expire_time=time_now + 3600)
432 else:
433 self._SAPISID = False
434 if not self._SAPISID:
435 return None
436 # SAPISIDHASH algorithm from https://stackoverflow.com/a/32065323
437 sapisidhash = hashlib.sha1(
438 f'{time_now} {self._SAPISID} {origin}'.encode('utf-8')).hexdigest()
439 return f'SAPISIDHASH {time_now}_{sapisidhash}'
440
441 def _call_api(self, ep, query, video_id, fatal=True, headers=None,
442 note='Downloading API JSON', errnote='Unable to download API page',
443 context=None, api_key=None, api_hostname=None, default_client='web'):
444
445 data = {'context': context} if context else {'context': self._extract_context(default_client=default_client)}
446 data.update(query)
447 real_headers = self.generate_api_headers(default_client=default_client)
448 real_headers.update({'content-type': 'application/json'})
449 if headers:
450 real_headers.update(headers)
451 return self._download_json(
452 'https://%s/youtubei/v1/%s' % (api_hostname or self._get_innertube_host(default_client), ep),
453 video_id=video_id, fatal=fatal, note=note, errnote=errnote,
454 data=json.dumps(data).encode('utf8'), headers=real_headers,
455 query={'key': api_key or self._extract_api_key()})
456
457 def extract_yt_initial_data(self, item_id, webpage, fatal=True):
458 data = self._search_regex(
459 (r'%s\s*%s' % (self._YT_INITIAL_DATA_RE, self._YT_INITIAL_BOUNDARY_RE),
460 self._YT_INITIAL_DATA_RE), webpage, 'yt initial data', fatal=fatal)
461 if data:
462 return self._parse_json(data, item_id, fatal=fatal)
463
464 @staticmethod
465 def _extract_session_index(*data):
466 """
467 Index of current account in account list.
468 See: https://github.com/yt-dlp/yt-dlp/pull/519
469 """
470 for ytcfg in data:
471 session_index = int_or_none(try_get(ytcfg, lambda x: x['SESSION_INDEX']))
472 if session_index is not None:
473 return session_index
474
475 # Deprecated?
476 def _extract_identity_token(self, ytcfg=None, webpage=None):
477 if ytcfg:
478 token = try_get(ytcfg, lambda x: x['ID_TOKEN'], compat_str)
479 if token:
480 return token
481 if webpage:
482 return self._search_regex(
483 r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
484 'identity token', default=None, fatal=False)
485
486 @staticmethod
487 def _extract_account_syncid(*args):
488 """
489 Extract syncId required to download private playlists of secondary channels
490 @params response and/or ytcfg
491 """
492 for data in args:
493 # ytcfg includes channel_syncid if on secondary channel
494 delegated_sid = try_get(data, lambda x: x['DELEGATED_SESSION_ID'], compat_str)
495 if delegated_sid:
496 return delegated_sid
497 sync_ids = (try_get(
498 data, (lambda x: x['responseContext']['mainAppWebResponseContext']['datasyncId'],
499 lambda x: x['DATASYNC_ID']), compat_str) or '').split('||')
500 if len(sync_ids) >= 2 and sync_ids[1]:
501 # datasyncid is of the form "channel_syncid||user_syncid" for secondary channel
502 # and just "user_syncid||" for primary channel. We only want the channel_syncid
503 return sync_ids[0]
504
505 @staticmethod
506 def _extract_visitor_data(*args):
507 """
508 Extracts visitorData from an API response or ytcfg
509 Appears to be used to track session state
510 """
511 return traverse_obj(
512 args, (..., ('VISITOR_DATA', ('INNERTUBE_CONTEXT', 'client', 'visitorData'), ('responseContext', 'visitorData'))),
513 expected_type=compat_str, get_all=False)
514
515 @property
516 def is_authenticated(self):
517 return bool(self._generate_sapisidhash_header())
518
519 def extract_ytcfg(self, video_id, webpage):
520 if not webpage:
521 return {}
522 return self._parse_json(
523 self._search_regex(
524 r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg',
525 default='{}'), video_id, fatal=False) or {}
526
527 def generate_api_headers(
528 self, *, ytcfg=None, account_syncid=None, session_index=None,
529 visitor_data=None, identity_token=None, api_hostname=None, default_client='web'):
530
531 origin = 'https://' + (api_hostname if api_hostname else self._get_innertube_host(default_client))
532 headers = {
533 'X-YouTube-Client-Name': compat_str(
534 self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_CONTEXT_CLIENT_NAME'], default_client=default_client)),
535 'X-YouTube-Client-Version': self._extract_client_version(ytcfg, default_client),
536 'Origin': origin,
537 'X-Youtube-Identity-Token': identity_token or self._extract_identity_token(ytcfg),
538 'X-Goog-PageId': account_syncid or self._extract_account_syncid(ytcfg),
539 'X-Goog-Visitor-Id': visitor_data or self._extract_visitor_data(ytcfg)
540 }
541 if session_index is None:
542 session_index = self._extract_session_index(ytcfg)
543 if account_syncid or session_index is not None:
544 headers['X-Goog-AuthUser'] = session_index if session_index is not None else 0
545
546 auth = self._generate_sapisidhash_header(origin)
547 if auth is not None:
548 headers['Authorization'] = auth
549 headers['X-Origin'] = origin
550 return {h: v for h, v in headers.items() if v is not None}
551
552 @staticmethod
553 def _build_api_continuation_query(continuation, ctp=None):
554 query = {
555 'continuation': continuation
556 }
557 # TODO: Inconsistency with clickTrackingParams.
558 # Currently we have a fixed ctp contained within context (from ytcfg)
559 # and a ctp in root query for continuation.
560 if ctp:
561 query['clickTracking'] = {'clickTrackingParams': ctp}
562 return query
563
564 @classmethod
565 def _extract_next_continuation_data(cls, renderer):
566 next_continuation = try_get(
567 renderer, (lambda x: x['continuations'][0]['nextContinuationData'],
568 lambda x: x['continuation']['reloadContinuationData']), dict)
569 if not next_continuation:
570 return
571 continuation = next_continuation.get('continuation')
572 if not continuation:
573 return
574 ctp = next_continuation.get('clickTrackingParams')
575 return cls._build_api_continuation_query(continuation, ctp)
576
577 @classmethod
578 def _extract_continuation_ep_data(cls, continuation_ep: dict):
579 if isinstance(continuation_ep, dict):
580 continuation = try_get(
581 continuation_ep, lambda x: x['continuationCommand']['token'], compat_str)
582 if not continuation:
583 return
584 ctp = continuation_ep.get('clickTrackingParams')
585 return cls._build_api_continuation_query(continuation, ctp)
586
587 @classmethod
588 def _extract_continuation(cls, renderer):
589 next_continuation = cls._extract_next_continuation_data(renderer)
590 if next_continuation:
591 return next_continuation
592
593 contents = []
594 for key in ('contents', 'items'):
595 contents.extend(try_get(renderer, lambda x: x[key], list) or [])
596
597 for content in contents:
598 if not isinstance(content, dict):
599 continue
600 continuation_ep = try_get(
601 content, (lambda x: x['continuationItemRenderer']['continuationEndpoint'],
602 lambda x: x['continuationItemRenderer']['button']['buttonRenderer']['command']),
603 dict)
604 continuation = cls._extract_continuation_ep_data(continuation_ep)
605 if continuation:
606 return continuation
607
608 @classmethod
609 def _extract_alerts(cls, data):
610 for alert_dict in try_get(data, lambda x: x['alerts'], list) or []:
611 if not isinstance(alert_dict, dict):
612 continue
613 for alert in alert_dict.values():
614 alert_type = alert.get('type')
615 if not alert_type:
616 continue
617 message = cls._get_text(alert, 'text')
618 if message:
619 yield alert_type, message
620
621 def _report_alerts(self, alerts, expected=True, fatal=True, only_once=False):
622 errors = []
623 warnings = []
624 for alert_type, alert_message in alerts:
625 if alert_type.lower() == 'error' and fatal:
626 errors.append([alert_type, alert_message])
627 else:
628 warnings.append([alert_type, alert_message])
629
630 for alert_type, alert_message in (warnings + errors[:-1]):
631 self.report_warning('YouTube said: %s - %s' % (alert_type, alert_message), only_once=only_once)
632 if errors:
633 raise ExtractorError('YouTube said: %s' % errors[-1][1], expected=expected)
634
635 def _extract_and_report_alerts(self, data, *args, **kwargs):
636 return self._report_alerts(self._extract_alerts(data), *args, **kwargs)
637
638 def _extract_badges(self, renderer: dict):
639 badges = set()
640 for badge in try_get(renderer, lambda x: x['badges'], list) or []:
641 label = try_get(badge, lambda x: x['metadataBadgeRenderer']['label'], compat_str)
642 if label:
643 badges.add(label.lower())
644 return badges
645
646 @staticmethod
647 def _get_text(data, *path_list, max_runs=None):
648 for path in path_list or [None]:
649 if path is None:
650 obj = [data]
651 else:
652 obj = traverse_obj(data, path, default=[])
653 if not any(key is ... or isinstance(key, (list, tuple)) for key in variadic(path)):
654 obj = [obj]
655 for item in obj:
656 text = try_get(item, lambda x: x['simpleText'], compat_str)
657 if text:
658 return text
659 runs = try_get(item, lambda x: x['runs'], list) or []
660 if not runs and isinstance(item, list):
661 runs = item
662
663 runs = runs[:min(len(runs), max_runs or len(runs))]
664 text = ''.join(traverse_obj(runs, (..., 'text'), expected_type=str, default=[]))
665 if text:
666 return text
667
668 def _extract_response(self, item_id, query, note='Downloading API JSON', headers=None,
669 ytcfg=None, check_get_keys=None, ep='browse', fatal=True, api_hostname=None,
670 default_client='web'):
671 response = None
672 last_error = None
673 count = -1
674 retries = self.get_param('extractor_retries', 3)
675 if check_get_keys is None:
676 check_get_keys = []
677 while count < retries:
678 count += 1
679 if last_error:
680 self.report_warning('%s. Retrying ...' % remove_end(last_error, '.'))
681 try:
682 response = self._call_api(
683 ep=ep, fatal=True, headers=headers,
684 video_id=item_id, query=query,
685 context=self._extract_context(ytcfg, default_client),
686 api_key=self._extract_api_key(ytcfg, default_client),
687 api_hostname=api_hostname, default_client=default_client,
688 note='%s%s' % (note, ' (retry #%d)' % count if count else ''))
689 except ExtractorError as e:
690 if isinstance(e.cause, network_exceptions):
691 if isinstance(e.cause, compat_HTTPError) and not is_html(e.cause.read(512)):
692 e.cause.seek(0)
693 yt_error = try_get(
694 self._parse_json(e.cause.read().decode(), item_id, fatal=False),
695 lambda x: x['error']['message'], compat_str)
696 if yt_error:
697 self._report_alerts([('ERROR', yt_error)], fatal=False)
698 # Downloading page may result in intermittent 5xx HTTP error
699 # Sometimes a 404 is also recieved. See: https://github.com/ytdl-org/youtube-dl/issues/28289
700 # We also want to catch all other network exceptions since errors in later pages can be troublesome
701 # See https://github.com/yt-dlp/yt-dlp/issues/507#issuecomment-880188210
702 if not isinstance(e.cause, compat_HTTPError) or e.cause.code not in (403, 429):
703 last_error = error_to_compat_str(e.cause or e.msg)
704 if count < retries:
705 continue
706 if fatal:
707 raise
708 else:
709 self.report_warning(error_to_compat_str(e))
710 return
711
712 else:
713 try:
714 self._extract_and_report_alerts(response, only_once=True)
715 except ExtractorError as e:
716 # YouTube servers may return errors we want to retry on in a 200 OK response
717 # See: https://github.com/yt-dlp/yt-dlp/issues/839
718 if 'unknown error' in e.msg.lower():
719 last_error = e.msg
720 continue
721 if fatal:
722 raise
723 self.report_warning(error_to_compat_str(e))
724 return
725 if not check_get_keys or dict_get(response, check_get_keys):
726 break
727 # Youtube sometimes sends incomplete data
728 # See: https://github.com/ytdl-org/youtube-dl/issues/28194
729 last_error = 'Incomplete data received'
730 if count >= retries:
731 if fatal:
732 raise ExtractorError(last_error)
733 else:
734 self.report_warning(last_error)
735 return
736 return response
737
738 @staticmethod
739 def is_music_url(url):
740 return re.match(r'https?://music\.youtube\.com/', url) is not None
741
742 def _extract_video(self, renderer):
743 video_id = renderer.get('videoId')
744 title = self._get_text(renderer, 'title')
745 description = self._get_text(renderer, 'descriptionSnippet')
746 duration = parse_duration(self._get_text(
747 renderer, 'lengthText', ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'text')))
748 view_count_text = self._get_text(renderer, 'viewCountText') or ''
749 view_count = str_to_int(self._search_regex(
750 r'^([\d,]+)', re.sub(r'\s', '', view_count_text),
751 'view count', default=None))
752
753 uploader = self._get_text(renderer, 'ownerText', 'shortBylineText')
754
755 return {
756 '_type': 'url',
757 'ie_key': YoutubeIE.ie_key(),
758 'id': video_id,
759 'url': f'https://www.youtube.com/watch?v={video_id}',
760 'title': title,
761 'description': description,
762 'duration': duration,
763 'view_count': view_count,
764 'uploader': uploader,
765 }
766
767
768class YoutubeIE(YoutubeBaseInfoExtractor):
769 IE_DESC = 'YouTube'
770 _VALID_URL = r"""(?x)^
771 (
772 (?:https?://|//) # http(s):// or protocol-independent URL
773 (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com|
774 (?:www\.)?deturl\.com/www\.youtube\.com|
775 (?:www\.)?pwnyoutube\.com|
776 (?:www\.)?hooktube\.com|
777 (?:www\.)?yourepeat\.com|
778 tube\.majestyc\.net|
779 %(invidious)s|
780 youtube\.googleapis\.com)/ # the various hostnames, with wildcard subdomains
781 (?:.*?\#/)? # handle anchor (#/) redirect urls
782 (?: # the various things that can precede the ID:
783 (?:(?:v|embed|e|shorts)/(?!videoseries)) # v/ or embed/ or e/ or shorts/
784 |(?: # or the v= param in all its forms
785 (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
786 (?:\?|\#!?) # the params delimiter ? or # or #!
787 (?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
788 v=
789 )
790 ))
791 |(?:
792 youtu\.be| # just youtu.be/xxxx
793 vid\.plus| # or vid.plus/xxxx
794 zwearz\.com/watch| # or zwearz.com/watch/xxxx
795 %(invidious)s
796 )/
797 |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
798 )
799 )? # all until now is optional -> you can pass the naked ID
800 (?P<id>[0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
801 (?(1).+)? # if we found the ID, everything can follow
802 (?:\#|$)""" % {
803 'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
804 }
805 _PLAYER_INFO_RE = (
806 r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player',
807 r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',
808 r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$',
809 )
810 _formats = {
811 '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
812 '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
813 '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
814 '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
815 '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
816 '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
817 '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
818 '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
819 # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
820 '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
821 '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
822 '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
823 '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
824 '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
825 '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
826 '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
827 '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
828 '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
829
830
831 # 3D videos
832 '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
833 '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
834 '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
835 '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
836 '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
837 '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
838 '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
839
840 # Apple HTTP Live Streaming
841 '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
842 '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
843 '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
844 '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
845 '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
846 '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
847 '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
848 '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
849
850 # DASH mp4 video
851 '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
852 '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
853 '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
854 '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
855 '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
856 '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
857 '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
858 '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
859 '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
860 '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
861 '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
862 '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},
863
864 # Dash mp4 audio
865 '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
866 '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
867 '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
868 '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
869 '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
870 '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
871 '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},
872
873 # Dash webm
874 '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
875 '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
876 '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
877 '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
878 '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
879 '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
880 '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
881 '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
882 '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
883 '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
884 '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
885 '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
886 '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
887 '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
888 '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
889 # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
890 '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
891 '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
892 '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
893 '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
894 '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
895 '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
896
897 # Dash webm audio
898 '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
899 '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},
900
901 # Dash webm audio with opus inside
902 '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
903 '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
904 '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},
905
906 # RTMP (unnamed)
907 '_rtmp': {'protocol': 'rtmp'},
908
909 # av01 video only formats sometimes served with "unknown" codecs
910 '394': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},
911 '395': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},
912 '396': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'av01.0.01M.08'},
913 '397': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'av01.0.04M.08'},
914 '398': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'av01.0.05M.08'},
915 '399': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'av01.0.08M.08'},
916 '400': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},
917 '401': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},
918 }
919 _SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')
920
921 _GEO_BYPASS = False
922
923 IE_NAME = 'youtube'
924 _TESTS = [
925 {
926 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
927 'info_dict': {
928 'id': 'BaW_jenozKc',
929 'ext': 'mp4',
930 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
931 'uploader': 'Philipp Hagemeister',
932 'uploader_id': 'phihag',
933 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
934 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
935 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
936 'upload_date': '20121002',
937 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
938 'categories': ['Science & Technology'],
939 'tags': ['youtube-dl'],
940 'duration': 10,
941 'view_count': int,
942 'like_count': int,
943 'dislike_count': int,
944 'start_time': 1,
945 'end_time': 9,
946 }
947 },
948 {
949 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
950 'note': 'Embed-only video (#1746)',
951 'info_dict': {
952 'id': 'yZIXLfi8CZQ',
953 'ext': 'mp4',
954 'upload_date': '20120608',
955 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
956 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
957 'uploader': 'SET India',
958 'uploader_id': 'setindia',
959 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
960 'age_limit': 18,
961 },
962 'skip': 'Private video',
963 },
964 {
965 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=yZIXLfi8CZQ',
966 'note': 'Use the first video ID in the URL',
967 'info_dict': {
968 'id': 'BaW_jenozKc',
969 'ext': 'mp4',
970 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
971 'uploader': 'Philipp Hagemeister',
972 'uploader_id': 'phihag',
973 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
974 'upload_date': '20121002',
975 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
976 'categories': ['Science & Technology'],
977 'tags': ['youtube-dl'],
978 'duration': 10,
979 'view_count': int,
980 'like_count': int,
981 'dislike_count': int,
982 },
983 'params': {
984 'skip_download': True,
985 },
986 },
987 {
988 'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
989 'note': '256k DASH audio (format 141) via DASH manifest',
990 'info_dict': {
991 'id': 'a9LDPn-MO4I',
992 'ext': 'm4a',
993 'upload_date': '20121002',
994 'uploader_id': '8KVIDEO',
995 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
996 'description': '',
997 'uploader': '8KVIDEO',
998 'title': 'UHDTV TEST 8K VIDEO.mp4'
999 },
1000 'params': {
1001 'youtube_include_dash_manifest': True,
1002 'format': '141',
1003 },
1004 'skip': 'format 141 not served anymore',
1005 },
1006 # DASH manifest with encrypted signature
1007 {
1008 'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
1009 'info_dict': {
1010 'id': 'IB3lcPjvWLA',
1011 'ext': 'm4a',
1012 'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
1013 'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
1014 'duration': 244,
1015 'uploader': 'AfrojackVEVO',
1016 'uploader_id': 'AfrojackVEVO',
1017 'upload_date': '20131011',
1018 'abr': 129.495,
1019 },
1020 'params': {
1021 'youtube_include_dash_manifest': True,
1022 'format': '141/bestaudio[ext=m4a]',
1023 },
1024 },
1025 # Age-gate videos. See https://github.com/yt-dlp/yt-dlp/pull/575#issuecomment-888837000
1026 {
1027 'note': 'Embed allowed age-gate video',
1028 'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
1029 'info_dict': {
1030 'id': 'HtVdAasjOgU',
1031 'ext': 'mp4',
1032 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
1033 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
1034 'duration': 142,
1035 'uploader': 'The Witcher',
1036 'uploader_id': 'WitcherGame',
1037 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
1038 'upload_date': '20140605',
1039 'age_limit': 18,
1040 },
1041 },
1042 {
1043 'note': 'Age-gate video with embed allowed in public site',
1044 'url': 'https://youtube.com/watch?v=HsUATh_Nc2U',
1045 'info_dict': {
1046 'id': 'HsUATh_Nc2U',
1047 'ext': 'mp4',
1048 'title': 'Godzilla 2 (Official Video)',
1049 'description': 'md5:bf77e03fcae5529475e500129b05668a',
1050 'upload_date': '20200408',
1051 'uploader_id': 'FlyingKitty900',
1052 'uploader': 'FlyingKitty',
1053 'age_limit': 18,
1054 },
1055 },
1056 {
1057 'note': 'Age-gate video embedable only with clientScreen=EMBED',
1058 'url': 'https://youtube.com/watch?v=Tq92D6wQ1mg',
1059 'info_dict': {
1060 'id': 'Tq92D6wQ1mg',
1061 'title': '[MMD] Adios - EVERGLOW [+Motion DL]',
1062 'ext': 'mp4',
1063 'upload_date': '20191227',
1064 'uploader_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',
1065 'uploader': 'Projekt Melody',
1066 'description': 'md5:17eccca93a786d51bc67646756894066',
1067 'age_limit': 18,
1068 },
1069 },
1070 {
1071 'note': 'Non-Agegated non-embeddable video',
1072 'url': 'https://youtube.com/watch?v=MeJVWBSsPAY',
1073 'info_dict': {
1074 'id': 'MeJVWBSsPAY',
1075 'ext': 'mp4',
1076 'title': 'OOMPH! - Such Mich Find Mich (Lyrics)',
1077 'uploader': 'Herr Lurik',
1078 'uploader_id': 'st3in234',
1079 'description': 'Fan Video. Music & Lyrics by OOMPH!.',
1080 'upload_date': '20130730',
1081 },
1082 },
1083 {
1084 'note': 'Non-bypassable age-gated video',
1085 'url': 'https://youtube.com/watch?v=Cr381pDsSsA',
1086 'only_matching': True,
1087 },
1088 # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
1089 # YouTube Red ad is not captured for creator
1090 {
1091 'url': '__2ABJjxzNo',
1092 'info_dict': {
1093 'id': '__2ABJjxzNo',
1094 'ext': 'mp4',
1095 'duration': 266,
1096 'upload_date': '20100430',
1097 'uploader_id': 'deadmau5',
1098 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
1099 'creator': 'deadmau5',
1100 'description': 'md5:6cbcd3a92ce1bc676fc4d6ab4ace2336',
1101 'uploader': 'deadmau5',
1102 'title': 'Deadmau5 - Some Chords (HD)',
1103 'alt_title': 'Some Chords',
1104 },
1105 'expected_warnings': [
1106 'DASH manifest missing',
1107 ]
1108 },
1109 # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
1110 {
1111 'url': 'lqQg6PlCWgI',
1112 'info_dict': {
1113 'id': 'lqQg6PlCWgI',
1114 'ext': 'mp4',
1115 'duration': 6085,
1116 'upload_date': '20150827',
1117 'uploader_id': 'olympic',
1118 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
1119 'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
1120 'uploader': 'Olympics',
1121 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
1122 },
1123 'params': {
1124 'skip_download': 'requires avconv',
1125 }
1126 },
1127 # Non-square pixels
1128 {
1129 'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
1130 'info_dict': {
1131 'id': '_b-2C3KPAM0',
1132 'ext': 'mp4',
1133 'stretched_ratio': 16 / 9.,
1134 'duration': 85,
1135 'upload_date': '20110310',
1136 'uploader_id': 'AllenMeow',
1137 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
1138 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
1139 'uploader': '孫ᄋᄅ',
1140 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
1141 },
1142 },
1143 # url_encoded_fmt_stream_map is empty string
1144 {
1145 'url': 'qEJwOuvDf7I',
1146 'info_dict': {
1147 'id': 'qEJwOuvDf7I',
1148 'ext': 'webm',
1149 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
1150 'description': '',
1151 'upload_date': '20150404',
1152 'uploader_id': 'spbelect',
1153 'uploader': 'Наблюдатели Петербурга',
1154 },
1155 'params': {
1156 'skip_download': 'requires avconv',
1157 },
1158 'skip': 'This live event has ended.',
1159 },
1160 # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
1161 {
1162 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
1163 'info_dict': {
1164 'id': 'FIl7x6_3R5Y',
1165 'ext': 'webm',
1166 'title': 'md5:7b81415841e02ecd4313668cde88737a',
1167 'description': 'md5:116377fd2963b81ec4ce64b542173306',
1168 'duration': 220,
1169 'upload_date': '20150625',
1170 'uploader_id': 'dorappi2000',
1171 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
1172 'uploader': 'dorappi2000',
1173 'formats': 'mincount:31',
1174 },
1175 'skip': 'not actual anymore',
1176 },
1177 # DASH manifest with segment_list
1178 {
1179 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
1180 'md5': '8ce563a1d667b599d21064e982ab9e31',
1181 'info_dict': {
1182 'id': 'CsmdDsKjzN8',
1183 'ext': 'mp4',
1184 'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510
1185 'uploader': 'Airtek',
1186 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
1187 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
1188 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
1189 },
1190 'params': {
1191 'youtube_include_dash_manifest': True,
1192 'format': '135', # bestvideo
1193 },
1194 'skip': 'This live event has ended.',
1195 },
1196 {
1197 # Multifeed videos (multiple cameras), URL is for Main Camera
1198 'url': 'https://www.youtube.com/watch?v=jvGDaLqkpTg',
1199 'info_dict': {
1200 'id': 'jvGDaLqkpTg',
1201 'title': 'Tom Clancy Free Weekend Rainbow Whatever',
1202 'description': 'md5:e03b909557865076822aa169218d6a5d',
1203 },
1204 'playlist': [{
1205 'info_dict': {
1206 'id': 'jvGDaLqkpTg',
1207 'ext': 'mp4',
1208 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Main Camera)',
1209 'description': 'md5:e03b909557865076822aa169218d6a5d',
1210 'duration': 10643,
1211 'upload_date': '20161111',
1212 'uploader': 'Team PGP',
1213 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1214 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
1215 },
1216 }, {
1217 'info_dict': {
1218 'id': '3AKt1R1aDnw',
1219 'ext': 'mp4',
1220 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 2)',
1221 'description': 'md5:e03b909557865076822aa169218d6a5d',
1222 'duration': 10991,
1223 'upload_date': '20161111',
1224 'uploader': 'Team PGP',
1225 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1226 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
1227 },
1228 }, {
1229 'info_dict': {
1230 'id': 'RtAMM00gpVc',
1231 'ext': 'mp4',
1232 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 3)',
1233 'description': 'md5:e03b909557865076822aa169218d6a5d',
1234 'duration': 10995,
1235 'upload_date': '20161111',
1236 'uploader': 'Team PGP',
1237 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1238 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
1239 },
1240 }, {
1241 'info_dict': {
1242 'id': '6N2fdlP3C5U',
1243 'ext': 'mp4',
1244 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 4)',
1245 'description': 'md5:e03b909557865076822aa169218d6a5d',
1246 'duration': 10990,
1247 'upload_date': '20161111',
1248 'uploader': 'Team PGP',
1249 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1250 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
1251 },
1252 }],
1253 'params': {
1254 'skip_download': True,
1255 },
1256 'skip': 'Not multifeed anymore',
1257 },
1258 {
1259 # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
1260 'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
1261 'info_dict': {
1262 'id': 'gVfLd0zydlo',
1263 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
1264 },
1265 'playlist_count': 2,
1266 'skip': 'Not multifeed anymore',
1267 },
1268 {
1269 'url': 'https://vid.plus/FlRa-iH7PGw',
1270 'only_matching': True,
1271 },
1272 {
1273 'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
1274 'only_matching': True,
1275 },
1276 {
1277 # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
1278 # Also tests cut-off URL expansion in video description (see
1279 # https://github.com/ytdl-org/youtube-dl/issues/1892,
1280 # https://github.com/ytdl-org/youtube-dl/issues/8164)
1281 'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
1282 'info_dict': {
1283 'id': 'lsguqyKfVQg',
1284 'ext': 'mp4',
1285 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
1286 'alt_title': 'Dark Walk',
1287 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
1288 'duration': 133,
1289 'upload_date': '20151119',
1290 'uploader_id': 'IronSoulElf',
1291 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
1292 'uploader': 'IronSoulElf',
1293 'creator': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',
1294 'track': 'Dark Walk',
1295 'artist': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',
1296 'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
1297 },
1298 'params': {
1299 'skip_download': True,
1300 },
1301 },
1302 {
1303 # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
1304 'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
1305 'only_matching': True,
1306 },
1307 {
1308 # Video with yt:stretch=17:0
1309 'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
1310 'info_dict': {
1311 'id': 'Q39EVAstoRM',
1312 'ext': 'mp4',
1313 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
1314 'description': 'md5:ee18a25c350637c8faff806845bddee9',
1315 'upload_date': '20151107',
1316 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
1317 'uploader': 'CH GAMER DROID',
1318 },
1319 'params': {
1320 'skip_download': True,
1321 },
1322 'skip': 'This video does not exist.',
1323 },
1324 {
1325 # Video with incomplete 'yt:stretch=16:'
1326 'url': 'https://www.youtube.com/watch?v=FRhJzUSJbGI',
1327 'only_matching': True,
1328 },
1329 {
1330 # Video licensed under Creative Commons
1331 'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
1332 'info_dict': {
1333 'id': 'M4gD1WSo5mA',
1334 'ext': 'mp4',
1335 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
1336 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
1337 'duration': 721,
1338 'upload_date': '20150127',
1339 'uploader_id': 'BerkmanCenter',
1340 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
1341 'uploader': 'The Berkman Klein Center for Internet & Society',
1342 'license': 'Creative Commons Attribution license (reuse allowed)',
1343 },
1344 'params': {
1345 'skip_download': True,
1346 },
1347 },
1348 {
1349 # Channel-like uploader_url
1350 'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
1351 'info_dict': {
1352 'id': 'eQcmzGIKrzg',
1353 'ext': 'mp4',
1354 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
1355 'description': 'md5:13a2503d7b5904ef4b223aa101628f39',
1356 'duration': 4060,
1357 'upload_date': '20151119',
1358 'uploader': 'Bernie Sanders',
1359 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
1360 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
1361 'license': 'Creative Commons Attribution license (reuse allowed)',
1362 },
1363 'params': {
1364 'skip_download': True,
1365 },
1366 },
1367 {
1368 'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
1369 'only_matching': True,
1370 },
1371 {
1372 # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
1373 'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
1374 'only_matching': True,
1375 },
1376 {
1377 # Rental video preview
1378 'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
1379 'info_dict': {
1380 'id': 'uGpuVWrhIzE',
1381 'ext': 'mp4',
1382 'title': 'Piku - Trailer',
1383 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
1384 'upload_date': '20150811',
1385 'uploader': 'FlixMatrix',
1386 'uploader_id': 'FlixMatrixKaravan',
1387 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
1388 'license': 'Standard YouTube License',
1389 },
1390 'params': {
1391 'skip_download': True,
1392 },
1393 'skip': 'This video is not available.',
1394 },
1395 {
1396 # YouTube Red video with episode data
1397 'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
1398 'info_dict': {
1399 'id': 'iqKdEhx-dD4',
1400 'ext': 'mp4',
1401 'title': 'Isolation - Mind Field (Ep 1)',
1402 'description': 'md5:f540112edec5d09fc8cc752d3d4ba3cd',
1403 'duration': 2085,
1404 'upload_date': '20170118',
1405 'uploader': 'Vsauce',
1406 'uploader_id': 'Vsauce',
1407 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
1408 'series': 'Mind Field',
1409 'season_number': 1,
1410 'episode_number': 1,
1411 },
1412 'params': {
1413 'skip_download': True,
1414 },
1415 'expected_warnings': [
1416 'Skipping DASH manifest',
1417 ],
1418 },
1419 {
1420 # The following content has been identified by the YouTube community
1421 # as inappropriate or offensive to some audiences.
1422 'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
1423 'info_dict': {
1424 'id': '6SJNVb0GnPI',
1425 'ext': 'mp4',
1426 'title': 'Race Differences in Intelligence',
1427 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
1428 'duration': 965,
1429 'upload_date': '20140124',
1430 'uploader': 'New Century Foundation',
1431 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
1432 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
1433 },
1434 'params': {
1435 'skip_download': True,
1436 },
1437 'skip': 'This video has been removed for violating YouTube\'s policy on hate speech.',
1438 },
1439 {
1440 # itag 212
1441 'url': '1t24XAntNCY',
1442 'only_matching': True,
1443 },
1444 {
1445 # geo restricted to JP
1446 'url': 'sJL6WA-aGkQ',
1447 'only_matching': True,
1448 },
1449 {
1450 'url': 'https://invidio.us/watch?v=BaW_jenozKc',
1451 'only_matching': True,
1452 },
1453 {
1454 'url': 'https://redirect.invidious.io/watch?v=BaW_jenozKc',
1455 'only_matching': True,
1456 },
1457 {
1458 # from https://nitter.pussthecat.org/YouTube/status/1360363141947944964#m
1459 'url': 'https://redirect.invidious.io/Yh0AhrY9GjA',
1460 'only_matching': True,
1461 },
1462 {
1463 # DRM protected
1464 'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
1465 'only_matching': True,
1466 },
1467 {
1468 # Video with unsupported adaptive stream type formats
1469 'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
1470 'info_dict': {
1471 'id': 'Z4Vy8R84T1U',
1472 'ext': 'mp4',
1473 'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
1474 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
1475 'duration': 433,
1476 'upload_date': '20130923',
1477 'uploader': 'Amelia Putri Harwita',
1478 'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',
1479 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',
1480 'formats': 'maxcount:10',
1481 },
1482 'params': {
1483 'skip_download': True,
1484 'youtube_include_dash_manifest': False,
1485 },
1486 'skip': 'Not relevant anymore',
1487 },
1488 {
1489 # Youtube Music Auto-generated description
1490 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1491 'info_dict': {
1492 'id': 'MgNrAu2pzNs',
1493 'ext': 'mp4',
1494 'title': 'Voyeur Girl',
1495 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
1496 'upload_date': '20190312',
1497 'uploader': 'Stephen - Topic',
1498 'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',
1499 'artist': 'Stephen',
1500 'track': 'Voyeur Girl',
1501 'album': 'it\'s too much love to know my dear',
1502 'release_date': '20190313',
1503 'release_year': 2019,
1504 },
1505 'params': {
1506 'skip_download': True,
1507 },
1508 },
1509 {
1510 'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',
1511 'only_matching': True,
1512 },
1513 {
1514 # invalid -> valid video id redirection
1515 'url': 'DJztXj2GPfl',
1516 'info_dict': {
1517 'id': 'DJztXj2GPfk',
1518 'ext': 'mp4',
1519 'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',
1520 'description': 'md5:bf577a41da97918e94fa9798d9228825',
1521 'upload_date': '20090125',
1522 'uploader': 'Prochorowka',
1523 'uploader_id': 'Prochorowka',
1524 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',
1525 'artist': 'Panjabi MC',
1526 'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',
1527 'album': 'Beware of the Boys (Mundian To Bach Ke)',
1528 },
1529 'params': {
1530 'skip_download': True,
1531 },
1532 'skip': 'Video unavailable',
1533 },
1534 {
1535 # empty description results in an empty string
1536 'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',
1537 'info_dict': {
1538 'id': 'x41yOUIvK2k',
1539 'ext': 'mp4',
1540 'title': 'IMG 3456',
1541 'description': '',
1542 'upload_date': '20170613',
1543 'uploader_id': 'ElevageOrVert',
1544 'uploader': 'ElevageOrVert',
1545 },
1546 'params': {
1547 'skip_download': True,
1548 },
1549 },
1550 {
1551 # with '};' inside yt initial data (see [1])
1552 # see [2] for an example with '};' inside ytInitialPlayerResponse
1553 # 1. https://github.com/ytdl-org/youtube-dl/issues/27093
1554 # 2. https://github.com/ytdl-org/youtube-dl/issues/27216
1555 'url': 'https://www.youtube.com/watch?v=CHqg6qOn4no',
1556 'info_dict': {
1557 'id': 'CHqg6qOn4no',
1558 'ext': 'mp4',
1559 'title': 'Part 77 Sort a list of simple types in c#',
1560 'description': 'md5:b8746fa52e10cdbf47997903f13b20dc',
1561 'upload_date': '20130831',
1562 'uploader_id': 'kudvenkat',
1563 'uploader': 'kudvenkat',
1564 },
1565 'params': {
1566 'skip_download': True,
1567 },
1568 },
1569 {
1570 # another example of '};' in ytInitialData
1571 'url': 'https://www.youtube.com/watch?v=gVfgbahppCY',
1572 'only_matching': True,
1573 },
1574 {
1575 'url': 'https://www.youtube.com/watch_popup?v=63RmMXCd_bQ',
1576 'only_matching': True,
1577 },
1578 {
1579 # https://github.com/ytdl-org/youtube-dl/pull/28094
1580 'url': 'OtqTfy26tG0',
1581 'info_dict': {
1582 'id': 'OtqTfy26tG0',
1583 'ext': 'mp4',
1584 'title': 'Burn Out',
1585 'description': 'md5:8d07b84dcbcbfb34bc12a56d968b6131',
1586 'upload_date': '20141120',
1587 'uploader': 'The Cinematic Orchestra - Topic',
1588 'uploader_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',
1589 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',
1590 'artist': 'The Cinematic Orchestra',
1591 'track': 'Burn Out',
1592 'album': 'Every Day',
1593 'release_date': None,
1594 'release_year': None,
1595 },
1596 'params': {
1597 'skip_download': True,
1598 },
1599 },
1600 {
1601 # controversial video, only works with bpctr when authenticated with cookies
1602 'url': 'https://www.youtube.com/watch?v=nGC3D_FkCmg',
1603 'only_matching': True,
1604 },
1605 {
1606 # controversial video, requires bpctr/contentCheckOk
1607 'url': 'https://www.youtube.com/watch?v=SZJvDhaSDnc',
1608 'info_dict': {
1609 'id': 'SZJvDhaSDnc',
1610 'ext': 'mp4',
1611 'title': 'San Diego teen commits suicide after bullying over embarrassing video',
1612 'channel_id': 'UC-SJ6nODDmufqBzPBwCvYvQ',
1613 'uploader': 'CBS This Morning',
1614 'uploader_id': 'CBSThisMorning',
1615 'upload_date': '20140716',
1616 'description': 'md5:acde3a73d3f133fc97e837a9f76b53b7'
1617 }
1618 },
1619 {
1620 # restricted location, https://github.com/ytdl-org/youtube-dl/issues/28685
1621 'url': 'cBvYw8_A0vQ',
1622 'info_dict': {
1623 'id': 'cBvYw8_A0vQ',
1624 'ext': 'mp4',
1625 'title': '4K Ueno Okachimachi Street Scenes 上野御徒町歩き',
1626 'description': 'md5:ea770e474b7cd6722b4c95b833c03630',
1627 'upload_date': '20201120',
1628 'uploader': 'Walk around Japan',
1629 'uploader_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',
1630 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',
1631 },
1632 'params': {
1633 'skip_download': True,
1634 },
1635 }, {
1636 # Has multiple audio streams
1637 'url': 'WaOKSUlf4TM',
1638 'only_matching': True
1639 }, {
1640 # Requires Premium: has format 141 when requested using YTM url
1641 'url': 'https://music.youtube.com/watch?v=XclachpHxis',
1642 'only_matching': True
1643 }, {
1644 # multiple subtitles with same lang_code
1645 'url': 'https://www.youtube.com/watch?v=wsQiKKfKxug',
1646 'only_matching': True,
1647 }, {
1648 # Force use android client fallback
1649 'url': 'https://www.youtube.com/watch?v=YOelRv7fMxY',
1650 'info_dict': {
1651 'id': 'YOelRv7fMxY',
1652 'title': 'DIGGING A SECRET TUNNEL Part 1',
1653 'ext': '3gp',
1654 'upload_date': '20210624',
1655 'channel_id': 'UCp68_FLety0O-n9QU6phsgw',
1656 'uploader': 'colinfurze',
1657 'uploader_id': 'colinfurze',
1658 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCp68_FLety0O-n9QU6phsgw',
1659 'description': 'md5:b5096f56af7ccd7a555c84db81738b22'
1660 },
1661 'params': {
1662 'format': '17', # 3gp format available on android
1663 'extractor_args': {'youtube': {'player_client': ['android']}},
1664 },
1665 },
1666 {
1667 # Skip download of additional client configs (remix client config in this case)
1668 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1669 'only_matching': True,
1670 'params': {
1671 'extractor_args': {'youtube': {'player_skip': ['configs']}},
1672 },
1673 }, {
1674 # shorts
1675 'url': 'https://www.youtube.com/shorts/BGQWPY4IigY',
1676 'only_matching': True,
1677 },
1678 ]
1679
1680 @classmethod
1681 def suitable(cls, url):
1682 from ..utils import parse_qs
1683
1684 qs = parse_qs(url)
1685 if qs.get('list', [None])[0]:
1686 return False
1687 return super(YoutubeIE, cls).suitable(url)
1688
1689 def __init__(self, *args, **kwargs):
1690 super(YoutubeIE, self).__init__(*args, **kwargs)
1691 self._code_cache = {}
1692 self._player_cache = {}
1693
1694 def _extract_player_url(self, *ytcfgs, webpage=None):
1695 player_url = traverse_obj(
1696 ytcfgs, (..., 'PLAYER_JS_URL'), (..., 'WEB_PLAYER_CONTEXT_CONFIGS', ..., 'jsUrl'),
1697 get_all=False, expected_type=compat_str)
1698 if not player_url:
1699 return
1700 if player_url.startswith('//'):
1701 player_url = 'https:' + player_url
1702 elif not re.match(r'https?://', player_url):
1703 player_url = compat_urlparse.urljoin(
1704 'https://www.youtube.com', player_url)
1705 return player_url
1706
1707 def _download_player_url(self, video_id, fatal=False):
1708 res = self._download_webpage(
1709 'https://www.youtube.com/iframe_api',
1710 note='Downloading iframe API JS', video_id=video_id, fatal=fatal)
1711 if res:
1712 player_version = self._search_regex(
1713 r'player\\?/([0-9a-fA-F]{8})\\?/', res, 'player version', fatal=fatal)
1714 if player_version:
1715 return f'https://www.youtube.com/s/player/{player_version}/player_ias.vflset/en_US/base.js'
1716
1717 def _signature_cache_id(self, example_sig):
1718 """ Return a string representation of a signature """
1719 return '.'.join(compat_str(len(part)) for part in example_sig.split('.'))
1720
1721 @classmethod
1722 def _extract_player_info(cls, player_url):
1723 for player_re in cls._PLAYER_INFO_RE:
1724 id_m = re.search(player_re, player_url)
1725 if id_m:
1726 break
1727 else:
1728 raise ExtractorError('Cannot identify player %r' % player_url)
1729 return id_m.group('id')
1730
1731 def _load_player(self, video_id, player_url, fatal=True):
1732 player_id = self._extract_player_info(player_url)
1733 if player_id not in self._code_cache:
1734 code = self._download_webpage(
1735 player_url, video_id, fatal=fatal,
1736 note='Downloading player ' + player_id,
1737 errnote='Download of %s failed' % player_url)
1738 if code:
1739 self._code_cache[player_id] = code
1740 return self._code_cache.get(player_id)
1741
1742 def _extract_signature_function(self, video_id, player_url, example_sig):
1743 player_id = self._extract_player_info(player_url)
1744
1745 # Read from filesystem cache
1746 func_id = 'js_%s_%s' % (
1747 player_id, self._signature_cache_id(example_sig))
1748 assert os.path.basename(func_id) == func_id
1749
1750 cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
1751 if cache_spec is not None:
1752 return lambda s: ''.join(s[i] for i in cache_spec)
1753
1754 code = self._load_player(video_id, player_url)
1755 if code:
1756 res = self._parse_sig_js(code)
1757
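# Probe the extracted JS function with a string of unique code points; the output
# (read back via ord() below) records which input position ends up where, and that
# index list is cached so later runs can replay the transform without a JS interpreter.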
1758 test_string = ''.join(map(compat_chr, range(len(example_sig))))
1759 cache_res = res(test_string)
1760 cache_spec = [ord(c) for c in cache_res]
1761
1762 self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec)
1763 return res
1764
1765 def _print_sig_code(self, func, example_sig):
1766 if not self.get_param('youtube_print_sig_code'):
1767 return
1768
1769 def gen_sig_code(idxs):
1770 def _genslice(start, end, step):
1771 starts = '' if start == 0 else str(start)
1772 ends = (':%d' % (end + step)) if end + step >= 0 else ':'
1773 steps = '' if step == 1 else (':%d' % step)
1774 return 's[%s%s%s]' % (starts, ends, steps)
1775
1776 step = None
1777 # Squelch pyflakes warnings - start will be set when step is set
1778 start = '(Never used)'
1779 for i, prev in zip(idxs[1:], idxs[:-1]):
1780 if step is not None:
1781 if i - prev == step:
1782 continue
1783 yield _genslice(start, prev, step)
1784 step = None
1785 continue
1786 if i - prev in [-1, 1]:
1787 step = i - prev
1788 start = prev
1789 continue
1790 else:
1791 yield 's[%d]' % prev
1792 if step is None:
1793 yield 's[%d]' % i
1794 else:
1795 yield _genslice(start, i, step)
1796
1797 test_string = ''.join(map(compat_chr, range(len(example_sig))))
1798 cache_res = func(test_string)
1799 cache_spec = [ord(c) for c in cache_res]
1800 expr_code = ' + '.join(gen_sig_code(cache_spec))
1801 signature_id_tuple = '(%s)' % (
1802 ', '.join(compat_str(len(p)) for p in example_sig.split('.')))
1803 code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
1804 ' return %s\n') % (signature_id_tuple, expr_code)
1805 self.to_screen('Extracted signature function:\n' + code)
1806
1807 def _parse_sig_js(self, jscode):
1808 funcname = self._search_regex(
1809 (r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1810 r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1811 r'\bm=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(h\.s\)\)',
1812 r'\bc&&\(c=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(c\)\)',
1813 r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\);[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\)',
1814 r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
1815 r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
1816 # Obsolete patterns
1817 r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1818 r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
1819 r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1820 r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1821 r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1822 r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1823 r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1824 r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\('),
1825 jscode, 'Initial JS player signature function name', group='sig')
1826
1827 jsi = JSInterpreter(jscode)
1828 initial_function = jsi.extract_function(funcname)
1829 return lambda s: initial_function([s])
1830
1831 def _decrypt_signature(self, s, video_id, player_url):
1832 """Turn the encrypted s field into a working signature"""
1833
1834 if player_url is None:
1835 raise ExtractorError('Cannot decrypt signature without player_url')
1836
1837 try:
1838 player_id = (player_url, self._signature_cache_id(s))
1839 if player_id not in self._player_cache:
1840 func = self._extract_signature_function(
1841 video_id, player_url, s
1842 )
1843 self._player_cache[player_id] = func
1844 func = self._player_cache[player_id]
1845 self._print_sig_code(func, s)
1846 return func(s)
1847 except Exception as e:
1848 raise ExtractorError('Signature extraction failed: ' + traceback.format_exc(), cause=e)
1849
1850 def _decrypt_nsig(self, s, video_id, player_url):
1851 """Turn the encrypted n field into a working signature"""
1852 if player_url is None:
1853 raise ExtractorError('Cannot decrypt nsig without player_url')
1854 if player_url.startswith('//'):
1855 player_url = 'https:' + player_url
1856 elif not re.match(r'https?://', player_url):
1857 player_url = compat_urlparse.urljoin(
1858 'https://www.youtube.com', player_url)
1859
1860 sig_id = ('nsig_value', s)
1861 if sig_id in self._player_cache:
1862 return self._player_cache[sig_id]
1863
1864 try:
1865 player_id = ('nsig', player_url)
1866 if player_id not in self._player_cache:
1867 self._player_cache[player_id] = self._extract_n_function(video_id, player_url)
1868 func = self._player_cache[player_id]
1869 self._player_cache[sig_id] = func(s)
1870 self.write_debug(f'Decrypted nsig {s} => {self._player_cache[sig_id]}')
1871 return self._player_cache[sig_id]
1872 except Exception as e:
1873 raise ExtractorError(traceback.format_exc(), cause=e, video_id=video_id)
1874
1875 def _extract_n_function_name(self, jscode):
1876 return self._search_regex(
1877 (r'\.get\("n"\)\)&&\(b=(?P<nfunc>[a-zA-Z0-9$]{3})\([a-zA-Z0-9]\)',),
1878 jscode, 'Initial JS player n function name', group='nfunc')
1879
1880 def _extract_n_function(self, video_id, player_url):
1881 player_id = self._extract_player_info(player_url)
1882 func_code = self._downloader.cache.load('youtube-nsig', player_id)
1883
1884 if func_code:
1885 jsi = JSInterpreter(func_code)
1886 else:
1887 jscode = self._load_player(video_id, player_url)
1888 funcname = self._extract_n_function_name(jscode)
1889 jsi = JSInterpreter(jscode)
1890 func_code = jsi.extract_function_code(funcname)
1891 self._downloader.cache.store('youtube-nsig', player_id, func_code)
1892
1893 if self.get_param('youtube_print_sig_code'):
1894 self.to_screen(f'Extracted nsig function from {player_id}:\n{func_code[1]}\n')
1895
1896 return lambda s: jsi.extract_function_from_code(*func_code)([s])
1897
1898 def _extract_signature_timestamp(self, video_id, player_url, ytcfg=None, fatal=False):
1899 """
1900 Extract signatureTimestamp (sts)
1901 Required to tell API what sig/player version is in use.
1902 """
1903 sts = None
1904 if isinstance(ytcfg, dict):
1905 sts = int_or_none(ytcfg.get('STS'))
1906
1907 if not sts:
1908 # Attempt to extract from player
1909 if player_url is None:
1910 error_msg = 'Cannot extract signature timestamp without player_url.'
1911 if fatal:
1912 raise ExtractorError(error_msg)
1913 self.report_warning(error_msg)
1914 return
1915 code = self._load_player(video_id, player_url, fatal=fatal)
1916 if code:
1917 sts = int_or_none(self._search_regex(
1918 r'(?:signatureTimestamp|sts)\s*:\s*(?P<sts>[0-9]{5})', code,
1919 'JS player signature timestamp', group='sts', fatal=fatal))
1920 return sts
1921
1922 def _mark_watched(self, video_id, player_responses):
1923 playback_url = traverse_obj(
1924 player_responses, (..., 'playbackTracking', 'videostatsPlaybackUrl', 'baseUrl'),
1925 expected_type=url_or_none, get_all=False)
1926 if not playback_url:
1927 self.report_warning('Unable to mark watched')
1928 return
1929 parsed_playback_url = compat_urlparse.urlparse(playback_url)
1930 qs = compat_urlparse.parse_qs(parsed_playback_url.query)
1931
1932 # cpn generation algorithm is reverse engineered from base.js.
1933 # In fact it works even with dummy cpn.
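# A cpn is just 16 characters drawn from this 64-character alphabet (e.g.
# 'rA3x-_b9QkLmZp0V', illustrative value only); it is sent as the &cpn= query parameter below.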
1934 CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
1935 cpn = ''.join((CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(0, 16)))
1936
1937 qs.update({
1938 'ver': ['2'],
1939 'cpn': [cpn],
1940 })
1941 playback_url = compat_urlparse.urlunparse(
1942 parsed_playback_url._replace(query=compat_urllib_parse_urlencode(qs, True)))
1943
1944 self._download_webpage(
1945 playback_url, video_id, 'Marking watched',
1946 'Unable to mark watched', fatal=False)
1947
1948 @staticmethod
1949 def _extract_urls(webpage):
1950 # Embedded YouTube player
1951 entries = [
1952 unescapeHTML(mobj.group('url'))
1953 for mobj in re.finditer(r'''(?x)
1954 (?:
1955 <iframe[^>]+?src=|
1956 data-video-url=|
1957 <embed[^>]+?src=|
1958 embedSWF\(?:\s*|
1959 <object[^>]+data=|
1960 new\s+SWFObject\(
1961 )
1962 (["\'])
1963 (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
1964 (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
1965 \1''', webpage)]
1966
1967 # lazyYT YouTube embed
1968 entries.extend(list(map(
1969 unescapeHTML,
1970 re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage))))
1971
1972 # Wordpress "YouTube Video Importer" plugin
1973 matches = re.findall(r'''(?x)<div[^>]+
1974 class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
1975 data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
1976 entries.extend(m[-1] for m in matches)
1977
1978 return entries
1979
1980 @staticmethod
1981 def _extract_url(webpage):
1982 urls = YoutubeIE._extract_urls(webpage)
1983 return urls[0] if urls else None
1984
1985 @classmethod
1986 def extract_id(cls, url):
1987 mobj = re.match(cls._VALID_URL, url, re.VERBOSE)
1988 if mobj is None:
1989 raise ExtractorError('Invalid URL: %s' % url)
1990 return mobj.group('id')
1991
1992 def _extract_chapters_from_json(self, data, duration):
1993 chapter_list = traverse_obj(
1994 data, (
1995 'playerOverlays', 'playerOverlayRenderer', 'decoratedPlayerBarRenderer',
1996 'decoratedPlayerBarRenderer', 'playerBar', 'chapteredPlayerBarRenderer', 'chapters'
1997 ), expected_type=list)
1998
1999 return self._extract_chapters(
2000 chapter_list,
2001 chapter_time=lambda chapter: float_or_none(
2002 traverse_obj(chapter, ('chapterRenderer', 'timeRangeStartMillis')), scale=1000),
2003 chapter_title=lambda chapter: traverse_obj(
2004 chapter, ('chapterRenderer', 'title', 'simpleText'), expected_type=str),
2005 duration=duration)
2006
2007 def _extract_chapters_from_engagement_panel(self, data, duration):
2008 content_list = traverse_obj(
2009 data,
2010 ('engagementPanels', ..., 'engagementPanelSectionListRenderer', 'content', 'macroMarkersListRenderer', 'contents'),
2011 expected_type=list, default=[])
2012 chapter_time = lambda chapter: parse_duration(self._get_text(chapter, 'timeDescription'))
2013 chapter_title = lambda chapter: self._get_text(chapter, 'title')
2014
2015 return next((
2016 filter(None, (
2017 self._extract_chapters(
2018 traverse_obj(contents, (..., 'macroMarkersListItemRenderer')),
2019 chapter_time, chapter_title, duration)
2020 for contents in content_list
2021 ))), [])
2022
2023 def _extract_chapters(self, chapter_list, chapter_time, chapter_title, duration):
2024 chapters = []
2025 last_chapter = {'start_time': 0}
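# Each chapter's end_time is back-filled once the next chapter's start_time is seen;
# the final chapter's end_time is set to the full video duration after the loop.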
2026 for idx, chapter in enumerate(chapter_list or []):
2027 title = chapter_title(chapter)
2028 start_time = chapter_time(chapter)
2029 if start_time is None:
2030 continue
2031 last_chapter['end_time'] = start_time
2032 if start_time < last_chapter['start_time']:
2033 if idx == 1:
2034 chapters.pop()
2035 self.report_warning('Invalid start time for chapter "%s"' % last_chapter['title'])
2036 else:
2037 self.report_warning(f'Invalid start time for chapter "{title}"')
2038 continue
2039 last_chapter = {'start_time': start_time, 'title': title}
2040 chapters.append(last_chapter)
2041 last_chapter['end_time'] = duration
2042 return chapters
2043
2044 def _extract_yt_initial_variable(self, webpage, regex, video_id, name):
2045 return self._parse_json(self._search_regex(
2046 (r'%s\s*%s' % (regex, self._YT_INITIAL_BOUNDARY_RE),
2047 regex), webpage, name, default='{}'), video_id, fatal=False)
2048
2049 @staticmethod
2050 def parse_time_text(time_text):
2051 """
2052 Parse the comment time text
2053 time_text is in the format 'X units ago (edited)'
2054 """
2055 time_text_split = time_text.split(' ')
2056 if len(time_text_split) >= 3:
2057 try:
2058 return datetime_from_str('now-%s%s' % (time_text_split[0], time_text_split[1]), precision='auto')
2059 except ValueError:
2060 return None
2061
2062 def _extract_comment(self, comment_renderer, parent=None):
2063 comment_id = comment_renderer.get('commentId')
2064 if not comment_id:
2065 return
2066
2067 text = self._get_text(comment_renderer, 'contentText')
2068
2069 # note: timestamp is an estimate calculated from the current time and time_text
2070 time_text = self._get_text(comment_renderer, 'publishedTimeText') or ''
2071 time_text_dt = self.parse_time_text(time_text)
2072 timestamp = (calendar.timegm(time_text_dt.timetuple())
2073 if isinstance(time_text_dt, datetime.datetime) else None)
2074 author = self._get_text(comment_renderer, 'authorText')
2075 author_id = try_get(comment_renderer,
2076 lambda x: x['authorEndpoint']['browseEndpoint']['browseId'], compat_str)
2077
2078 votes = parse_count(try_get(comment_renderer, (lambda x: x['voteCount']['simpleText'],
2079 lambda x: x['likeCount']), compat_str)) or 0
2080 author_thumbnail = try_get(comment_renderer,
2081 lambda x: x['authorThumbnail']['thumbnails'][-1]['url'], compat_str)
2082
2083 author_is_uploader = try_get(comment_renderer, lambda x: x['authorIsChannelOwner'], bool)
2084 is_favorited = 'creatorHeart' in (try_get(
2085 comment_renderer, lambda x: x['actionButtons']['commentActionButtonsRenderer'], dict) or {})
2086 return {
2087 'id': comment_id,
2088 'text': text,
2089 'timestamp': timestamp,
2090 'time_text': time_text,
2091 'like_count': votes,
2092 'is_favorited': is_favorited,
2093 'author': author,
2094 'author_id': author_id,
2095 'author_thumbnail': author_thumbnail,
2096 'author_is_uploader': author_is_uploader,
2097 'parent': parent or 'root'
2098 }
2099
2100 def _comment_entries(self, root_continuation_data, ytcfg, video_id, parent=None, comment_counts=None):
2101
2102 def extract_header(contents):
2103 _continuation = None
2104 for content in contents:
2105 comments_header_renderer = try_get(content, lambda x: x['commentsHeaderRenderer'])
2106 expected_comment_count = parse_count(self._get_text(
2107 comments_header_renderer, 'countText', 'commentsCount', max_runs=1))
2108
2109 if expected_comment_count:
2110 comment_counts[1] = expected_comment_count
2111 self.to_screen('Downloading ~%d comments' % expected_comment_count)
2112 sort_mode_str = self._configuration_arg('comment_sort', [''])[0]
2113 comment_sort_index = int(sort_mode_str != 'top') # 1 = new, 0 = top
2114
2115 sort_menu_item = try_get(
2116 comments_header_renderer,
2117 lambda x: x['sortMenu']['sortFilterSubMenuRenderer']['subMenuItems'][comment_sort_index], dict) or {}
2118 sort_continuation_ep = sort_menu_item.get('serviceEndpoint') or {}
2119
2120 _continuation = self._extract_continuation_ep_data(sort_continuation_ep) or self._extract_continuation(sort_menu_item)
2121 if not _continuation:
2122 continue
2123
2124 sort_text = sort_menu_item.get('title')
2125 if isinstance(sort_text, compat_str):
2126 sort_text = sort_text.lower()
2127 else:
2128 sort_text = 'top comments' if comment_sort_index == 0 else 'newest first'
2129 self.to_screen('Sorting comments by %s' % sort_text)
2130 break
2131 return _continuation
2132
2133 def extract_thread(contents):
2134 if not parent:
2135 comment_counts[2] = 0
2136 for content in contents:
2137 comment_thread_renderer = try_get(content, lambda x: x['commentThreadRenderer'])
2138 comment_renderer = try_get(
2139 comment_thread_renderer, lambda x: x['comment']['commentRenderer'], dict) or try_get(
2140 content, lambda x: x['commentRenderer'], dict)
2141
2142 if not comment_renderer:
2143 continue
2144 comment = self._extract_comment(comment_renderer, parent)
2145 if not comment:
2146 continue
2147 comment_counts[0] += 1
2148 yield comment
2149 # Attempt to get the replies
2150 comment_replies_renderer = try_get(
2151 comment_thread_renderer, lambda x: x['replies']['commentRepliesRenderer'], dict)
2152
2153 if comment_replies_renderer:
2154 comment_counts[2] += 1
2155 comment_entries_iter = self._comment_entries(
2156 comment_replies_renderer, ytcfg, video_id,
2157 parent=comment.get('id'), comment_counts=comment_counts)
2158
2159 for reply_comment in comment_entries_iter:
2160 yield reply_comment
2161
2162 # YouTube comments have a max depth of 2
2163 max_depth = int_or_none(self._configuration_arg('max_comment_depth', [''])[0]) or float('inf')
2164 if max_depth == 1 and parent:
2165 return
2166 if not comment_counts:
2167 # comments so far, est. total comments, current comment thread #
2168 comment_counts = [0, 0, 0]
2169
2170 continuation = self._extract_continuation(root_continuation_data)
2171 if continuation and len(continuation['continuation']) < 27:
2172 self.write_debug('Detected old API continuation token. Generating new API compatible token.')
2173 continuation_token = self._generate_comment_continuation(video_id)
2174 continuation = self._build_api_continuation_query(continuation_token, None)
2175
2176 message = self._get_text(root_continuation_data, ('contents', ..., 'messageRenderer', 'text'), max_runs=1)
2177 if message and not parent:
2178 self.report_warning(message, video_id=video_id)
2179
2180 visitor_data = None
2181 is_first_continuation = parent is None
2182
2183 for page_num in itertools.count(0):
2184 if not continuation:
2185 break
2186 headers = self.generate_api_headers(ytcfg=ytcfg, visitor_data=visitor_data)
2187 comment_prog_str = '(%d/%d)' % (comment_counts[0], comment_counts[1])
2188 if page_num == 0:
2189 if is_first_continuation:
2190 note_prefix = 'Downloading comment section API JSON'
2191 else:
2192 note_prefix = ' Downloading comment API JSON reply thread %d %s' % (
2193 comment_counts[2], comment_prog_str)
2194 else:
2195 note_prefix = '%sDownloading comment%s API JSON page %d %s' % (
2196 ' ' if parent else '', ' replies' if parent else '',
2197 page_num, comment_prog_str)
2198
2199 response = self._extract_response(
2200 item_id=None, query=continuation,
2201 ep='next', ytcfg=ytcfg, headers=headers, note=note_prefix,
2202 check_get_keys=('onResponseReceivedEndpoints', 'continuationContents'))
2203 if not response:
2204 break
2205 visitor_data = try_get(
2206 response,
2207 lambda x: x['responseContext']['webResponseContextExtensionData']['ytConfigData']['visitorData'],
2208 compat_str) or visitor_data
2209
2210 continuation_contents = dict_get(response, ('onResponseReceivedEndpoints', 'continuationContents'))
2211
2212 continuation = None
2213 if isinstance(continuation_contents, list):
2214 for continuation_section in continuation_contents:
2215 if not isinstance(continuation_section, dict):
2216 continue
2217 continuation_items = try_get(
2218 continuation_section,
2219 (lambda x: x['reloadContinuationItemsCommand']['continuationItems'],
2220 lambda x: x['appendContinuationItemsAction']['continuationItems']),
2221 list) or []
2222 if is_first_continuation:
2223 continuation = extract_header(continuation_items)
2224 is_first_continuation = False
2225 if continuation:
2226 break
2227 continue
2228 count = 0
2229 for count, entry in enumerate(extract_thread(continuation_items)):
2230 yield entry
2231 continuation = self._extract_continuation({'contents': continuation_items})
2232 if continuation:
2233 # Sometimes YouTube provides a continuation without any comments
2234 # In most cases we end up just downloading these with very few comments to come.
2235 if count == 0:
2236 if not parent:
2237 self.report_warning('No comments received - assuming end of comments')
2238 continuation = None
2239 break
2240
2241 # Deprecated response structure
2242 elif isinstance(continuation_contents, dict):
2243 known_continuation_renderers = ('itemSectionContinuation', 'commentRepliesContinuation')
2244 for key, continuation_renderer in continuation_contents.items():
2245 if key not in known_continuation_renderers:
2246 continue
2247 if not isinstance(continuation_renderer, dict):
2248 continue
2249 if is_first_continuation:
2250 header_continuation_items = [continuation_renderer.get('header') or {}]
2251 continuation = extract_header(header_continuation_items)
2252 is_first_continuation = False
2253 if continuation:
2254 break
2255
2256 # Sometimes YouTube provides a continuation without any comments
2257 # In most cases we end up just downloading these with very few comments to come.
2258 count = 0
2259 for count, entry in enumerate(extract_thread(continuation_renderer.get('contents') or {})):
2260 yield entry
2261 continuation = self._extract_continuation(continuation_renderer)
2262 if count == 0:
2263 if not parent:
2264 self.report_warning('No comments received - assuming end of comments')
2265 continuation = None
2266 break
2267
2268 @staticmethod
2269 def _generate_comment_continuation(video_id):
2270 """
2271 Generates initial comment section continuation token from given video id
2272 """
2273 b64_vid_id = base64.b64encode(bytes(video_id.encode('utf-8')))
2274 parts = ('Eg0SCw==', b64_vid_id, 'GAYyJyIRIgs=', b64_vid_id, 'MAB4AjAAQhBjb21tZW50cy1zZWN0aW9u')
2275 new_continuation_intlist = list(itertools.chain.from_iterable(
2276 [bytes_to_intlist(base64.b64decode(part)) for part in parts]))
2277 return base64.b64encode(intlist_to_bytes(new_continuation_intlist)).decode('utf-8')
2278
2279 def _get_comments(self, ytcfg, video_id, contents, webpage):
2280 """Entry for comment extraction"""
2281 def _real_comment_extract(contents):
2282 renderer = next((
2283 item for item in traverse_obj(contents, (..., 'itemSectionRenderer'), default={})
2284 if item.get('sectionIdentifier') == 'comment-item-section'), None)
2285 yield from self._comment_entries(renderer, ytcfg, video_id)
2286
2287 max_comments = int_or_none(self._configuration_arg('max_comments', [''])[0])
2288 # Force English regardless of account setting to prevent parsing issues
2289 # See: https://github.com/yt-dlp/yt-dlp/issues/532
2290 ytcfg = copy.deepcopy(ytcfg)
2291 traverse_obj(
2292 ytcfg, ('INNERTUBE_CONTEXT', 'client'), expected_type=dict, default={})['hl'] = 'en'
2293 return itertools.islice(_real_comment_extract(contents), 0, max_comments)
2294
2295 @staticmethod
2296 def _get_checkok_params():
2297 return {'contentCheckOk': True, 'racyCheckOk': True}
2298
2299 @classmethod
2300 def _generate_player_context(cls, sts=None):
2301 context = {
2302 'html5Preference': 'HTML5_PREF_WANTS',
2303 }
2304 if sts is not None:
2305 context['signatureTimestamp'] = sts
2306 return {
2307 'playbackContext': {
2308 'contentPlaybackContext': context
2309 },
2310 **cls._get_checkok_params()
2311 }
2312
2313 @staticmethod
2314 def _is_agegated(player_response):
2315 if traverse_obj(player_response, ('playabilityStatus', 'desktopLegacyAgeGateReason')):
2316 return True
2317
2318 reasons = traverse_obj(player_response, ('playabilityStatus', ('status', 'reason')), default=[])
2319 AGE_GATE_REASONS = (
2320 'confirm your age', 'age-restricted', 'inappropriate', # reason
2321 'age_verification_required', 'age_check_required', # status
2322 )
2323 return any(expected in reason for expected in AGE_GATE_REASONS for reason in reasons)
2324
2325 @staticmethod
2326 def _is_unplayable(player_response):
2327 return traverse_obj(player_response, ('playabilityStatus', 'status')) == 'UNPLAYABLE'
2328
2329 def _extract_player_response(self, client, video_id, master_ytcfg, player_ytcfg, player_url, initial_pr):
2330
2331 session_index = self._extract_session_index(player_ytcfg, master_ytcfg)
2332 syncid = self._extract_account_syncid(player_ytcfg, master_ytcfg, initial_pr)
2333 sts = self._extract_signature_timestamp(video_id, player_url, master_ytcfg, fatal=False) if player_url else None
2334 headers = self.generate_api_headers(
2335 ytcfg=player_ytcfg, account_syncid=syncid, session_index=session_index, default_client=client)
2336
2337 yt_query = {'videoId': video_id}
2338 yt_query.update(self._generate_player_context(sts))
2339 return self._extract_response(
2340 item_id=video_id, ep='player', query=yt_query,
2341 ytcfg=player_ytcfg, headers=headers, fatal=True,
2342 default_client=client,
2343 note='Downloading %s player API JSON' % client.replace('_', ' ').strip()
2344 ) or None
2345
2346 def _get_requested_clients(self, url, smuggled_data):
2347 requested_clients = []
2348 default = ['android', 'web']
2349 allowed_clients = sorted(
2350 [client for client in INNERTUBE_CLIENTS.keys() if client[:1] != '_'],
2351 key=lambda client: INNERTUBE_CLIENTS[client]['priority'], reverse=True)
2352 for client in self._configuration_arg('player_client'):
2353 if client in allowed_clients:
2354 requested_clients.append(client)
2355 elif client == 'default':
2356 requested_clients.extend(default)
2357 elif client == 'all':
2358 requested_clients.extend(allowed_clients)
2359 else:
2360 self.report_warning(f'Skipping unsupported client {client}')
2361 if not requested_clients:
2362 requested_clients = default
2363
2364 if smuggled_data.get('is_music_url') or self.is_music_url(url):
2365 requested_clients.extend(
2366 f'{client}_music' for client in requested_clients if f'{client}_music' in INNERTUBE_CLIENTS)
2367
2368 return orderedSet(requested_clients)
2369
2370 def _extract_player_ytcfg(self, client, video_id):
2371 url = {
2372 'web_music': 'https://music.youtube.com',
2373 'web_embedded': f'https://www.youtube.com/embed/{video_id}?html5=1'
2374 }.get(client)
2375 if not url:
2376 return {}
2377 webpage = self._download_webpage(url, video_id, fatal=False, note=f'Downloading {client} config')
2378 return self.extract_ytcfg(video_id, webpage) or {}
2379
2380 def _extract_player_responses(self, clients, video_id, webpage, master_ytcfg):
2381 initial_pr = None
2382 if webpage:
2383 initial_pr = self._extract_yt_initial_variable(
2384 webpage, self._YT_INITIAL_PLAYER_RESPONSE_RE,
2385 video_id, 'initial player response')
2386
2387 original_clients = clients
2388 clients = clients[::-1]
2389 prs = []
2390
2391 def append_client(client_name):
2392 if client_name in INNERTUBE_CLIENTS and client_name not in original_clients:
2393 clients.append(client_name)
2394
2395 # Android player_response does not have microFormats which are needed for
2396 # extraction of some data. So we return the initial_pr with formats
2397 # stripped out even if not requested by the user
2398 # See: https://github.com/yt-dlp/yt-dlp/issues/501
2399 if initial_pr:
2400 pr = dict(initial_pr)
2401 pr['streamingData'] = None
2402 prs.append(pr)
2403
2404 last_error = None
2405 tried_iframe_fallback = False
2406 player_url = None
2407 while clients:
2408 client = clients.pop()
2409 player_ytcfg = master_ytcfg if client == 'web' else {}
2410 if 'configs' not in self._configuration_arg('player_skip'):
2411 player_ytcfg = self._extract_player_ytcfg(client, video_id) or player_ytcfg
2412
2413 player_url = player_url or self._extract_player_url(master_ytcfg, player_ytcfg, webpage=webpage)
2414 require_js_player = self._get_default_ytcfg(client).get('REQUIRE_JS_PLAYER')
2415 if 'js' in self._configuration_arg('player_skip'):
2416 require_js_player = False
2417 player_url = None
2418
2419 if not player_url and not tried_iframe_fallback and require_js_player:
2420 player_url = self._download_player_url(video_id)
2421 tried_iframe_fallback = True
2422
2423 try:
2424 pr = initial_pr if client == 'web' and initial_pr else self._extract_player_response(
2425 client, video_id, player_ytcfg or master_ytcfg, player_ytcfg, player_url if require_js_player else None, initial_pr)
2426 except ExtractorError as e:
2427 if last_error:
2428 self.report_warning(last_error)
2429 last_error = e
2430 continue
2431
2432 if pr:
2433 prs.append(pr)
2434
2435 # creator clients can bypass AGE_VERIFICATION_REQUIRED if logged in
2436 if client.endswith('_agegate') and self._is_unplayable(pr) and self.is_authenticated:
2437 append_client(client.replace('_agegate', '_creator'))
2438 elif self._is_agegated(pr):
2439 append_client(f'{client}_agegate')
2440
2441 if last_error:
2442 if not len(prs):
2443 raise last_error
2444 self.report_warning(last_error)
2445 return prs, player_url
2446
2447 def _extract_formats(self, streaming_data, video_id, player_url, is_live):
2448 itags, stream_ids = {}, []
2449 itag_qualities, res_qualities = {}, {}
2450 q = qualities([
2451 # Normally tiny is the smallest video-only format. But
2452 # audio-only formats with unknown quality may get tagged as tiny
2453 'tiny',
2454 'audio_quality_ultralow', 'audio_quality_low', 'audio_quality_medium', 'audio_quality_high', # Audio only formats
2455 'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres'
2456 ])
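# `q` maps a label to its index in the list above (higher = better); labels not in
# the list map to -1, so unknown qualities sort below everything else.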
2457 streaming_formats = traverse_obj(streaming_data, (..., ('formats', 'adaptiveFormats'), ...), default=[])
2458
2459 for fmt in streaming_formats:
2460 if fmt.get('targetDurationSec') or fmt.get('drmFamilies'):
2461 continue
2462
2463 itag = str_or_none(fmt.get('itag'))
2464 audio_track = fmt.get('audioTrack') or {}
2465 stream_id = '%s.%s' % (itag or '', audio_track.get('id', ''))
2466 if stream_id in stream_ids:
2467 continue
2468
2469 quality = fmt.get('quality')
2470 height = int_or_none(fmt.get('height'))
2471 if quality == 'tiny' or not quality:
2472 quality = fmt.get('audioQuality', '').lower() or quality
2473 # The 3gp format (17) in android client has a quality of "small",
2474 # but is actually worse than other formats
2475 if itag == '17':
2476 quality = 'tiny'
2477 if quality:
2478 if itag:
2479 itag_qualities[itag] = quality
2480 if height:
2481 res_qualities[height] = quality
2482 # FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment
2483 # (adding `&sq=0` to the URL) and parsing emsg box to determine the
2484 # number of fragments that would subsequently be requested with (`&sq=N`)
2485 if fmt.get('type') == 'FORMAT_STREAM_TYPE_OTF':
2486 continue
2487
2488 fmt_url = fmt.get('url')
2489 if not fmt_url:
2490 sc = compat_parse_qs(fmt.get('signatureCipher'))
2491 fmt_url = url_or_none(try_get(sc, lambda x: x['url'][0]))
2492 encrypted_sig = try_get(sc, lambda x: x['s'][0])
2493 if not (sc and fmt_url and encrypted_sig):
2494 continue
2495 if not player_url:
2496 continue
2497 signature = self._decrypt_signature(sc['s'][0], video_id, player_url)
2498 sp = try_get(sc, lambda x: x['sp'][0]) or 'signature'
2499 fmt_url += '&' + sp + '=' + signature
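# signatureCipher is itself a URL-encoded query string of the form
# 's=<scrambled signature>&sp=<destination parameter name>&url=<stream URL>'; the
# descrambled value is appended back onto the stream URL under that parameter above.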
2500
2501 query = parse_qs(fmt_url)
2502 throttled = False
2503 if query.get('ratebypass') != ['yes'] and query.get('n'):
2504 try:
2505 fmt_url = update_url_query(fmt_url, {
2506 'n': self._decrypt_nsig(query['n'][0], video_id, player_url)})
2507 except ExtractorError as e:
2508 self.report_warning(
2509 f'nsig extraction failed: You may experience throttling for some formats\n'
2510 f'n = {query["n"][0]} ; player = {player_url}\n{e}', only_once=True)
2511 throttled = True
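# Formats whose `n` parameter could not be transformed still download, but YouTube
# usually throttles them heavily; they are tagged THROTTLED and given a lower
# source_preference below so that other formats are preferred in format sorting.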
2512
2513 if itag:
2514 itags[itag] = 'https'
2515 stream_ids.append(stream_id)
2516
2517 tbr = float_or_none(
2518 fmt.get('averageBitrate') or fmt.get('bitrate'), 1000)
2519 dct = {
2520 'asr': int_or_none(fmt.get('audioSampleRate')),
2521 'filesize': int_or_none(fmt.get('contentLength')),
2522 'format_id': itag,
2523 'format_note': join_nonempty(
2524 '%s%s' % (audio_track.get('displayName') or '',
2525 ' (default)' if audio_track.get('audioIsDefault') else ''),
2526 fmt.get('qualityLabel') or quality.replace('audio_quality_', ''),
2527 throttled and 'THROTTLED', delim=', '),
2528 'source_preference': -10 if throttled else -1,
2529 'fps': int_or_none(fmt.get('fps')) or None,
2530 'height': height,
2531 'quality': q(quality),
2532 'tbr': tbr,
2533 'url': fmt_url,
2534 'width': int_or_none(fmt.get('width')),
2535 'language': audio_track.get('id', '').split('.')[0],
2536 'language_preference': 1 if audio_track.get('audioIsDefault') else -1,
2537 }
2538 mime_mobj = re.match(
2539 r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', fmt.get('mimeType') or '')
2540 if mime_mobj:
2541 dct['ext'] = mimetype2ext(mime_mobj.group(1))
2542 dct.update(parse_codecs(mime_mobj.group(2)))
2543 no_audio = dct.get('acodec') == 'none'
2544 no_video = dct.get('vcodec') == 'none'
2545 if no_audio:
2546 dct['vbr'] = tbr
2547 if no_video:
2548 dct['abr'] = tbr
2549 if no_audio or no_video:
2550 dct['downloader_options'] = {
2551 # Youtube throttles chunks >~10M
2552 'http_chunk_size': 10485760,
2553 }
2554 if dct.get('ext'):
2555 dct['container'] = dct['ext'] + '_dash'
2556 yield dct
2557
2558 skip_manifests = self._configuration_arg('skip')
2559 get_dash = (
2560 (not is_live or self._configuration_arg('include_live_dash'))
2561 and 'dash' not in skip_manifests and self.get_param('youtube_include_dash_manifest', True))
2562 get_hls = 'hls' not in skip_manifests and self.get_param('youtube_include_hls_manifest', True)
2563
2564 def process_manifest_format(f, proto, itag):
2565 if itag in itags:
2566 if itags[itag] == proto or f'{itag}-{proto}' in itags:
2567 return False
2568 itag = f'{itag}-{proto}'
2569 if itag:
2570 f['format_id'] = itag
2571 itags[itag] = proto
2572
2573 f['quality'] = next((
2574 q(qdict[val])
2575 for val, qdict in ((f.get('format_id', '').split('-')[0], itag_qualities), (f.get('height'), res_qualities))
2576 if val in qdict), -1)
2577 return True
2578
2579 for sd in streaming_data:
2580 hls_manifest_url = get_hls and sd.get('hlsManifestUrl')
2581 if hls_manifest_url:
2582 for f in self._extract_m3u8_formats(hls_manifest_url, video_id, 'mp4', fatal=False):
2583 if process_manifest_format(f, 'hls', self._search_regex(
2584 r'/itag/(\d+)', f['url'], 'itag', default=None)):
2585 yield f
2586
2587 dash_manifest_url = get_dash and sd.get('dashManifestUrl')
2588 if dash_manifest_url:
2589 for f in self._extract_mpd_formats(dash_manifest_url, video_id, fatal=False):
2590 if process_manifest_format(f, 'dash', f['format_id']):
2591 f['filesize'] = int_or_none(self._search_regex(
2592 r'/clen/(\d+)', f.get('fragment_base_url') or f['url'], 'file size', default=None))
2593 yield f
2594
2595 def _extract_storyboard(self, player_responses, duration):
2596 spec = get_first(
2597 player_responses, ('storyboards', 'playerStoryboardSpecRenderer', 'spec'), default='').split('|')[::-1]
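# The spec is a '|'-separated string: the first field is a templated base URL
# (with $L, $N and $M placeholders) and each following field describes one storyboard
# level as eight '#'-separated values, roughly width#height#frame_count#cols#rows#...#N#sigh.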
2598 if not spec:
2599 return
2600 base_url = spec.pop()
2601 L = len(spec) - 1
2602 for i, args in enumerate(spec):
2603 args = args.split('#')
2604 counts = list(map(int_or_none, args[:5]))
2605 if len(args) != 8 or not all(counts):
2606 self.report_warning(f'Malformed storyboard {i}: {"#".join(args)}{bug_reports_message()}')
2607 continue
2608 width, height, frame_count, cols, rows = counts
2609 N, sigh = args[6:]
2610
2611 url = base_url.replace('$L', str(L - i)).replace('$N', N) + f'&sigh={sigh}'
2612 fragment_count = frame_count / (cols * rows)
2613 fragment_duration = duration / fragment_count
2614 yield {
2615 'format_id': f'sb{i}',
2616 'format_note': 'storyboard',
2617 'ext': 'mhtml',
2618 'protocol': 'mhtml',
2619 'acodec': 'none',
2620 'vcodec': 'none',
2621 'url': url,
2622 'width': width,
2623 'height': height,
2624 'fragments': [{
2625 'path': url.replace('$M', str(j)),
2626 'duration': min(fragment_duration, duration - (j * fragment_duration)),
2627 } for j in range(math.ceil(fragment_count))],
2628 }
2629
2630 def _real_extract(self, url):
2631 url, smuggled_data = unsmuggle_url(url, {})
2632 video_id = self._match_id(url)
2633
2634 base_url = self.http_scheme() + '//www.youtube.com/'
2635 webpage_url = base_url + 'watch?v=' + video_id
2636 webpage = None
2637 if 'webpage' not in self._configuration_arg('player_skip'):
2638 webpage = self._download_webpage(
2639 webpage_url + '&bpctr=9999999999&has_verified=1', video_id, fatal=False)
2640
2641 master_ytcfg = self.extract_ytcfg(video_id, webpage) or self._get_default_ytcfg()
2642
2643 player_responses, player_url = self._extract_player_responses(
2644 self._get_requested_clients(url, smuggled_data),
2645 video_id, webpage, master_ytcfg)
2646
2647 playability_statuses = traverse_obj(
2648 player_responses, (..., 'playabilityStatus'), expected_type=dict, default=[])
2649
2650 trailer_video_id = get_first(
2651 playability_statuses,
2652 ('errorScreen', 'playerLegacyDesktopYpcTrailerRenderer', 'trailerVideoId'),
2653 expected_type=str)
2654 if trailer_video_id:
2655 return self.url_result(
2656 trailer_video_id, self.ie_key(), trailer_video_id)
2657
2658 search_meta = ((lambda x: self._html_search_meta(x, webpage, default=None))
2659 if webpage else (lambda x: None))
2660
2661 video_details = traverse_obj(
2662 player_responses, (..., 'videoDetails'), expected_type=dict, default=[])
2663 microformats = traverse_obj(
2664 player_responses, (..., 'microformat', 'playerMicroformatRenderer'),
2665 expected_type=dict, default=[])
2666 video_title = (
2667 get_first(video_details, 'title')
2668 or self._get_text(microformats, (..., 'title'))
2669 or search_meta(['og:title', 'twitter:title', 'title']))
2670 video_description = get_first(video_details, 'shortDescription')
2671
2672 multifeed_metadata_list = get_first(
2673 player_responses,
2674 ('multicamera', 'playerLegacyMulticameraRenderer', 'metadataList'),
2675 expected_type=str)
2676 if multifeed_metadata_list and not smuggled_data.get('force_singlefeed'):
2677 if self.get_param('noplaylist'):
2678 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
2679 else:
2680 entries = []
2681 feed_ids = []
2682 for feed in multifeed_metadata_list.split(','):
2683 # Unquote should take place before split on comma (,) since textual
2684 # fields may contain comma as well (see
2685 # https://github.com/ytdl-org/youtube-dl/issues/8536)
2686 feed_data = compat_parse_qs(
2687 compat_urllib_parse_unquote_plus(feed))
2688
2689 def feed_entry(name):
2690 return try_get(
2691 feed_data, lambda x: x[name][0], compat_str)
2692
2693 feed_id = feed_entry('id')
2694 if not feed_id:
2695 continue
2696 feed_title = feed_entry('title')
2697 title = video_title
2698 if feed_title:
2699 title += ' (%s)' % feed_title
2700 entries.append({
2701 '_type': 'url_transparent',
2702 'ie_key': 'Youtube',
2703 'url': smuggle_url(
2704 '%swatch?v=%s' % (base_url, feed_data['id'][0]),
2705 {'force_singlefeed': True}),
2706 'title': title,
2707 })
2708 feed_ids.append(feed_id)
2709 self.to_screen(
2710 'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
2711 % (', '.join(feed_ids), video_id))
2712 return self.playlist_result(
2713 entries, video_id, video_title, video_description)
2714
2715 live_broadcast_details = traverse_obj(microformats, (..., 'liveBroadcastDetails'))
2716 is_live = get_first(video_details, 'isLive')
2717 if is_live is None:
2718 is_live = get_first(live_broadcast_details, 'isLiveNow')
2719
2720 streaming_data = traverse_obj(player_responses, (..., 'streamingData'), default=[])
2721 formats = list(self._extract_formats(streaming_data, video_id, player_url, is_live))
2722
2723 if not formats:
2724 if not self.get_param('allow_unplayable_formats') and traverse_obj(streaming_data, (..., 'licenseInfos')):
2725 self.report_drm(video_id)
2726 pemr = get_first(
2727 playability_statuses,
2728 ('errorScreen', 'playerErrorMessageRenderer'), expected_type=dict) or {}
2729 reason = self._get_text(pemr, 'reason') or get_first(playability_statuses, 'reason')
2730 subreason = clean_html(self._get_text(pemr, 'subreason') or '')
2731 if subreason:
2732 if subreason == 'The uploader has not made this video available in your country.':
2733 countries = get_first(microformats, 'availableCountries')
2734 if not countries:
2735 regions_allowed = search_meta('regionsAllowed')
2736 countries = regions_allowed.split(',') if regions_allowed else None
2737 self.raise_geo_restricted(subreason, countries, metadata_available=True)
2738 reason += f'. {subreason}'
2739 if reason:
2740 self.raise_no_formats(reason, expected=True)
2741
2742 keywords = get_first(video_details, 'keywords', expected_type=list) or []
2743 if not keywords and webpage:
2744 keywords = [
2745 unescapeHTML(m.group('content'))
2746 for m in re.finditer(self._meta_regex('og:video:tag'), webpage)]
2747 for keyword in keywords:
2748 if keyword.startswith('yt:stretch='):
2749 mobj = re.search(r'(\d+)\s*:\s*(\d+)', keyword)
2750 if mobj:
2751 # NB: float is intentional for forcing float division
2752 w, h = (float(v) for v in mobj.groups())
2753 if w > 0 and h > 0:
2754 ratio = w / h
2755 for f in formats:
2756 if f.get('vcodec') != 'none':
2757 f['stretched_ratio'] = ratio
2758 break
2759
2760 thumbnails = []
2761 thumbnail_dicts = traverse_obj(
2762 (video_details, microformats), (..., ..., 'thumbnail', 'thumbnails', ...),
2763 expected_type=dict, default=[])
2764 for thumbnail in thumbnail_dicts:
2765 thumbnail_url = thumbnail.get('url')
2766 if not thumbnail_url:
2767 continue
2768 # Sometimes youtube gives a wrong thumbnail URL. See:
2769 # https://github.com/yt-dlp/yt-dlp/issues/233
2770 # https://github.com/ytdl-org/youtube-dl/issues/28023
2771 if 'maxresdefault' in thumbnail_url:
2772 thumbnail_url = thumbnail_url.split('?')[0]
2773 thumbnails.append({
2774 'url': thumbnail_url,
2775 'height': int_or_none(thumbnail.get('height')),
2776 'width': int_or_none(thumbnail.get('width')),
2777 })
2778 thumbnail_url = search_meta(['og:image', 'twitter:image'])
2779 if thumbnail_url:
2780 thumbnails.append({
2781 'url': thumbnail_url,
2782 })
2783 original_thumbnails = thumbnails.copy()
2784
2785 # The best resolution thumbnail sometimes does not appear in the webpage
2786 # See: https://github.com/ytdl-org/youtube-dl/issues/29049, https://github.com/yt-dlp/yt-dlp/issues/340
2787 # List of possible thumbnails - Ref: <https://stackoverflow.com/a/20542029>
2788 thumbnail_names = [
2789 'maxresdefault', 'hq720', 'sddefault', 'sd1', 'sd2', 'sd3',
2790 'hqdefault', 'hq1', 'hq2', 'hq3', '0',
2791 'mqdefault', 'mq1', 'mq2', 'mq3',
2792 'default', '1', '2', '3'
2793 ]
2794 n_thumbnail_names = len(thumbnail_names)
2795 thumbnails.extend({
2796 'url': 'https://i.ytimg.com/vi{webp}/{video_id}/{name}{live}.{ext}'.format(
2797 video_id=video_id, name=name, ext=ext,
2798 webp='_webp' if ext == 'webp' else '', live='_live' if is_live else ''),
2799 } for name in thumbnail_names for ext in ('webp', 'jpg'))
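# Preference decreases with position in thumbnail_names (maxresdefault ranks highest)
# and webp is ranked just above jpg for the same name; URLs matching none of the known
# names fall to the bottom. Duplicate URLs are removed afterwards.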
2800 for thumb in thumbnails:
2801 i = next((i for i, t in enumerate(thumbnail_names) if f'/{video_id}/{t}' in thumb['url']), n_thumbnail_names)
2802 thumb['preference'] = (0 if '.webp' in thumb['url'] else -1) - (2 * i)
2803 self._remove_duplicate_formats(thumbnails)
2804 self._downloader._sort_thumbnails(original_thumbnails)
2805
2806 category = get_first(microformats, 'category') or search_meta('genre')
2807 channel_id = str_or_none(
2808 get_first(video_details, 'channelId')
2809 or get_first(microformats, 'externalChannelId')
2810 or search_meta('channelId'))
2811 duration = int_or_none(
2812 get_first(video_details, 'lengthSeconds')
2813 or get_first(microformats, 'lengthSeconds')
2814 or parse_duration(search_meta('duration'))) or None
2815 owner_profile_url = get_first(microformats, 'ownerProfileUrl')
2816
2817 live_content = get_first(video_details, 'isLiveContent')
2818 is_upcoming = get_first(video_details, 'isUpcoming')
2819 if is_live is None:
2820 if is_upcoming or live_content is False:
2821 is_live = False
2822 if is_upcoming is None and (live_content or is_live):
2823 is_upcoming = False
2824 live_starttime = parse_iso8601(get_first(live_broadcast_details, 'startTimestamp'))
2825 live_endtime = parse_iso8601(get_first(live_broadcast_details, 'endTimestamp'))
2826 if not duration and live_endtime and live_starttime:
2827 duration = live_endtime - live_starttime
2828
2829 formats.extend(self._extract_storyboard(player_responses, duration))
2830
2831 # Source is given priority since formats that throttle are given lower source_preference
2832 # When throttling issue is fully fixed, remove this
2833 self._sort_formats(formats, ('quality', 'res', 'fps', 'hdr:12', 'source', 'codec:vp9.2', 'lang', 'proto'))
2834
2835 info = {
2836 'id': video_id,
2837 'title': self._live_title(video_title) if is_live else video_title,
2838 'formats': formats,
2839 'thumbnails': thumbnails,
2840 # The best thumbnail that we are sure exists. Prevents unnecessary
2841 # URL checking if the user doesn't care about getting the best possible thumbnail
2842 'thumbnail': traverse_obj(original_thumbnails, (-1, 'url')),
2843 'description': video_description,
2844 'upload_date': unified_strdate(
2845 get_first(microformats, 'uploadDate')
2846 or search_meta('uploadDate')),
2847 'uploader': get_first(video_details, 'author'),
2848 'uploader_id': self._search_regex(r'/(?:channel|user)/([^/?&#]+)', owner_profile_url, 'uploader id') if owner_profile_url else None,
2849 'uploader_url': owner_profile_url,
2850 'channel_id': channel_id,
2851 'channel_url': f'https://www.youtube.com/channel/{channel_id}' if channel_id else None,
2852 'duration': duration,
2853 'view_count': int_or_none(
2854 get_first((video_details, microformats), (..., 'viewCount'))
2855 or search_meta('interactionCount')),
2856 'average_rating': float_or_none(get_first(video_details, 'averageRating')),
2857 'age_limit': 18 if (
2858 get_first(microformats, 'isFamilySafe') is False
2859 or search_meta('isFamilyFriendly') == 'false'
2860 or search_meta('og:restrictions:age') == '18+') else 0,
2861 'webpage_url': webpage_url,
2862 'categories': [category] if category else None,
2863 'tags': keywords,
2864 'playable_in_embed': get_first(playability_statuses, 'playableInEmbed'),
2865 'is_live': is_live,
2866 'was_live': (False if is_live or is_upcoming or live_content is False
2867 else None if is_live is None or is_upcoming is None
2868 else live_content),
2869 'live_status': 'is_upcoming' if is_upcoming else None, # rest will be set by YoutubeDL
2870 'release_timestamp': live_starttime,
2871 }
2872
2873 pctr = traverse_obj(player_responses, (..., 'captions', 'playerCaptionsTracklistRenderer'), expected_type=dict)
2874 if pctr:
2875 def get_lang_code(track):
2876 return (remove_start(track.get('vssId') or '', '.').replace('.', '-')
2877 or track.get('languageCode'))
2878
2879 # Converted into dicts to remove duplicates
2880 captions = {
2881 get_lang_code(sub): sub
2882 for sub in traverse_obj(pctr, (..., 'captionTracks', ...), default=[])}
2883 translation_languages = {
2884 lang.get('languageCode'): self._get_text(lang.get('languageName'), max_runs=1)
2885 for lang in traverse_obj(pctr, (..., 'translationLanguages', ...), default=[])}
2886
2887 def process_language(container, base_url, lang_code, sub_name, query):
2888 lang_subs = container.setdefault(lang_code, [])
2889 for fmt in self._SUBTITLE_FORMATS:
2890 query.update({
2891 'fmt': fmt,
2892 })
2893 lang_subs.append({
2894 'ext': fmt,
2895 'url': update_url_query(base_url, query),
2896 'name': sub_name,
2897 })
2898
2899 subtitles, automatic_captions = {}, {}
2900 for lang_code, caption_track in captions.items():
2901 base_url = caption_track.get('baseUrl')
2902 if not base_url:
2903 continue
2904 lang_name = self._get_text(caption_track, 'name', max_runs=1)
2905 if caption_track.get('kind') != 'asr':
2906 if not lang_code:
2907 continue
2908 process_language(
2909 subtitles, base_url, lang_code, lang_name, {})
2910 if not caption_track.get('isTranslatable'):
2911 continue
2912 for trans_code, trans_name in translation_languages.items():
2913 if not trans_code:
2914 continue
2915 if caption_track.get('kind') != 'asr':
2916 trans_code += f'-{lang_code}'
2917 trans_name += format_field(lang_name, template=' from %s')
2918 process_language(
2919 automatic_captions, base_url, trans_code, trans_name, {'tlang': trans_code})
2920 info['automatic_captions'] = automatic_captions
2921 info['subtitles'] = subtitles
2922
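# Map start/end times given in the URL query or fragment (e.g. ?t=30) to start_time/end_time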
2923 parsed_url = compat_urllib_parse_urlparse(url)
2924 for component in [parsed_url.fragment, parsed_url.query]:
2925 query = compat_parse_qs(component)
2926 for k, v in query.items():
2927 for d_k, s_ks in [('start', ('start', 't')), ('end', ('end',))]:
2928 d_k += '_time'
2929 if d_k not in info and k in s_ks:
2930 info[d_k] = parse_duration(query[k][0])
2931
2932 # Youtube Music Auto-generated description
2933 if video_description:
2934 mobj = re.search(r'(?s)(?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?.+\nAuto-generated by YouTube\.\s*$', video_description)
2935 if mobj:
2936 release_year = mobj.group('release_year')
2937 release_date = mobj.group('release_date')
2938 if release_date:
2939 release_date = release_date.replace('-', '')
2940 if not release_year:
2941 release_year = release_date[:4]
2942 info.update({
2943 'album': mobj.group('album').strip(),
2944 'artist': mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·')),
2945 'track': mobj.group('track').strip(),
2946 'release_date': release_date,
2947 'release_year': int_or_none(release_year),
2948 })
2949
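# Fetch ytInitialData (from the webpage, or via the 'next' API as a fallback) for chapters, live chat, comments and other metadata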
2950 initial_data = None
2951 if webpage:
2952 initial_data = self._extract_yt_initial_variable(
2953 webpage, self._YT_INITIAL_DATA_RE, video_id,
2954 'yt initial data')
2955 if not initial_data:
2956 query = {'videoId': video_id}
2957 query.update(self._get_checkok_params())
2958 initial_data = self._extract_response(
2959 item_id=video_id, ep='next', fatal=False,
2960 ytcfg=master_ytcfg, query=query,
2961 headers=self.generate_api_headers(ytcfg=master_ytcfg),
2962 note='Downloading initial data API JSON')
2963
2964 try:
2965 # This will error if there is no livechat
2966 initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
2967 info.setdefault('subtitles', {})['live_chat'] = [{
2968 'url': 'https://www.youtube.com/watch?v=%s' % video_id, # url is needed to set cookies
2969 'video_id': video_id,
2970 'ext': 'json',
2971 'protocol': 'youtube_live_chat' if is_live or is_upcoming else 'youtube_live_chat_replay',
2972 }]
2973 except (KeyError, IndexError, TypeError):
2974 pass
2975
2976 if initial_data:
2977 info['chapters'] = (
2978 self._extract_chapters_from_json(initial_data, duration)
2979 or self._extract_chapters_from_engagement_panel(initial_data, duration)
2980 or None)
2981
2982 contents = try_get(
2983 initial_data,
2984 lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'],
2985 list) or []
2986 for content in contents:
2987 vpir = content.get('videoPrimaryInfoRenderer')
2988 if vpir:
2989 stl = vpir.get('superTitleLink')
2990 if stl:
2991 stl = self._get_text(stl)
2992 if try_get(
2993 vpir,
2994 lambda x: x['superTitleIcon']['iconType']) == 'LOCATION_PIN':
2995 info['location'] = stl
2996 else:
2997 mobj = re.search(r'(.+?)\s*S(\d+)\s*•\s*E(\d+)', stl)
2998 if mobj:
2999 info.update({
3000 'series': mobj.group(1),
3001 'season_number': int(mobj.group(2)),
3002 'episode_number': int(mobj.group(3)),
3003 })
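# Extract like/dislike counts from the accessibility labels of the top-level toggle buttons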
3004 for tlb in (try_get(
3005 vpir,
3006 lambda x: x['videoActions']['menuRenderer']['topLevelButtons'],
3007 list) or []):
3008 tbr = tlb.get('toggleButtonRenderer') or {}
3009 for getter, regex in [(
3010 lambda x: x['defaultText']['accessibility']['accessibilityData'],
3011 r'(?P<count>[\d,]+)\s*(?P<type>(?:dis)?like)'), ([
3012 lambda x: x['accessibility'],
3013 lambda x: x['accessibilityData']['accessibilityData'],
3014 ], r'(?P<type>(?:dis)?like) this video along with (?P<count>[\d,]+) other people')]:
3015 label = (try_get(tbr, getter, dict) or {}).get('label')
3016 if label:
3017 mobj = re.match(regex, label)
3018 if mobj:
3019 info[mobj.group('type') + '_count'] = str_to_int(mobj.group('count'))
3020 break
3021 sbr_tooltip = try_get(
3022 vpir, lambda x: x['sentimentBar']['sentimentBarRenderer']['tooltip'])
3023 if sbr_tooltip:
3024 like_count, dislike_count = sbr_tooltip.split(' / ')
3025 info.update({
3026 'like_count': str_to_int(like_count),
3027 'dislike_count': str_to_int(dislike_count),
3028 })
3029 vsir = content.get('videoSecondaryInfoRenderer')
3030 if vsir:
3031 info['channel'] = self._get_text(vsir, ('owner', 'videoOwnerRenderer', 'title'))
3032 rows = try_get(
3033 vsir,
3034 lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows'],
3035 list) or []
3036 multiple_songs = False
3037 for row in rows:
3038 if try_get(row, lambda x: x['metadataRowRenderer']['hasDividerLine']) is True:
3039 multiple_songs = True
3040 break
3041 for row in rows:
3042 mrr = row.get('metadataRowRenderer') or {}
3043 mrr_title = mrr.get('title')
3044 if not mrr_title:
3045 continue
3046 mrr_title = self._get_text(mrr, 'title')
3047 mrr_contents_text = self._get_text(mrr, ('contents', 0))
3048 if mrr_title == 'License':
3049 info['license'] = mrr_contents_text
3050 elif not multiple_songs:
3051 if mrr_title == 'Album':
3052 info['album'] = mrr_contents_text
3053 elif mrr_title == 'Artist':
3054 info['artist'] = mrr_contents_text
3055 elif mrr_title == 'Song':
3056 info['track'] = mrr_contents_text
3057
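# Fall back to the uploader fields when the corresponding channel fields are missing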
3058 fallbacks = {
3059 'channel': 'uploader',
3060 'channel_id': 'uploader_id',
3061 'channel_url': 'uploader_url',
3062 }
3063 for to, frm in fallbacks.items():
3064 if not info.get(to):
3065 info[to] = info.get(frm)
3066
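# Mirror artist/track into the generic creator/alt_title fields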
3067 for s_k, d_k in [('artist', 'creator'), ('track', 'alt_title')]:
3068 v = info.get(s_k)
3069 if v:
3070 info[d_k] = v
3071
3072 is_private = get_first(video_details, 'isPrivate', expected_type=bool)
3073 is_unlisted = get_first(microformats, 'isUnlisted', expected_type=bool)
3074 is_membersonly = None
3075 is_premium = None
3076 if initial_data and is_private is not None:
3077 is_membersonly = False
3078 is_premium = False
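# Derive members-only/premium/unlisted status from the badges on the watch page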
3079 contents = try_get(initial_data, lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'], list) or []
3080 badge_labels = set()
3081 for content in contents:
3082 if not isinstance(content, dict):
3083 continue
3084 badge_labels.update(self._extract_badges(content.get('videoPrimaryInfoRenderer')))
3085 for badge_label in badge_labels:
3086 if badge_label.lower() == 'members only':
3087 is_membersonly = True
3088 elif badge_label.lower() == 'premium':
3089 is_premium = True
3090 elif badge_label.lower() == 'unlisted':
3091 is_unlisted = True
3092
3093 info['availability'] = self._availability(
3094 is_private=is_private,
3095 needs_premium=is_premium,
3096 needs_subscription=is_membersonly,
3097 needs_auth=info['age_limit'] >= 18,
3098 is_unlisted=None if is_private is None else is_unlisted)
3099
3100 info['__post_extractor'] = self.extract_comments(master_ytcfg, video_id, contents, webpage)
3101
3102 self.mark_watched(video_id, player_responses)
3103
3104 return info
3105
3106
3107class YoutubeTabIE(YoutubeBaseInfoExtractor):
3108 IE_DESC = 'YouTube Tabs'
3109 _VALID_URL = r'''(?x)
3110 https?://
3111 (?:\w+\.)?
3112 (?:
3113 youtube(?:kids)?\.com|
3114 %(invidious)s
3115 )/
3116 (?:
3117 (?P<channel_type>channel|c|user|browse)/|
3118 (?P<not_channel>
3119 feed/|hashtag/|
3120 (?:playlist|watch)\?.*?\blist=
3121 )|
3122 (?!(?:%(reserved_names)s)\b) # Direct URLs
3123 )
3124 (?P<id>[^/?\#&]+)
3125 ''' % {
3126 'reserved_names': YoutubeBaseInfoExtractor._RESERVED_NAMES,
3127 'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
3128 }
3129 IE_NAME = 'youtube:tab'
3130
3131 _TESTS = [{
3132 'note': 'playlists, multipage',
3133 'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid',
3134 'playlist_mincount': 94,
3135 'info_dict': {
3136 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
3137 'title': 'Игорь Клейнер - Playlists',
3138 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
3139 'uploader': 'Игорь Клейнер',
3140 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
3141 },
3142 }, {
3143 'note': 'playlists, multipage, different order',
3144 'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
3145 'playlist_mincount': 94,
3146 'info_dict': {
3147 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
3148 'title': 'Игорь Клейнер - Playlists',
3149 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
3150 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
3151 'uploader': 'Игорь Клейнер',
3152 },
3153 }, {
3154 'note': 'playlists, series',
3155 'url': 'https://www.youtube.com/c/3blue1brown/playlists?view=50&sort=dd&shelf_id=3',
3156 'playlist_mincount': 5,
3157 'info_dict': {
3158 'id': 'UCYO_jab_esuFRV4b17AJtAw',
3159 'title': '3Blue1Brown - Playlists',
3160 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
3161 'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
3162 'uploader': '3Blue1Brown',
3163 },
3164 }, {
3165 'note': 'playlists, singlepage',
3166 'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
3167 'playlist_mincount': 4,
3168 'info_dict': {
3169 'id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
3170 'title': 'ThirstForScience - Playlists',
3171 'description': 'md5:609399d937ea957b0f53cbffb747a14c',
3172 'uploader': 'ThirstForScience',
3173 'uploader_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
3174 }
3175 }, {
3176 'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',
3177 'only_matching': True,
3178 }, {
3179 'note': 'basic, single video playlist',
3180 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
3181 'info_dict': {
3182 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
3183 'uploader': 'Sergey M.',
3184 'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
3185 'title': 'youtube-dl public playlist',
3186 },
3187 'playlist_count': 1,
3188 }, {
3189 'note': 'empty playlist',
3190 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
3191 'info_dict': {
3192 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
3193 'uploader': 'Sergey M.',
3194 'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
3195 'title': 'youtube-dl empty playlist',
3196 },
3197 'playlist_count': 0,
3198 }, {
3199 'note': 'Home tab',
3200 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/featured',
3201 'info_dict': {
3202 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3203 'title': 'lex will - Home',
3204 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3205 'uploader': 'lex will',
3206 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3207 },
3208 'playlist_mincount': 2,
3209 }, {
3210 'note': 'Videos tab',
3211 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos',
3212 'info_dict': {
3213 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3214 'title': 'lex will - Videos',
3215 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3216 'uploader': 'lex will',
3217 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3218 },
3219 'playlist_mincount': 975,
3220 }, {
3221 'note': 'Videos tab, sorted by popular',
3222 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos?view=0&sort=p&flow=grid',
3223 'info_dict': {
3224 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3225 'title': 'lex will - Videos',
3226 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3227 'uploader': 'lex will',
3228 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3229 },
3230 'playlist_mincount': 199,
3231 }, {
3232 'note': 'Playlists tab',
3233 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/playlists',
3234 'info_dict': {
3235 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3236 'title': 'lex will - Playlists',
3237 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3238 'uploader': 'lex will',
3239 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3240 },
3241 'playlist_mincount': 17,
3242 }, {
3243 'note': 'Community tab',
3244 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community',
3245 'info_dict': {
3246 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3247 'title': 'lex will - Community',
3248 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3249 'uploader': 'lex will',
3250 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3251 },
3252 'playlist_mincount': 18,
3253 }, {
3254 'note': 'Channels tab',
3255 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels',
3256 'info_dict': {
3257 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3258 'title': 'lex will - Channels',
3259 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3260 'uploader': 'lex will',
3261 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3262 },
3263 'playlist_mincount': 12,
3264 }, {
3265 'note': 'Search tab',
3266 'url': 'https://www.youtube.com/c/3blue1brown/search?query=linear%20algebra',
3267 'playlist_mincount': 40,
3268 'info_dict': {
3269 'id': 'UCYO_jab_esuFRV4b17AJtAw',
3270 'title': '3Blue1Brown - Search - linear algebra',
3271 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
3272 'uploader': '3Blue1Brown',
3273 'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
3274 },
3275 }, {
3276 'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',
3277 'only_matching': True,
3278 }, {
3279 'url': 'https://www.youtubekids.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
3280 'only_matching': True,
3281 }, {
3282 'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
3283 'only_matching': True,
3284 }, {
3285 'note': 'Playlist with deleted videos (#651). As a bonus, video #51 appears twice in this list.',
3286 'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
3287 'info_dict': {
3288 'title': '29C3: Not my department',
3289 'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
3290 'uploader': 'Christiaan008',
3291 'uploader_id': 'UCEPzS1rYsrkqzSLNp76nrcg',
3292 'description': 'md5:a14dc1a8ef8307a9807fe136a0660268',
3293 },
3294 'playlist_count': 96,
3295 }, {
3296 'note': 'Large playlist',
3297 'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
3298 'info_dict': {
3299 'title': 'Uploads from Cauchemar',
3300 'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
3301 'uploader': 'Cauchemar',
3302 'uploader_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',
3303 },
3304 'playlist_mincount': 1123,
3305 }, {
3306 'note': 'even larger playlist, 8832 videos',
3307 'url': 'http://www.youtube.com/user/NASAgovVideo/videos',
3308 'only_matching': True,
3309 }, {
3310 'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
3311 'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
3312 'info_dict': {
3313 'title': 'Uploads from Interstellar Movie',
3314 'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
3315 'uploader': 'Interstellar Movie',
3316 'uploader_id': 'UCXw-G3eDE9trcvY2sBMM_aA',
3317 },
3318 'playlist_mincount': 21,
3319 }, {
3320 'note': 'Playlist with "show unavailable videos" button',
3321 'url': 'https://www.youtube.com/playlist?list=UUTYLiWFZy8xtPwxFwX9rV7Q',
3322 'info_dict': {
3323 'title': 'Uploads from Phim Siêu Nhân Nhật Bản',
3324 'id': 'UUTYLiWFZy8xtPwxFwX9rV7Q',
3325 'uploader': 'Phim Siêu Nhân Nhật Bản',
3326 'uploader_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',
3327 },
3328 'playlist_mincount': 200,
3329 }, {
3330 'note': 'Playlist with unavailable videos in page 7',
3331 'url': 'https://www.youtube.com/playlist?list=UU8l9frL61Yl5KFOl87nIm2w',
3332 'info_dict': {
3333 'title': 'Uploads from BlankTV',
3334 'id': 'UU8l9frL61Yl5KFOl87nIm2w',
3335 'uploader': 'BlankTV',
3336 'uploader_id': 'UC8l9frL61Yl5KFOl87nIm2w',
3337 },
3338 'playlist_mincount': 1000,
3339 }, {
3340 'note': 'https://github.com/ytdl-org/youtube-dl/issues/21844',
3341 'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
3342 'info_dict': {
3343 'title': 'Data Analysis with Dr Mike Pound',
3344 'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
3345 'uploader_id': 'UC9-y-6csu5WGm29I7JiwpnA',
3346 'uploader': 'Computerphile',
3347 'description': 'md5:7f567c574d13d3f8c0954d9ffee4e487',
3348 },
3349 'playlist_mincount': 11,
3350 }, {
3351 'url': 'https://invidio.us/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
3352 'only_matching': True,
3353 }, {
3354 'note': 'Playlist URL that does not actually serve a playlist',
3355 'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
3356 'info_dict': {
3357 'id': 'FqZTN594JQw',
3358 'ext': 'webm',
3359 'title': "Smiley's People 01 detective, Adventure Series, Action",
3360 'uploader': 'STREEM',
3361 'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
3362 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
3363 'upload_date': '20150526',
3364 'license': 'Standard YouTube License',
3365 'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
3366 'categories': ['People & Blogs'],
3367 'tags': list,
3368 'view_count': int,
3369 'like_count': int,
3370 'dislike_count': int,
3371 },
3372 'params': {
3373 'skip_download': True,
3374 },
3375 'skip': 'This video is not available.',
3376 'add_ie': [YoutubeIE.ie_key()],
3377 }, {
3378 'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',
3379 'only_matching': True,
3380 }, {
3381 'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
3382 'only_matching': True,
3383 }, {
3384 'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',
3385 'info_dict': {
3386 'id': '3yImotZU3tw', # This will keep changing
3387 'ext': 'mp4',
3388 'title': compat_str,
3389 'uploader': 'Sky News',
3390 'uploader_id': 'skynews',
3391 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/skynews',
3392 'upload_date': r're:\d{8}',
3393 'description': compat_str,
3394 'categories': ['News & Politics'],
3395 'tags': list,
3396 'like_count': int,
3397 'dislike_count': int,
3398 },
3399 'params': {
3400 'skip_download': True,
3401 },
3402 'expected_warnings': ['Downloading just video ', 'Ignoring subtitle tracks found in '],
3403 }, {
3404 'url': 'https://www.youtube.com/user/TheYoungTurks/live',
3405 'info_dict': {
3406 'id': 'a48o2S1cPoo',
3407 'ext': 'mp4',
3408 'title': 'The Young Turks - Live Main Show',
3409 'uploader': 'The Young Turks',
3410 'uploader_id': 'TheYoungTurks',
3411 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
3412 'upload_date': '20150715',
3413 'license': 'Standard YouTube License',
3414 'description': 'md5:438179573adcdff3c97ebb1ee632b891',
3415 'categories': ['News & Politics'],
3416 'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
3417 'like_count': int,
3418 'dislike_count': int,
3419 },
3420 'params': {
3421 'skip_download': True,
3422 },
3423 'only_matching': True,
3424 }, {
3425 'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
3426 'only_matching': True,
3427 }, {
3428 'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
3429 'only_matching': True,
3430 }, {
3431 'note': 'A channel that is not live. Should raise an error',
3432 'url': 'https://www.youtube.com/user/numberphile/live',
3433 'only_matching': True,
3434 }, {
3435 'url': 'https://www.youtube.com/feed/trending',
3436 'only_matching': True,
3437 }, {
3438 'url': 'https://www.youtube.com/feed/library',
3439 'only_matching': True,
3440 }, {
3441 'url': 'https://www.youtube.com/feed/history',
3442 'only_matching': True,
3443 }, {
3444 'url': 'https://www.youtube.com/feed/subscriptions',
3445 'only_matching': True,
3446 }, {
3447 'url': 'https://www.youtube.com/feed/watch_later',
3448 'only_matching': True,
3449 }, {
3450 'note': 'Recommended - redirects to home page.',
3451 'url': 'https://www.youtube.com/feed/recommended',
3452 'only_matching': True,
3453 }, {
3454 'note': 'inline playlist whose continuations do not always work',
3455 'url': 'https://www.youtube.com/watch?v=UC6u0Tct-Fo&list=PL36D642111D65BE7C',
3456 'only_matching': True,
3457 }, {
3458 'url': 'https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8',
3459 'only_matching': True,
3460 }, {
3461 'url': 'https://www.youtube.com/course',
3462 'only_matching': True,
3463 }, {
3464 'url': 'https://www.youtube.com/zsecurity',
3465 'only_matching': True,
3466 }, {
3467 'url': 'http://www.youtube.com/NASAgovVideo/videos',
3468 'only_matching': True,
3469 }, {
3470 'url': 'https://www.youtube.com/TheYoungTurks/live',
3471 'only_matching': True,
3472 }, {
3473 'url': 'https://www.youtube.com/hashtag/cctv9',
3474 'info_dict': {
3475 'id': 'cctv9',
3476 'title': '#cctv9',
3477 },
3478 'playlist_mincount': 350,
3479 }, {
3480 'url': 'https://www.youtube.com/watch?list=PLW4dVinRY435CBE_JD3t-0SRXKfnZHS1P&feature=youtu.be&v=M9cJMXmQ_ZU',
3481 'only_matching': True,
3482 }, {
3483 'note': 'Requires Premium: should request additional YTM-info webpage (and have format 141) for videos in playlist',
3484 'url': 'https://music.youtube.com/playlist?list=PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
3485 'only_matching': True
3486 }, {
3487 'note': '/browse/ should redirect to /channel/',
3488 'url': 'https://music.youtube.com/browse/UC1a8OFewdjuLq6KlF8M_8Ng',
3489 'only_matching': True
3490 }, {
3491 'note': 'VLPL, should redirect to playlist?list=PL...',
3492 'url': 'https://music.youtube.com/browse/VLPLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
3493 'info_dict': {
3494 'id': 'PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
3495 'uploader': 'NoCopyrightSounds',
3496 'description': 'Providing you with copyright free / safe music for gaming, live streaming, studying and more!',
3497 'uploader_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',
3498 'title': 'NCS Releases',
3499 },
3500 'playlist_mincount': 166,
3501 }, {
3502 'note': 'Topic, should redirect to playlist?list=UU...',
3503 'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',
3504 'info_dict': {
3505 'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',
3506 'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
3507 'title': 'Uploads from Royalty Free Music - Topic',
3508 'uploader': 'Royalty Free Music - Topic',
3509 },
3510 'expected_warnings': [
3511 'A channel/user page was given',
3512 'The URL does not have a videos tab',
3513 ],
3514 'playlist_mincount': 101,
3515 }, {
3516 'note': 'Topic without a UU playlist',
3517 'url': 'https://www.youtube.com/channel/UCtFRv9O2AHqOZjjynzrv-xg',
3518 'info_dict': {
3519 'id': 'UCtFRv9O2AHqOZjjynzrv-xg',
3520 'title': 'UCtFRv9O2AHqOZjjynzrv-xg',
3521 },
3522 'expected_warnings': [
3523 'A channel/user page was given',
3524 'The URL does not have a videos tab',
3525 'Falling back to channel URL',
3526 ],
3527 'playlist_mincount': 9,
3528 }, {
3529 'note': 'Youtube music Album',
3530 'url': 'https://music.youtube.com/browse/MPREb_gTAcphH99wE',
3531 'info_dict': {
3532 'id': 'OLAK5uy_l1m0thk3g31NmIIz_vMIbWtyv7eZixlH0',
3533 'title': 'Album - Royalty Free Music Library V2 (50 Songs)',
3534 },
3535 'playlist_count': 50,
3536 }, {
3537 'note': 'unlisted single video playlist',
3538 'url': 'https://www.youtube.com/playlist?list=PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
3539 'info_dict': {
3540 'uploader_id': 'UC9zHu_mHU96r19o-wV5Qs1Q',
3541 'uploader': 'colethedj',
3542 'id': 'PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
3543 'title': 'yt-dlp unlisted playlist test',
3544 'availability': 'unlisted'
3545 },
3546 'playlist_count': 1,
3547 }, {
3548 'note': 'API Fallback: Recommended - redirects to home page. Requires visitorData',
3549 'url': 'https://www.youtube.com/feed/recommended',
3550 'info_dict': {
3551 'id': 'recommended',
3552 'title': 'recommended',
3553 },
3554 'playlist_mincount': 50,
3555 'params': {
3556 'skip_download': True,
3557 'extractor_args': {'youtubetab': {'skip': ['webpage']}}
3558 },
3559 }, {
3560 'note': 'API Fallback: /videos tab, sorted by oldest first',
3561 'url': 'https://www.youtube.com/user/theCodyReeder/videos?view=0&sort=da&flow=grid',
3562 'info_dict': {
3563 'id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',
3564 'title': 'Cody\'sLab - Videos',
3565 'description': 'md5:d083b7c2f0c67ee7a6c74c3e9b4243fa',
3566 'uploader': 'Cody\'sLab',
3567 'uploader_id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',
3568 },
3569 'playlist_mincount': 650,
3570 'params': {
3571 'skip_download': True,
3572 'extractor_args': {'youtubetab': {'skip': ['webpage']}}
3573 },
3574 }, {
3575 'note': 'API Fallback: Topic, should redirect to playlist?list=UU...',
3576 'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',
3577 'info_dict': {
3578 'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',
3579 'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
3580 'title': 'Uploads from Royalty Free Music - Topic',
3581 'uploader': 'Royalty Free Music - Topic',
3582 },
3583 'expected_warnings': [
3584 'A channel/user page was given',
3585 'The URL does not have a videos tab',
3586 ],
3587 'playlist_mincount': 101,
3588 'params': {
3589 'skip_download': True,
3590 'extractor_args': {'youtubetab': {'skip': ['webpage']}}
3591 },
3592 }]
3593
3594 @classmethod
3595 def suitable(cls, url):
3596 return False if YoutubeIE.suitable(url) else super(
3597 YoutubeTabIE, cls).suitable(url)
3598
3599 def _extract_channel_id(self, webpage):
3600 channel_id = self._html_search_meta(
3601 'channelId', webpage, 'channel id', default=None)
3602 if channel_id:
3603 return channel_id
3604 channel_url = self._html_search_meta(
3605 ('og:url', 'al:ios:url', 'al:android:url', 'al:web:url',
3606 'twitter:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad',
3607 'twitter:app:url:googleplay'), webpage, 'channel url')
3608 return self._search_regex(
3609 r'https?://(?:www\.)?youtube\.com/channel/([^/?#&]+)',
3610 channel_url, 'channel id')
3611
3612 @staticmethod
3613 def _extract_basic_item_renderer(item):
3614 # Modified from _extract_grid_item_renderer
3615 known_basic_renderers = (
3616 'playlistRenderer', 'videoRenderer', 'channelRenderer', 'showRenderer'
3617 )
3618 for key, renderer in item.items():
3619 if not isinstance(renderer, dict):
3620 continue
3621 elif key in known_basic_renderers:
3622 return renderer
3623 elif key.startswith('grid') and key.endswith('Renderer'):
3624 return renderer
3625
3626 def _grid_entries(self, grid_renderer):
3627 for item in grid_renderer['items']:
3628 if not isinstance(item, dict):
3629 continue
3630 renderer = self._extract_basic_item_renderer(item)
3631 if not isinstance(renderer, dict):
3632 continue
3633 title = self._get_text(renderer, 'title')
3634
3635 # playlist
3636 playlist_id = renderer.get('playlistId')
3637 if playlist_id:
3638 yield self.url_result(
3639 'https://www.youtube.com/playlist?list=%s' % playlist_id,
3640 ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
3641 video_title=title)
3642 continue
3643 # video
3644 video_id = renderer.get('videoId')
3645 if video_id:
3646 yield self._extract_video(renderer)
3647 continue
3648 # channel
3649 channel_id = renderer.get('channelId')
3650 if channel_id:
3651 yield self.url_result(
3652 'https://www.youtube.com/channel/%s' % channel_id,
3653 ie=YoutubeTabIE.ie_key(), video_title=title)
3654 continue
3655 # generic endpoint URL support
3656 ep_url = urljoin('https://www.youtube.com/', try_get(
3657 renderer, lambda x: x['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'],
3658 compat_str))
3659 if ep_url:
3660 for ie in (YoutubeTabIE, YoutubePlaylistIE, YoutubeIE):
3661 if ie.suitable(ep_url):
3662 yield self.url_result(
3663 ep_url, ie=ie.ie_key(), video_id=ie._match_id(ep_url), video_title=title)
3664 break
3665
3666 def _shelf_entries_from_content(self, shelf_renderer):
3667 content = shelf_renderer.get('content')
3668 if not isinstance(content, dict):
3669 return
3670 renderer = content.get('gridRenderer') or content.get('expandedShelfContentsRenderer')
3671 if renderer:
3672 # TODO: add support for nested playlists so that each shelf is processed
3673 # as a separate playlist
3674 # TODO: this includes only the first N items
3675 for entry in self._grid_entries(renderer):
3676 yield entry
3677 renderer = content.get('horizontalListRenderer')
3678 if renderer:
3679 # TODO
3680 pass
3681
3682 def _shelf_entries(self, shelf_renderer, skip_channels=False):
3683 ep = try_get(
3684 shelf_renderer, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
3685 compat_str)
3686 shelf_url = urljoin('https://www.youtube.com', ep)
3687 if shelf_url:
3688 # Skipping links to other channels; note that checking for
3689 # endpoint.commandMetadata.webCommandMetadata.webPageType == WEB_PAGE_TYPE_CHANNEL
3690 # will not work
3691 if skip_channels and '/channels?' in shelf_url:
3692 return
3693 title = self._get_text(shelf_renderer, 'title')
3694 yield self.url_result(shelf_url, video_title=title)
3695 # Shelf may not contain a shelf URL; fall back to extraction from content
3696 for entry in self._shelf_entries_from_content(shelf_renderer):
3697 yield entry
3698
3699 def _playlist_entries(self, video_list_renderer):
3700 for content in video_list_renderer['contents']:
3701 if not isinstance(content, dict):
3702 continue
3703 renderer = content.get('playlistVideoRenderer') or content.get('playlistPanelVideoRenderer')
3704 if not isinstance(renderer, dict):
3705 continue
3706 video_id = renderer.get('videoId')
3707 if not video_id:
3708 continue
3709 yield self._extract_video(renderer)
3710
3711 def _rich_entries(self, rich_grid_renderer):
3712 renderer = try_get(
3713 rich_grid_renderer, lambda x: x['content']['videoRenderer'], dict) or {}
3714 video_id = renderer.get('videoId')
3715 if not video_id:
3716 return
3717 yield self._extract_video(renderer)
3718
3719 def _video_entry(self, video_renderer):
3720 video_id = video_renderer.get('videoId')
3721 if video_id:
3722 return self._extract_video(video_renderer)
3723
3724 def _post_thread_entries(self, post_thread_renderer):
3725 post_renderer = try_get(
3726 post_thread_renderer, lambda x: x['post']['backstagePostRenderer'], dict)
3727 if not post_renderer:
3728 return
3729 # video attachment
3730 video_renderer = try_get(
3731 post_renderer, lambda x: x['backstageAttachment']['videoRenderer'], dict) or {}
3732 video_id = video_renderer.get('videoId')
3733 if video_id:
3734 entry = self._extract_video(video_renderer)
3735 if entry:
3736 yield entry
3737 # playlist attachment
3738 playlist_id = try_get(
3739 post_renderer, lambda x: x['backstageAttachment']['playlistRenderer']['playlistId'], compat_str)
3740 if playlist_id:
3741 yield self.url_result(
3742 'https://www.youtube.com/playlist?list=%s' % playlist_id,
3743 ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
3744 # inline video links
3745 runs = try_get(post_renderer, lambda x: x['contentText']['runs'], list) or []
3746 for run in runs:
3747 if not isinstance(run, dict):
3748 continue
3749 ep_url = try_get(
3750 run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'], compat_str)
3751 if not ep_url:
3752 continue
3753 if not YoutubeIE.suitable(ep_url):
3754 continue
3755 ep_video_id = YoutubeIE._match_id(ep_url)
3756 if video_id == ep_video_id:
3757 continue
3758 yield self.url_result(ep_url, ie=YoutubeIE.ie_key(), video_id=ep_video_id)
3759
3760 def _post_thread_continuation_entries(self, post_thread_continuation):
3761 contents = post_thread_continuation.get('contents')
3762 if not isinstance(contents, list):
3763 return
3764 for content in contents:
3765 renderer = content.get('backstagePostThreadRenderer')
3766 if not isinstance(renderer, dict):
3767 continue
3768 for entry in self._post_thread_entries(renderer):
3769 yield entry
3770
3771 r''' # unused
3772 def _rich_grid_entries(self, contents):
3773 for content in contents:
3774 video_renderer = try_get(content, lambda x: x['richItemRenderer']['content']['videoRenderer'], dict)
3775 if video_renderer:
3776 entry = self._video_entry(video_renderer)
3777 if entry:
3778 yield entry
3779 '''
3780 def _entries(self, tab, item_id, ytcfg, account_syncid, visitor_data):
3781
3782 def extract_entries(parent_renderer): # this needs to be called again for continuations to work with feeds
3783 contents = try_get(parent_renderer, lambda x: x['contents'], list) or []
3784 for content in contents:
3785 if not isinstance(content, dict):
3786 continue
3787 is_renderer = try_get(content, lambda x: x['itemSectionRenderer'], dict)
3788 if not is_renderer:
3789 renderer = content.get('richItemRenderer')
3790 if renderer:
3791 for entry in self._rich_entries(renderer):
3792 yield entry
3793 continuation_list[0] = self._extract_continuation(parent_renderer)
3794 continue
3795 isr_contents = try_get(is_renderer, lambda x: x['contents'], list) or []
3796 for isr_content in isr_contents:
3797 if not isinstance(isr_content, dict):
3798 continue
3799
3800 known_renderers = {
3801 'playlistVideoListRenderer': self._playlist_entries,
3802 'gridRenderer': self._grid_entries,
3803 'shelfRenderer': lambda x: self._shelf_entries(x, tab.get('title') != 'Channels'),
3804 'backstagePostThreadRenderer': self._post_thread_entries,
3805 'videoRenderer': lambda x: [self._video_entry(x)],
3806 }
3807 for key, renderer in isr_content.items():
3808 if key not in known_renderers:
3809 continue
3810 for entry in known_renderers[key](renderer):
3811 if entry:
3812 yield entry
3813 continuation_list[0] = self._extract_continuation(renderer)
3814 break
3815
3816 if not continuation_list[0]:
3817 continuation_list[0] = self._extract_continuation(is_renderer)
3818
3819 if not continuation_list[0]:
3820 continuation_list[0] = self._extract_continuation(parent_renderer)
3821
3822 continuation_list = [None] # Python 2 does not support nonlocal
3823 tab_content = try_get(tab, lambda x: x['content'], dict)
3824 if not tab_content:
3825 return
3826 parent_renderer = (
3827 try_get(tab_content, lambda x: x['sectionListRenderer'], dict)
3828 or try_get(tab_content, lambda x: x['richGridRenderer'], dict) or {})
3829 for entry in extract_entries(parent_renderer):
3830 yield entry
3831 continuation = continuation_list[0]
3832
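# Fetch continuation pages until no further continuation token is found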
3833 for page_num in itertools.count(1):
3834 if not continuation:
3835 break
3836 headers = self.generate_api_headers(
3837 ytcfg=ytcfg, account_syncid=account_syncid, visitor_data=visitor_data)
3838 response = self._extract_response(
3839 item_id='%s page %s' % (item_id, page_num),
3840 query=continuation, headers=headers, ytcfg=ytcfg,
3841 check_get_keys=('continuationContents', 'onResponseReceivedActions', 'onResponseReceivedEndpoints'))
3842
3843 if not response:
3844 break
3845 # Extracting updated visitor data is required to prevent an infinite extraction loop in some cases
3846 # See: https://github.com/ytdl-org/youtube-dl/issues/28702
3847 visitor_data = self._extract_visitor_data(response) or visitor_data
3848
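# Some continuation responses return their items under continuationContents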
3849 known_continuation_renderers = {
3850 'playlistVideoListContinuation': self._playlist_entries,
3851 'gridContinuation': self._grid_entries,
3852 'itemSectionContinuation': self._post_thread_continuation_entries,
3853 'sectionListContinuation': extract_entries, # for feeds
3854 }
3855 continuation_contents = try_get(
3856 response, lambda x: x['continuationContents'], dict) or {}
3857 continuation_renderer = None
3858 for key, value in continuation_contents.items():
3859 if key not in known_continuation_renderers:
3860 continue
3861 continuation_renderer = value
3862 continuation_list = [None]
3863 for entry in known_continuation_renderers[key](continuation_renderer):
3864 yield entry
3865 continuation = continuation_list[0] or self._extract_continuation(continuation_renderer)
3866 break
3867 if continuation_renderer:
3868 continue
3869
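# Other continuation responses return their items via appendContinuationItemsAction
# under onResponseReceivedActions/onResponseReceivedEndpoints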
3870 known_renderers = {
3871 'gridPlaylistRenderer': (self._grid_entries, 'items'),
3872 'gridVideoRenderer': (self._grid_entries, 'items'),
3873 'gridChannelRenderer': (self._grid_entries, 'items'),
3874 'playlistVideoRenderer': (self._playlist_entries, 'contents'),
3875 'itemSectionRenderer': (extract_entries, 'contents'), # for feeds
3876 'richItemRenderer': (extract_entries, 'contents'), # for hashtag
3877 'backstagePostThreadRenderer': (self._post_thread_continuation_entries, 'contents')
3878 }
3879 on_response_received = dict_get(response, ('onResponseReceivedActions', 'onResponseReceivedEndpoints'))
3880 continuation_items = try_get(
3881 on_response_received, lambda x: x[0]['appendContinuationItemsAction']['continuationItems'], list)
3882 continuation_item = try_get(continuation_items, lambda x: x[0], dict) or {}
3883 video_items_renderer = None
3884 for key, value in continuation_item.items():
3885 if key not in known_renderers:
3886 continue
3887 video_items_renderer = {known_renderers[key][1]: continuation_items}
3888 continuation_list = [None]
3889 for entry in known_renderers[key][0](video_items_renderer):
3890 yield entry
3891 continuation = continuation_list[0] or self._extract_continuation(video_items_renderer)
3892 break
3893 if video_items_renderer:
3894 continue
3895 break
3896
3897 @staticmethod
3898 def _extract_selected_tab(tabs):
3899 for tab in tabs:
3900 renderer = dict_get(tab, ('tabRenderer', 'expandableTabRenderer')) or {}
3901 if renderer.get('selected') is True:
3902 return renderer
3903 else:
3904 raise ExtractorError('Unable to find selected tab')
3905
3906 @classmethod
3907 def _extract_uploader(cls, data):
3908 uploader = {}
3909 renderer = cls._extract_sidebar_info_renderer(data, 'playlistSidebarSecondaryInfoRenderer') or {}
3910 owner = try_get(
3911 renderer, lambda x: x['videoOwner']['videoOwnerRenderer']['title']['runs'][0], dict)
3912 if owner:
3913 uploader['uploader'] = owner.get('text')
3914 uploader['uploader_id'] = try_get(
3915 owner, lambda x: x['navigationEndpoint']['browseEndpoint']['browseId'], compat_str)
3916 uploader['uploader_url'] = urljoin(
3917 'https://www.youtube.com/',
3918 try_get(owner, lambda x: x['navigationEndpoint']['browseEndpoint']['canonicalBaseUrl'], compat_str))
3919 return {k: v for k, v in uploader.items() if v is not None}
3920
3921 def _extract_from_tabs(self, item_id, ytcfg, data, tabs):
3922 playlist_id = title = description = channel_url = channel_name = channel_id = None
3923 thumbnails_list = []
3924 tags = []
3925
3926 selected_tab = self._extract_selected_tab(tabs)
3927 renderer = try_get(
3928 data, lambda x: x['metadata']['channelMetadataRenderer'], dict)
3929 if renderer:
3930 channel_name = renderer.get('title')
3931 channel_url = renderer.get('channelUrl')
3932 channel_id = renderer.get('externalId')
3933 else:
3934 renderer = try_get(
3935 data, lambda x: x['metadata']['playlistMetadataRenderer'], dict)
3936
3937 if renderer:
3938 title = renderer.get('title')
3939 description = renderer.get('description', '')
3940 playlist_id = channel_id
3941 tags = renderer.get('keywords', '').split()
3942 thumbnails_list = (
3943 try_get(renderer, lambda x: x['avatar']['thumbnails'], list)
3944 or try_get(
3945 self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer'),
3946 lambda x: x['thumbnailRenderer']['playlistVideoThumbnailRenderer']['thumbnail']['thumbnails'],
3947 list)
3948 or [])
3949
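# Keep only thumbnails with a valid URL, normalizing width/height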
3950 thumbnails = []
3951 for t in thumbnails_list:
3952 if not isinstance(t, dict):
3953 continue
3954 thumbnail_url = url_or_none(t.get('url'))
3955 if not thumbnail_url:
3956 continue
3957 thumbnails.append({
3958 'url': thumbnail_url,
3959 'width': int_or_none(t.get('width')),
3960 'height': int_or_none(t.get('height')),
3961 })
3962 if playlist_id is None:
3963 playlist_id = item_id
3964 if title is None:
3965 title = (
3966 try_get(data, lambda x: x['header']['hashtagHeaderRenderer']['hashtag']['simpleText'])
3967 or playlist_id)
3968 title += format_field(selected_tab, 'title', ' - %s')
3969 title += format_field(selected_tab, 'expandedText', ' - %s')
3970 metadata = {
3971 'playlist_id': playlist_id,
3972 'playlist_title': title,
3973 'playlist_description': description,
3974 'uploader': channel_name,
3975 'uploader_id': channel_id,
3976 'uploader_url': channel_url,
3977 'thumbnails': thumbnails,
3978 'tags': tags,
3979 }
3980 availability = self._extract_availability(data)
3981 if availability:
3982 metadata['availability'] = availability
3983 if not channel_id:
3984 metadata.update(self._extract_uploader(data))
3985 metadata.update({
3986 'channel': metadata['uploader'],
3987 'channel_id': metadata['uploader_id'],
3988 'channel_url': metadata['uploader_url']})
3989 return self.playlist_result(
3990 self._entries(
3991 selected_tab, playlist_id, ytcfg,
3992 self._extract_account_syncid(ytcfg, data),
3993 self._extract_visitor_data(data, ytcfg)),
3994 **metadata)
3995
3996 def _extract_mix_playlist(self, playlist, playlist_id, data, ytcfg):
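# Mix playlists are effectively endless; keep requesting 'next' pages until the first video is seen again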
3997 first_id = last_id = response = None
3998 for page_num in itertools.count(1):
3999 videos = list(self._playlist_entries(playlist))
4000 if not videos:
4001 return
4002 start = next((i for i, v in enumerate(videos) if v['id'] == last_id), -1) + 1
4003 if start >= len(videos):
4004 return
4005 for video in videos[start:]:
4006 if video['id'] == first_id:
4007 self.to_screen('First video %s found again; Assuming end of Mix' % first_id)
4008 return
4009 yield video
4010 first_id = first_id or videos[0]['id']
4011 last_id = videos[-1]['id']
4012 watch_endpoint = try_get(
4013 playlist, lambda x: x['contents'][-1]['playlistPanelVideoRenderer']['navigationEndpoint']['watchEndpoint'])
4014 headers = self.generate_api_headers(
4015 ytcfg=ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
4016 visitor_data=self._extract_visitor_data(response, data, ytcfg))
4017 query = {
4018 'playlistId': playlist_id,
4019 'videoId': watch_endpoint.get('videoId') or last_id,
4020 'index': watch_endpoint.get('index') or len(videos),
4021 'params': watch_endpoint.get('params') or 'OAE%3D'
4022 }
4023 response = self._extract_response(
4024 item_id='%s page %d' % (playlist_id, page_num),
4025 query=query, ep='next', headers=headers, ytcfg=ytcfg,
4026 check_get_keys='contents'
4027 )
4028 playlist = try_get(
4029 response, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
4030
4031 def _extract_from_playlist(self, item_id, url, data, playlist, ytcfg):
4032 title = playlist.get('title') or try_get(
4033 data, lambda x: x['titleText']['simpleText'], compat_str)
4034 playlist_id = playlist.get('playlistId') or item_id
4035
4036 # Delegating everything except mix playlists to regular tab-based playlist URL
4037 playlist_url = urljoin(url, try_get(
4038 playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
4039 compat_str))
4040 if playlist_url and playlist_url != url:
4041 return self.url_result(
4042 playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
4043 video_title=title)
4044
4045 return self.playlist_result(
4046 self._extract_mix_playlist(playlist, playlist_id, data, ytcfg),
4047 playlist_id=playlist_id, playlist_title=title)
4048
4049 def _extract_availability(self, data):
4050 """
4051 Gets the availability of a given playlist/tab.
4052 Note: Unless YouTube tells us explicitly, we do not assume it is public
4053 @param data: response
4054 """
4055 is_private = is_unlisted = None
4056 renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer') or {}
4057 badge_labels = self._extract_badges(renderer)
4058
4059 # Personal playlists, when authenticated, have a dropdown visibility selector instead of a badge
4060 privacy_dropdown_entries = try_get(
4061 renderer, lambda x: x['privacyForm']['dropdownFormFieldRenderer']['dropdown']['dropdownRenderer']['entries'], list) or []
4062 for renderer_dict in privacy_dropdown_entries:
4063 is_selected = try_get(
4064 renderer_dict, lambda x: x['privacyDropdownItemRenderer']['isSelected'], bool) or False
4065 if not is_selected:
4066 continue
4067 label = self._get_text(renderer_dict, ('privacyDropdownItemRenderer', 'label'))
4068 if label:
4069 badge_labels.add(label.lower())
4070 break
4071
4072 for badge_label in badge_labels:
4073 if badge_label == 'unlisted':
4074 is_unlisted = True
4075 elif badge_label == 'private':
4076 is_private = True
4077 elif badge_label == 'public':
4078 is_unlisted = is_private = False
4079 return self._availability(is_private, False, False, False, is_unlisted)
4080
4081 @staticmethod
4082 def _extract_sidebar_info_renderer(data, info_renderer, expected_type=dict):
4083 sidebar_renderer = try_get(
4084 data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list) or []
4085 for item in sidebar_renderer:
4086 renderer = try_get(item, lambda x: x[info_renderer], expected_type)
4087 if renderer:
4088 return renderer
4089
4090 def _reload_with_unavailable_videos(self, item_id, data, ytcfg):
4091 """
4092 Get playlist with unavailable videos if the 'show unavailable videos' button exists.
4093 """
4094 browse_id = params = None
4095 renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer')
4096 if not renderer:
4097 return
4098 menu_renderer = try_get(
4099 renderer, lambda x: x['menu']['menuRenderer']['items'], list) or []
4100 for menu_item in menu_renderer:
4101 if not isinstance(menu_item, dict):
4102 continue
4103 nav_item_renderer = menu_item.get('menuNavigationItemRenderer')
4104 text = try_get(
4105 nav_item_renderer, lambda x: x['text']['simpleText'], compat_str)
4106 if not text or text.lower() != 'show unavailable videos':
4107 continue
4108 browse_endpoint = try_get(
4109 nav_item_renderer, lambda x: x['navigationEndpoint']['browseEndpoint'], dict) or {}
4110 browse_id = browse_endpoint.get('browseId')
4111 params = browse_endpoint.get('params')
4112 break
4113
4114 headers = self.generate_api_headers(
4115 ytcfg=ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
4116 visitor_data=self._extract_visitor_data(data, ytcfg))
4117 query = {
4118 'params': params or 'wgYCCAA=',
4119 'browseId': browse_id or 'VL%s' % item_id
4120 }
4121 return self._extract_response(
4122 item_id=item_id, headers=headers, query=query,
4123 check_get_keys='contents', fatal=False, ytcfg=ytcfg,
4124 note='Downloading API JSON with unavailable videos')
4125
4126 def _extract_webpage(self, url, item_id, fatal=True):
4127 retries = self.get_param('extractor_retries', 3)
4128 count = -1
4129 webpage = data = last_error = None
4130 while count < retries:
4131 count += 1
4132 # Sometimes youtube returns a webpage with incomplete ytInitialData
4133 # See: https://github.com/yt-dlp/yt-dlp/issues/116
4134 if last_error:
4135 self.report_warning('%s. Retrying ...' % last_error)
4136 try:
4137 webpage = self._download_webpage(
4138 url, item_id,
4139 note='Downloading webpage%s' % (' (retry #%d)' % count if count else '',))
4140 data = self.extract_yt_initial_data(item_id, webpage or '', fatal=fatal) or {}
4141 except ExtractorError as e:
4142 if isinstance(e.cause, network_exceptions):
4143 if not isinstance(e.cause, compat_HTTPError) or e.cause.code not in (403, 429):
4144 last_error = error_to_compat_str(e.cause or e.msg)
4145 if count < retries:
4146 continue
4147 if fatal:
4148 raise
4149 self.report_warning(error_to_compat_str(e))
4150 break
4151 else:
4152 try:
4153 self._extract_and_report_alerts(data)
4154 except ExtractorError as e:
4155 if fatal:
4156 raise
4157 self.report_warning(error_to_compat_str(e))
4158 break
4159
4160 if dict_get(data, ('contents', 'currentVideoEndpoint')):
4161 break
4162
4163 last_error = 'Incomplete yt initial data received'
4164 if count >= retries:
4165 if fatal:
4166 raise ExtractorError(last_error)
4167 self.report_warning(last_error)
4168 break
4169
4170 return webpage, data
4171
4172 def _extract_data(self, url, item_id, ytcfg=None, fatal=True, webpage_fatal=False, default_client='web'):
4173 data = None
4174 if 'webpage' not in self._configuration_arg('skip'):
4175 webpage, data = self._extract_webpage(url, item_id, fatal=webpage_fatal)
4176 ytcfg = ytcfg or self.extract_ytcfg(item_id, webpage)
4177 if not data:
4178 if not ytcfg and self.is_authenticated:
4179 msg = 'Playlists that require authentication may not extract correctly without a successful webpage download.'
4180 if 'authcheck' not in self._configuration_arg('skip') and fatal:
4181 raise ExtractorError(
4182 msg + ' If you are not downloading private content, or your cookies are only for the first account and channel,'
4183 ' pass "--extractor-args youtubetab:skip=authcheck" to skip this check',
4184 expected=True)
4185 self.report_warning(msg, only_once=True)
4186 data = self._extract_tab_endpoint(url, item_id, ytcfg, fatal=fatal, default_client=default_client)
4187 return data, ytcfg
4188
4189 def _extract_tab_endpoint(self, url, item_id, ytcfg=None, fatal=True, default_client='web'):
4190 headers = self.generate_api_headers(ytcfg=ytcfg, default_client=default_client)
4191 resolve_response = self._extract_response(
4192 item_id=item_id, query={'url': url}, check_get_keys='endpoint', headers=headers, ytcfg=ytcfg, fatal=fatal,
4193 ep='navigation/resolve_url', note='Downloading API parameters API JSON', default_client=default_client)
4194 endpoints = {'browseEndpoint': 'browse', 'watchEndpoint': 'next'}
4195 for ep_key, ep in endpoints.items():
4196 params = try_get(resolve_response, lambda x: x['endpoint'][ep_key], dict)
4197 if params:
4198 return self._extract_response(
4199 item_id=item_id, query=params, ep=ep, headers=headers,
4200 ytcfg=ytcfg, fatal=fatal, default_client=default_client,
4201 check_get_keys=('contents', 'currentVideoEndpoint'))
4202 err_note = 'Failed to resolve url (does the playlist exist?)'
4203 if fatal:
4204 raise ExtractorError(err_note, expected=True)
4205 self.report_warning(err_note, item_id)
4206
4207 @staticmethod
4208 def _smuggle_data(entries, data):
4209 for entry in entries:
4210 if data:
4211 entry['url'] = smuggle_url(entry['url'], data)
4212 yield entry
4213
4214 def _real_extract(self, url):
4215 url, smuggled_data = unsmuggle_url(url, {})
4216 if self.is_music_url(url):
4217 smuggled_data['is_music_url'] = True
4218 info_dict = self.__real_extract(url, smuggled_data)
4219 if info_dict.get('entries'):
4220 info_dict['entries'] = self._smuggle_data(info_dict['entries'], smuggled_data)
4221 return info_dict
4222
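# The (?(channel_type)...) conditional only matches a /tab component for channel-style URLs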
4223 _url_re = re.compile(r'(?P<pre>%s)(?(channel_type)(?P<tab>/\w+))?(?P<post>.*)$' % _VALID_URL)
4224
4225 def __real_extract(self, url, smuggled_data):
4226 item_id = self._match_id(url)
4227 url = compat_urlparse.urlunparse(
4228 compat_urlparse.urlparse(url)._replace(netloc='www.youtube.com'))
4229 compat_opts = self.get_param('compat_opts', [])
4230
4231 def get_mobj(url):
4232 mobj = self._url_re.match(url).groupdict()
4233 mobj.update((k, '') for k, v in mobj.items() if v is None)
4234 return mobj
4235
4236 mobj = get_mobj(url)
4237 # Youtube returns incomplete data if tabname is not lower case
4238 pre, tab, post, is_channel = mobj['pre'], mobj['tab'].lower(), mobj['post'], not mobj['not_channel']
4239 if is_channel:
4240 if smuggled_data.get('is_music_url'):
4241 if item_id[:2] == 'VL':
4242 # Youtube music VL channels have an equivalent playlist
4243 item_id = item_id[2:]
4244 pre, tab, post, is_channel = 'https://www.youtube.com/playlist?list=%s' % item_id, '', '', False
4245 elif item_id[:2] == 'MP':
4246 # Resolve albums (/[channel/browse]/MP...) to their equivalent playlist
4247 mdata = self._extract_tab_endpoint(
4248 'https://music.youtube.com/channel/%s' % item_id, item_id, default_client='web_music')
4249 murl = traverse_obj(
4250 mdata, ('microformat', 'microformatDataRenderer', 'urlCanonical'), get_all=False, expected_type=compat_str)
4251 if not murl:
4252 raise ExtractorError('Failed to resolve album to playlist.')
4253 return self.url_result(murl, ie=YoutubeTabIE.ie_key())
4254 elif mobj['channel_type'] == 'browse':
4255 # Youtube music /browse/ should be changed to /channel/
4256 pre = 'https://www.youtube.com/channel/%s' % item_id
4257 if is_channel and not tab and 'no-youtube-channel-redirect' not in compat_opts:
4258 # Home URLs should redirect to /videos/
4259 self.report_warning(
4260 'A channel/user page was given. All the channel\'s videos will be downloaded. '
4261 'To download only the videos in the home page, add a "/featured" to the URL')
4262 tab = '/videos'
4263
4264 url = ''.join((pre, tab, post))
4265 mobj = get_mobj(url)
4266
4267 # Handle both video/playlist URLs
4268 qs = parse_qs(url)
4269 video_id = qs.get('v', [None])[0]
4270 playlist_id = qs.get('list', [None])[0]
4271
4272 if not video_id and mobj['not_channel'].startswith('watch'):
4273 if not playlist_id:
4274 # If there is neither a video nor a playlist id, youtube redirects to the home page, which is undesirable
4275 raise ExtractorError('Unable to recognize tab page')
4276 # Common mistake: https://www.youtube.com/watch?list=playlist_id
4277 self.report_warning('A video URL was given without video ID. Trying to download playlist %s' % playlist_id)
4278 url = 'https://www.youtube.com/playlist?list=%s' % playlist_id
4279 mobj = get_mobj(url)
4280
4281 if video_id and playlist_id:
4282 if self.get_param('noplaylist'):
4283 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
4284 return self.url_result(f'https://www.youtube.com/watch?v={video_id}', ie=YoutubeIE.ie_key(), video_id=video_id)
4285 self.to_screen('Downloading playlist %s; add --no-playlist to just download video %s' % (playlist_id, video_id))
4286
4287 data, ytcfg = self._extract_data(url, item_id)
4288
4289 tabs = try_get(
4290 data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
4291 if tabs:
4292 selected_tab = self._extract_selected_tab(tabs)
4293 tab_name = selected_tab.get('title', '')
4294 if 'no-youtube-channel-redirect' not in compat_opts:
4295 if mobj['tab'] == '/live':
4296 # Live tab should have redirected to the video
4297 raise ExtractorError('The channel is not currently live', expected=True)
4298 if mobj['tab'] == '/videos' and tab_name.lower() != mobj['tab'][1:]:
4299 if not mobj['not_channel'] and item_id[:2] == 'UC':
4300 # Topic channels don't have /videos. Use the equivalent playlist instead
4301 self.report_warning('The URL does not have a %s tab. Trying to redirect to playlist UU%s instead' % (mobj['tab'][1:], item_id[2:]))
4302 pl_id = 'UU%s' % item_id[2:]
4303 pl_url = 'https://www.youtube.com/playlist?list=%s%s' % (pl_id, mobj['post'])
4304 try:
4305 data, ytcfg, item_id, url = *self._extract_data(pl_url, pl_id, ytcfg=ytcfg, fatal=True), pl_id, pl_url
4306 except ExtractorError:
4307 self.report_warning('The playlist returned an error. Falling back to channel URL')
4308 else:
4309 self.report_warning('The URL does not have a %s tab. %s is being downloaded instead' % (mobj['tab'][1:], tab_name))
4310
4311 self.write_debug('Final URL: %s' % url)
4312
4313 # YouTube sometimes provides a button to reload playlist with unavailable videos.
4314 if 'no-youtube-unavailable-videos' not in compat_opts:
4315 data = self._reload_with_unavailable_videos(item_id, data, ytcfg) or data
4316 self._extract_and_report_alerts(data, only_once=True)
4317 tabs = try_get(
4318 data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
4319 if tabs:
4320 return self._extract_from_tabs(item_id, ytcfg, data, tabs)
4321
4322 playlist = try_get(
4323 data, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
4324 if playlist:
4325 return self._extract_from_playlist(item_id, url, data, playlist, ytcfg)
4326
4327 video_id = try_get(
4328 data, lambda x: x['currentVideoEndpoint']['watchEndpoint']['videoId'],
4329 compat_str) or video_id
4330 if video_id:
4331 if mobj['tab'] != '/live': # live tab is expected to redirect to video
4332 self.report_warning('Unable to recognize playlist. Downloading just video %s' % video_id)
4333 return self.url_result(f'https://www.youtube.com/watch?v={video_id}', ie=YoutubeIE.ie_key(), video_id=video_id)
4334
4335 raise ExtractorError('Unable to recognize tab page')
4336
4337
4338class YoutubePlaylistIE(InfoExtractor):
4339 IE_DESC = 'YouTube playlists'
4340 _VALID_URL = r'''(?x)(?:
4341 (?:https?://)?
4342 (?:\w+\.)?
4343 (?:
4344 (?:
4345 youtube(?:kids)?\.com|
4346 %(invidious)s
4347 )
4348 /.*?\?.*?\blist=
4349 )?
4350 (?P<id>%(playlist_id)s)
4351 )''' % {
4352 'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE,
4353 'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
4354 }
4355 IE_NAME = 'youtube:playlist'
4356 _TESTS = [{
4357 'note': 'issue #673',
4358 'url': 'PLBB231211A4F62143',
4359 'info_dict': {
4360 'title': '[OLD]Team Fortress 2 (Class-based LP)',
4361 'id': 'PLBB231211A4F62143',
4362 'uploader': 'Wickydoo',
4363 'uploader_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
4364 'description': 'md5:8fa6f52abb47a9552002fa3ddfc57fc2',
4365 },
4366 'playlist_mincount': 29,
4367 }, {
4368 'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
4369 'info_dict': {
4370 'title': 'YDL_safe_search',
4371 'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
4372 },
4373 'playlist_count': 2,
4374 'skip': 'This playlist is private',
4375 }, {
4376 'note': 'embedded',
4377 'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
4378 'playlist_count': 4,
4379 'info_dict': {
4380 'title': 'JODA15',
4381 'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
4382 'uploader': 'milan',
4383 'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
4384 }
4385 }, {
4386 'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
4387 'playlist_mincount': 654,
4388 'info_dict': {
4389 'title': '2018 Chinese New Singles (11/6 updated)',
4390 'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
4391 'uploader': 'LBK',
4392 'uploader_id': 'UC21nz3_MesPLqtDqwdvnoxA',
4393 'description': 'md5:da521864744d60a198e3a88af4db0d9d',
4394 }
4395 }, {
4396 'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
4397 'only_matching': True,
4398 }, {
4399 # music album playlist
4400 'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
4401 'only_matching': True,
4402 }]
4403
4404 @classmethod
4405 def suitable(cls, url):
4406 if YoutubeTabIE.suitable(url):
4407 return False
4409 qs = parse_qs(url)
4410 if qs.get('v', [None])[0]:
4411 return False
4412 return super().suitable(url)
4413
4414 def _real_extract(self, url):
4415 playlist_id = self._match_id(url)
4416 is_music_url = YoutubeBaseInfoExtractor.is_music_url(url)
4417 url = update_url_query(
4418 'https://www.youtube.com/playlist',
4419 parse_qs(url) or {'list': playlist_id})
4420 if is_music_url:
4421 url = smuggle_url(url, {'is_music_url': True})
4422 return self.url_result(url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
4423
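# For reference (values taken from the first test case above): a bare playlist
# ID is rebuilt into the canonical playlist URL before being handed over, e.g.
#     update_url_query('https://www.youtube.com/playlist', {'list': 'PLBB231211A4F62143'})
#         -> 'https://www.youtube.com/playlist?list=PLBB231211A4F62143'
# and music.youtube.com URLs are additionally tagged via smuggle_url() with
# {'is_music_url': True} so the tab extractor knows their origin.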
4424
4425class YoutubeYtBeIE(InfoExtractor):
4426 IE_DESC = 'youtu.be'
4427 _VALID_URL = r'https?://youtu\.be/(?P<id>[0-9A-Za-z_-]{11})/*?.*?\blist=(?P<playlist_id>%(playlist_id)s)' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
4428 _TESTS = [{
4429 'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
4430 'info_dict': {
4431 'id': 'yeWKywCrFtk',
4432 'ext': 'mp4',
4433 'title': 'Small Scale Baler and Braiding Rugs',
4434 'uploader': 'Backus-Page House Museum',
4435 'uploader_id': 'backuspagemuseum',
4436 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
4437 'upload_date': '20161008',
4438 'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
4439 'categories': ['Nonprofits & Activism'],
4440 'tags': list,
4441 'like_count': int,
4442 'dislike_count': int,
4443 },
4444 'params': {
4445 'noplaylist': True,
4446 'skip_download': True,
4447 },
4448 }, {
4449 'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
4450 'only_matching': True,
4451 }]
4452
4453 def _real_extract(self, url):
4454 mobj = self._match_valid_url(url)
4455 video_id = mobj.group('id')
4456 playlist_id = mobj.group('playlist_id')
4457 return self.url_result(
4458 update_url_query('https://www.youtube.com/watch', {
4459 'v': video_id,
4460 'list': playlist_id,
4461 'feature': 'youtu.be',
4462 }), ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
4463
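# For reference (values from the test case above): a short link such as
#     https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5
# is rewritten by _real_extract into
#     https://www.youtube.com/watch?v=yeWKywCrFtk&list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5&feature=youtu.be
# and handed to YoutubeTabIE, which then picks the single video or the whole
# playlist (cf. the 'noplaylist' param in the test above).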
4464
4465class YoutubeYtUserIE(InfoExtractor):
4466 IE_DESC = 'YouTube user videos; "ytuser:" prefix'
4467 _VALID_URL = r'ytuser:(?P<id>.+)'
4468 _TESTS = [{
4469 'url': 'ytuser:phihag',
4470 'only_matching': True,
4471 }]
4472
4473 def _real_extract(self, url):
4474 user_id = self._match_id(url)
4475 return self.url_result(
4476 'https://www.youtube.com/user/%s/videos' % user_id,
4477 ie=YoutubeTabIE.ie_key(), video_id=user_id)
4478
4479
4480class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
4481 IE_NAME = 'youtube:favorites'
4482 IE_DESC = 'YouTube liked videos; ":ytfav" keyword (requires cookies)'
4483 _VALID_URL = r':ytfav(?:ou?rite)?s?'
4484 _LOGIN_REQUIRED = True
4485 _TESTS = [{
4486 'url': ':ytfav',
4487 'only_matching': True,
4488 }, {
4489 'url': ':ytfavorites',
4490 'only_matching': True,
4491 }]
4492
4493 def _real_extract(self, url):
4494 return self.url_result(
4495 'https://www.youtube.com/playlist?list=LL',
4496 ie=YoutubeTabIE.ie_key())
4497
4498
4499class YoutubeSearchIE(SearchInfoExtractor, YoutubeTabIE):
4500 IE_DESC = 'YouTube searches'
4501 IE_NAME = 'youtube:search'
4502 _SEARCH_KEY = 'ytsearch'
4503 _SEARCH_PARAMS = None
4504 _TESTS = []
4505
4506 def _search_results(self, query):
4507 data = {'query': query}
4508 if self._SEARCH_PARAMS:
4509 data['params'] = self._SEARCH_PARAMS
4510 continuation = {}
4511 for page_num in itertools.count(1):
4512 data.update(continuation)
4513 search = self._extract_response(
4514 item_id='query "%s" page %s' % (query, page_num), ep='search', query=data,
4515 check_get_keys=('contents', 'onResponseReceivedCommands')
4516 )
4517 if not search:
4518 break
4519 slr_contents = try_get(
4520 search,
4521 (lambda x: x['contents']['twoColumnSearchResultsRenderer']['primaryContents']['sectionListRenderer']['contents'],
4522 lambda x: x['onResponseReceivedCommands'][0]['appendContinuationItemsAction']['continuationItems']),
4523 list)
4524 if not slr_contents:
4525 break
4526
4527 # YouTube sometimes adds promoted content to search results,
4528 # changing the index locations of the videos and the continuation token,
4529 # so we search through all entries until we find them.
4530 continuation = None
4531 for slr_content in slr_contents:
4532 if not continuation:
4533 continuation = self._extract_continuation({'contents': [slr_content]})
4534
4535 isr_contents = try_get(
4536 slr_content,
4537 lambda x: x['itemSectionRenderer']['contents'],
4538 list)
4539 if not isr_contents:
4540 continue
4541 for content in isr_contents:
4542 if not isinstance(content, dict):
4543 continue
4544 video = content.get('videoRenderer')
4545 if not isinstance(video, dict):
4546 continue
4547 video_id = video.get('videoId')
4548 if not video_id:
4549 continue
4550
4551 yield self._extract_video(video)
4552
4553 if not continuation:
4554 break
4555
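# A minimal sketch of the continuation loop implemented by _search_results
# above; fetch_page, extract_videos and extract_continuation are hypothetical
# stand-ins for _extract_response, _extract_video and _extract_continuation.
def _example_search_pagination(query, fetch_page, extract_videos, extract_continuation, params=None):
    data = {'query': query}
    if params:
        data['params'] = params
    continuation = {}
    while True:
        data.update(continuation)  # merge the previous page's continuation into the request
        page = fetch_page(data)  # one Innertube '/search' request
        if not page:
            return
        yield from extract_videos(page)  # the videoRenderer entries of this page
        continuation = extract_continuation(page)  # dict carrying the next-page token, if any
        if not continuation:
            return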
4556
4557class YoutubeSearchDateIE(YoutubeSearchIE):
4558 IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
4559 _SEARCH_KEY = 'ytsearchdate'
4560 IE_DESC = 'YouTube searches, newest videos first'
4561 _SEARCH_PARAMS = 'CAI%3D'
4562
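# For reference, a sketch of what 'CAI%3D' encodes (base64 is imported at the
# top of this module; the constant name is illustrative only). The protobuf
# interpretation is an assumption based on observed behaviour, not anything
# documented by YouTube:
_EXAMPLE_DATE_SORT_PARAM = base64.b64decode('CAI=')  # == b'\x08\x02'
# i.e. protobuf field 1 (varint) = 2, which appears to select "upload date"
# ordering and is what makes this search return the newest videos first.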
4563
4564class YoutubeSearchURLIE(YoutubeSearchIE):
4565 IE_DESC = 'YouTube search URLs with sorting and filter support'
4566 IE_NAME = YoutubeSearchIE.IE_NAME + '_url'
4567 _SEARCH_KEY = None
4568 _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?:[^&]+)(?:[&]|$)'
4569 # _MAX_RESULTS = 100
4570 _TESTS = [{
4571 'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
4572 'playlist_mincount': 5,
4573 'info_dict': {
4574 'id': 'youtube-dl test video',
4575 'title': 'youtube-dl test video',
4576 }
4577 }, {
4578 'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
4579 'only_matching': True,
4580 }]
4581
4582 @classmethod
4583 def _make_valid_url(cls):
4584 return cls._VALID_URL
4585
4586 def _real_extract(self, url):
4587 qs = parse_qs(url)
4588 query = (qs.get('search_query') or qs.get('q'))[0]
4589 self._SEARCH_PARAMS = qs.get('sp', ('',))[0]
4590 return self._get_n_results(query, self._MAX_RESULTS)
4591
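# Roughly how the query and filter parameters are pulled out of a results URL
# (parse_qs is the helper imported at the top of this module; the URL comes
# from the second test case above):
#     qs = parse_qs('https://www.youtube.com/results?q=test&sp=EgQIBBgB')
#     (qs.get('search_query') or qs.get('q'))[0]  ->  'test'
#     qs.get('sp', ('',))[0]                      ->  'EgQIBBgB'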
4592
4593class YoutubeFeedsInfoExtractor(YoutubeTabIE):
4594 """
4595 Base class for feed extractors
4596 Subclasses must define the _FEED_NAME attribute.
4597 """
4598 _LOGIN_REQUIRED = True
4599 _TESTS = []
4600
4601 @property
4602 def IE_NAME(self):
4603 return 'youtube:%s' % self._FEED_NAME
4604
4605 def _real_extract(self, url):
4606 return self.url_result(
4607 'https://www.youtube.com/feed/%s' % self._FEED_NAME,
4608 ie=YoutubeTabIE.ie_key())
4609
4610
4611class YoutubeWatchLaterIE(InfoExtractor):
4612 IE_NAME = 'youtube:watchlater'
4613 IE_DESC = 'YouTube watch later list; ":ytwatchlater" keyword (requires cookies)'
4614 _VALID_URL = r':ytwatchlater'
4615 _TESTS = [{
4616 'url': ':ytwatchlater',
4617 'only_matching': True,
4618 }]
4619
4620 def _real_extract(self, url):
4621 return self.url_result(
4622 'https://www.youtube.com/playlist?list=WL', ie=YoutubeTabIE.ie_key())
4623
4624
4625class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
4626 IE_DESC = 'YouTube recommended videos; ":ytrec" keyword'
4627 _VALID_URL = r'https?://(?:www\.)?youtube\.com/?(?:[?#]|$)|:ytrec(?:ommended)?'
4628 _FEED_NAME = 'recommended'
4629 _LOGIN_REQUIRED = False
4630 _TESTS = [{
4631 'url': ':ytrec',
4632 'only_matching': True,
4633 }, {
4634 'url': ':ytrecommended',
4635 'only_matching': True,
4636 }, {
4637 'url': 'https://youtube.com',
4638 'only_matching': True,
4639 }]
4640
4641
4642class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
4643 IE_DESC = 'YouTube subscriptions feed; ":ytsubs" keyword (requires cookies)'
4644 _VALID_URL = r':ytsub(?:scription)?s?'
4645 _FEED_NAME = 'subscriptions'
4646 _TESTS = [{
4647 'url': ':ytsubs',
4648 'only_matching': True,
4649 }, {
4650 'url': ':ytsubscriptions',
4651 'only_matching': True,
4652 }]
4653
4654
4655class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
4656 IE_DESC = 'YouTube watch history; ":ythis" keyword (requires cookies)'
4657 _VALID_URL = r':ythis(?:tory)?'
4658 _FEED_NAME = 'history'
4659 _TESTS = [{
4660 'url': ':ythistory',
4661 'only_matching': True,
4662 }]
4663
4664
4665class YoutubeTruncatedURLIE(InfoExtractor):
4666 IE_NAME = 'youtube:truncated_url'
4667 IE_DESC = False # Do not list
4668 _VALID_URL = r'''(?x)
4669 (?:https?://)?
4670 (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
4671 (?:watch\?(?:
4672 feature=[a-z_]+|
4673 annotation_id=annotation_[^&]+|
4674 x-yt-cl=[0-9]+|
4675 hl=[^&]*|
4676 t=[0-9]+
4677 )?
4678 |
4679 attribution_link\?a=[^&]+
4680 )
4681 $
4682 '''
4683
4684 _TESTS = [{
4685 'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
4686 'only_matching': True,
4687 }, {
4688 'url': 'https://www.youtube.com/watch?',
4689 'only_matching': True,
4690 }, {
4691 'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
4692 'only_matching': True,
4693 }, {
4694 'url': 'https://www.youtube.com/watch?feature=foo',
4695 'only_matching': True,
4696 }, {
4697 'url': 'https://www.youtube.com/watch?hl=en-GB',
4698 'only_matching': True,
4699 }, {
4700 'url': 'https://www.youtube.com/watch?t=2372',
4701 'only_matching': True,
4702 }]
4703
4704 def _real_extract(self, url):
4705 raise ExtractorError(
4706 'Did you forget to quote the URL? Remember that & is a meta '
4707 'character in most shells, so you want to put the URL in quotes, '
4708 'like yt-dlp '
4709 '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
4710 'or simply yt-dlp BaW_jenozKc.',
4711 expected=True)
4712
4713
4714class YoutubeClipIE(InfoExtractor):
4715 IE_NAME = 'youtube:clip'
4716 IE_DESC = False # Do not list
4717 _VALID_URL = r'https?://(?:www\.)?youtube\.com/clip/'
4718
4719 def _real_extract(self, url):
4720 self.report_warning('YouTube clips are not currently supported. The entire video will be downloaded instead')
4721 return self.url_result(url, 'Generic')
4722
4723
4724class YoutubeTruncatedIDIE(InfoExtractor):
4725 IE_NAME = 'youtube:truncated_id'
4726 IE_DESC = False # Do not list
4727 _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'
4728
4729 _TESTS = [{
4730 'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
4731 'only_matching': True,
4732 }]
4733
4734 def _real_extract(self, url):
4735 video_id = self._match_id(url)
4736 raise ExtractorError(
4737 'Incomplete YouTube ID %s. URL %s looks truncated.' % (video_id, url),
4738 expected=True)