]> jfr.im git - yt-dlp.git/blame_incremental - yt_dlp/extractor/youtube.py
Add field `webpage_url_domain`
[yt-dlp.git] / yt_dlp / extractor / youtube.py
... / ...
CommitLineData
1# coding: utf-8
2
3from __future__ import unicode_literals
4
5import base64
6import calendar
7import copy
8import datetime
9import hashlib
10import itertools
11import json
12import math
13import os.path
14import random
15import re
16import time
17import traceback
18
19from .common import InfoExtractor, SearchInfoExtractor
20from ..compat import (
21 compat_chr,
22 compat_HTTPError,
23 compat_parse_qs,
24 compat_str,
25 compat_urllib_parse_unquote_plus,
26 compat_urllib_parse_urlencode,
27 compat_urllib_parse_urlparse,
28 compat_urlparse,
29)
30from ..jsinterp import JSInterpreter
31from ..utils import (
32 bug_reports_message,
33 bytes_to_intlist,
34 clean_html,
35 datetime_from_str,
36 dict_get,
37 error_to_compat_str,
38 ExtractorError,
39 float_or_none,
40 format_field,
41 int_or_none,
42 intlist_to_bytes,
43 is_html,
44 join_nonempty,
45 mimetype2ext,
46 network_exceptions,
47 NO_DEFAULT,
48 orderedSet,
49 parse_codecs,
50 parse_count,
51 parse_duration,
52 parse_iso8601,
53 parse_qs,
54 qualities,
55 remove_end,
56 remove_start,
57 smuggle_url,
58 str_or_none,
59 str_to_int,
60 traverse_obj,
61 try_get,
62 unescapeHTML,
63 unified_strdate,
64 unsmuggle_url,
65 update_url_query,
66 url_or_none,
67 urljoin,
68 variadic,
69)
70
71
def get_first(obj, keys, **kwargs):
    """Look up *keys* across the elements of *obj* via ``traverse_obj``.

    The lookup path is ``...`` followed by ``variadic(keys)``; the call is made
    with ``get_all=False`` and every other keyword argument is forwarded as-is.
    NOTE(review): ``get_all=False`` presumably limits the result to the first
    match - confirm against ``traverse_obj``.
    """
    path = (..., *variadic(keys))
    return traverse_obj(obj, path, **kwargs, get_all=False)
74
75
76# any clients starting with _ cannot be explicitly requested by the user
# Table of Innertube client configurations, keyed by client identifier.
# Every entry provides INNERTUBE_CONTEXT.client.clientName / clientVersion and
# the numeric INNERTUBE_CONTEXT_CLIENT_NAME (sent as the X-YouTube-Client-Name
# header by generate_api_headers). INNERTUBE_API_KEY, INNERTUBE_HOST and
# REQUIRE_JS_PLAYER are optional here: build_innertube_clients() fills in the
# missing ones with defaults and also adds 'hl' and 'priority'.
# NOTE(review): REQUIRE_JS_PLAYER=False presumably marks clients whose formats
# work without the JS player - its consumer is not visible in this chunk.
77INNERTUBE_CLIENTS = {
78 'web': {
79 'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
80 'INNERTUBE_CONTEXT': {
81 'client': {
82 'clientName': 'WEB',
83 'clientVersion': '2.20210622.10.00',
84 }
85 },
86 'INNERTUBE_CONTEXT_CLIENT_NAME': 1
87 },
88 'web_embedded': {
89 'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
90 'INNERTUBE_CONTEXT': {
91 'client': {
92 'clientName': 'WEB_EMBEDDED_PLAYER',
93 'clientVersion': '1.20210620.0.1',
94 },
95 },
96 'INNERTUBE_CONTEXT_CLIENT_NAME': 56
97 },
98 'web_music': {
99 'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
100 'INNERTUBE_HOST': 'music.youtube.com',
101 'INNERTUBE_CONTEXT': {
102 'client': {
103 'clientName': 'WEB_REMIX',
104 'clientVersion': '1.20210621.00.00',
105 }
106 },
107 'INNERTUBE_CONTEXT_CLIENT_NAME': 67,
108 },
109 'web_creator': {
110 'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
111 'INNERTUBE_CONTEXT': {
112 'client': {
113 'clientName': 'WEB_CREATOR',
114 'clientVersion': '1.20210621.00.00',
115 }
116 },
117 'INNERTUBE_CONTEXT_CLIENT_NAME': 62,
118 },
119 'android': {
120 'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
121 'INNERTUBE_CONTEXT': {
122 'client': {
123 'clientName': 'ANDROID',
124 'clientVersion': '16.20',
125 }
126 },
127 'INNERTUBE_CONTEXT_CLIENT_NAME': 3,
128 'REQUIRE_JS_PLAYER': False
129 },
130 'android_embedded': {
131 'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
132 'INNERTUBE_CONTEXT': {
133 'client': {
134 'clientName': 'ANDROID_EMBEDDED_PLAYER',
135 'clientVersion': '16.20',
136 },
137 },
138 'INNERTUBE_CONTEXT_CLIENT_NAME': 55,
139 'REQUIRE_JS_PLAYER': False
140 },
141 'android_music': {
142 'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
143 'INNERTUBE_HOST': 'music.youtube.com',
144 'INNERTUBE_CONTEXT': {
145 'client': {
146 'clientName': 'ANDROID_MUSIC',
147 'clientVersion': '4.32',
148 }
149 },
150 'INNERTUBE_CONTEXT_CLIENT_NAME': 21,
151 'REQUIRE_JS_PLAYER': False
152 },
153 'android_creator': {
154 'INNERTUBE_CONTEXT': {
155 'client': {
156 'clientName': 'ANDROID_CREATOR',
157 'clientVersion': '21.24.100',
158 },
159 },
160 'INNERTUBE_CONTEXT_CLIENT_NAME': 14,
161 'REQUIRE_JS_PLAYER': False
162 },
163 # ios has HLS live streams
164 # See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/680
165 'ios': {
166 'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
167 'INNERTUBE_CONTEXT': {
168 'client': {
169 'clientName': 'IOS',
170 'clientVersion': '16.20',
171 }
172 },
173 'INNERTUBE_CONTEXT_CLIENT_NAME': 5,
174 'REQUIRE_JS_PLAYER': False
175 },
176 'ios_embedded': {
177 'INNERTUBE_API_KEY': 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8',
178 'INNERTUBE_CONTEXT': {
179 'client': {
180 'clientName': 'IOS_MESSAGES_EXTENSION',
181 'clientVersion': '16.20',
182 },
183 },
184 'INNERTUBE_CONTEXT_CLIENT_NAME': 66,
185 'REQUIRE_JS_PLAYER': False
186 },
187 'ios_music': {
188 'INNERTUBE_API_KEY': 'AIzaSyDK3iBpDP9nHVTk2qL73FLJICfOC3c51Og',
189 'INNERTUBE_HOST': 'music.youtube.com',
190 'INNERTUBE_CONTEXT': {
191 'client': {
192 'clientName': 'IOS_MUSIC',
193 'clientVersion': '4.32',
194 },
195 },
196 'INNERTUBE_CONTEXT_CLIENT_NAME': 26,
197 'REQUIRE_JS_PLAYER': False
198 },
199 'ios_creator': {
200 'INNERTUBE_CONTEXT': {
201 'client': {
202 'clientName': 'IOS_CREATOR',
203 'clientVersion': '21.24.100',
204 },
205 },
206 'INNERTUBE_CONTEXT_CLIENT_NAME': 15,
207 'REQUIRE_JS_PLAYER': False
208 },
209 # mweb has 'ultralow' formats
210 # See: https://github.com/yt-dlp/yt-dlp/pull/557
211 'mweb': {
212 'INNERTUBE_API_KEY': 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8',
213 'INNERTUBE_CONTEXT': {
214 'client': {
215 'clientName': 'MWEB',
216 'clientVersion': '2.20210721.07.00',
217 }
218 },
219 'INNERTUBE_CONTEXT_CLIENT_NAME': 2
220 },
221}
222
223
def build_innertube_clients():
    """Complete every entry of ``INNERTUBE_CLIENTS`` in place.

    For each client present when the function starts:
      * fill in the default API key, host and ``REQUIRE_JS_PLAYER`` flag when
        the entry does not define them, and default the context language to 'en';
      * set ``priority`` to ten times the rank of the client's base name (the
        part before the first underscore), then subtract a variant penalty.
    Base clients additionally get a deep-copied ``<client>_agegate`` variant
    whose context uses the 'EMBED' client screen and the third-party embed URL.
    """
    base_clients = ('android', 'web', 'ios', 'mweb')
    rank_of = qualities(base_clients[::-1])
    third_party = {
        'embedUrl': 'https://google.com',  # Can be any valid URL
    }
    fallbacks = (
        ('INNERTUBE_API_KEY', 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8'),
        ('INNERTUBE_HOST', 'www.youtube.com'),
        ('REQUIRE_JS_PLAYER', True),
    )

    # Iterate over a snapshot because agegate variants are inserted while looping
    for name, config in tuple(INNERTUBE_CLIENTS.items()):
        for option, value in fallbacks:
            config.setdefault(option, value)
        context = config['INNERTUBE_CONTEXT']
        context['client'].setdefault('hl', 'en')
        base_name = name.split('_', 1)[0]
        config['priority'] = 10 * rank_of(base_name)

        if name in base_clients:
            # The base client keeps its priority; only the derived copy is penalised
            variant = copy.deepcopy(config)
            variant_context = variant['INNERTUBE_CONTEXT']
            variant_context['client']['clientScreen'] = 'EMBED'
            variant_context['thirdParty'] = third_party
            variant['priority'] -= 1
            INNERTUBE_CLIENTS[f'{name}_agegate'] = variant
            continue

        if name.endswith('_embedded'):
            context['thirdParty'] = third_party
            penalty = 2
        else:
            penalty = 3
        config['priority'] -= penalty


build_innertube_clients()
251
252
253class YoutubeBaseInfoExtractor(InfoExtractor):
254 """Provide base functions for Youtube extractors"""
255
 # Regex alternation of URL path names.
 # NOTE(review): presumably the top-level YouTube paths that must not be
 # mistaken for channel/user names - the consumer is not visible in this chunk.
256 _RESERVED_NAMES = (
257 r'channel|c|user|playlist|watch|w|v|embed|e|watch_popup|clip|'
258 r'shorts|movies|results|shared|hashtag|trending|feed|feeds|'
259 r'browse|oembed|get_video_info|iframe_api|s/player|'
260 r'storefront|oops|index|account|reporthistory|t/terms|about|upload|signin|logout')
261
 # Playlist ID pattern: one of the listed prefixes followed by at least ten
 # ID characters, or one of the fixed names RDMM / WL / LL / LM.
262 _PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)'
263
264 _NETRC_MACHINE = 'youtube'
265
266 # If True it will raise an error if no login info is provided
267 _LOGIN_REQUIRED = False
268
 # Host-name regexes of Invidious (and related) front-ends. YoutubeIE joins
 # them with '|' and interpolates the result into its _VALID_URL pattern.
269 _INVIDIOUS_SITES = (
270 # invidious-redirect websites
271 r'(?:www\.)?redirect\.invidious\.io',
272 r'(?:(?:www|dev)\.)?invidio\.us',
273 # Invidious instances taken from https://github.com/iv-org/documentation/blob/master/Invidious-Instances.md
274 r'(?:www\.)?invidious\.pussthecat\.org',
275 r'(?:www\.)?invidious\.zee\.li',
276 r'(?:www\.)?invidious\.ethibox\.fr',
277 r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',
278 # youtube-dl invidious instances list
279 r'(?:(?:www|no)\.)?invidiou\.sh',
280 r'(?:(?:www|fi)\.)?invidious\.snopyta\.org',
281 r'(?:www\.)?invidious\.kabi\.tk',
282 r'(?:www\.)?invidious\.mastodon\.host',
283 r'(?:www\.)?invidious\.zapashcanon\.fr',
284 r'(?:www\.)?(?:invidious(?:-us)?|piped)\.kavin\.rocks',
285 r'(?:www\.)?invidious\.tinfoil-hat\.net',
286 r'(?:www\.)?invidious\.himiko\.cloud',
287 r'(?:www\.)?invidious\.reallyancient\.tech',
288 r'(?:www\.)?invidious\.tube',
289 r'(?:www\.)?invidiou\.site',
290 r'(?:www\.)?invidious\.site',
291 r'(?:www\.)?invidious\.xyz',
292 r'(?:www\.)?invidious\.nixnet\.xyz',
293 r'(?:www\.)?invidious\.048596\.xyz',
294 r'(?:www\.)?invidious\.drycat\.fr',
295 r'(?:www\.)?inv\.skyn3t\.in',
296 r'(?:www\.)?tube\.poal\.co',
297 r'(?:www\.)?tube\.connect\.cafe',
298 r'(?:www\.)?vid\.wxzm\.sx',
299 r'(?:www\.)?vid\.mint\.lgbt',
300 r'(?:www\.)?vid\.puffyan\.us',
301 r'(?:www\.)?yewtu\.be',
302 r'(?:www\.)?yt\.elukerio\.org',
303 r'(?:www\.)?yt\.lelux\.fi',
304 r'(?:www\.)?invidious\.ggc-project\.de',
305 r'(?:www\.)?yt\.maisputain\.ovh',
306 r'(?:www\.)?ytprivate\.com',
307 r'(?:www\.)?invidious\.13ad\.de',
308 r'(?:www\.)?invidious\.toot\.koeln',
309 r'(?:www\.)?invidious\.fdn\.fr',
310 r'(?:www\.)?watch\.nettohikari\.com',
311 r'(?:www\.)?invidious\.namazso\.eu',
312 r'(?:www\.)?invidious\.silkky\.cloud',
313 r'(?:www\.)?invidious\.exonip\.de',
314 r'(?:www\.)?invidious\.riverside\.rocks',
315 r'(?:www\.)?invidious\.blamefran\.net',
316 r'(?:www\.)?invidious\.moomoo\.de',
317 r'(?:www\.)?ytb\.trom\.tf',
318 r'(?:www\.)?yt\.cyberhost\.uk',
319 r'(?:www\.)?kgg2m7yk5aybusll\.onion',
320 r'(?:www\.)?qklhadlycap4cnod\.onion',
321 r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',
322 r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',
323 r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',
324 r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',
325 r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',
326 r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',
327 r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',
328 r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',
329 r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',
330 r'(?:www\.)?hpniueoejy4opn7bc4ftgazyqjoeqwlvh2uiku2xqku6zpoa4bf5ruid\.onion',
331 )
332
333 def _login(self):
334 """
335 Attempt to log in to YouTube.
336 If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.
337 """
338
339 if (self._LOGIN_REQUIRED
340 and self.get_param('cookiefile') is None
341 and self.get_param('cookiesfrombrowser') is None):
342 self.raise_login_required(
343 'Login details are needed to download this content', method='cookies')
344 username, password = self._get_login_info()
345 if username:
346 self.report_warning(f'Cannot login to YouTube using username and password. {self._LOGIN_HINTS["cookies"]}')
347
348 def _initialize_consent(self):
349 cookies = self._get_cookies('https://www.youtube.com/')
350 if cookies.get('__Secure-3PSID'):
351 return
352 consent_id = None
353 consent = cookies.get('CONSENT')
354 if consent:
355 if 'YES' in consent.value:
356 return
357 consent_id = self._search_regex(
358 r'PENDING\+(\d+)', consent.value, 'consent', default=None)
359 if not consent_id:
360 consent_id = random.randint(100, 999)
361 self._set_cookie('.youtube.com', 'CONSENT', 'YES+cb.20210328-17-p0.en+FX+%s' % consent_id)
362
363 def _real_initialize(self):
364 self._initialize_consent()
365 self._login()
366
 # Captures the JSON object assigned to ytInitialData (plain or via
 # window["ytInitialData"]); the match is non-greedy up to the first '};'.
367 _YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+?})\s*;'
 # Same idea for the ytInitialPlayerResponse assignment.
368 _YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*=\s*({.+?})\s*;'
 # What may follow the assignment; extract_yt_initial_data appends it to
 # _YT_INITIAL_DATA_RE for its first (stricter) attempt.
369 _YT_INITIAL_BOUNDARY_RE = r'(?:var\s+meta|</script|\n)'
370
371 def _get_default_ytcfg(self, client='web'):
372 return copy.deepcopy(INNERTUBE_CLIENTS[client])
373
374 def _get_innertube_host(self, client='web'):
375 return INNERTUBE_CLIENTS[client]['INNERTUBE_HOST']
376
377 def _ytcfg_get_safe(self, ytcfg, getter, expected_type=None, default_client='web'):
378 # try_get but with fallback to default ytcfg client values when present
379 _func = lambda y: try_get(y, getter, expected_type)
380 return _func(ytcfg) or _func(self._get_default_ytcfg(default_client))
381
382 def _extract_client_name(self, ytcfg, default_client='web'):
383 return self._ytcfg_get_safe(
384 ytcfg, (lambda x: x['INNERTUBE_CLIENT_NAME'],
385 lambda x: x['INNERTUBE_CONTEXT']['client']['clientName']), compat_str, default_client)
386
387 def _extract_client_version(self, ytcfg, default_client='web'):
388 return self._ytcfg_get_safe(
389 ytcfg, (lambda x: x['INNERTUBE_CLIENT_VERSION'],
390 lambda x: x['INNERTUBE_CONTEXT']['client']['clientVersion']), compat_str, default_client)
391
392 def _extract_api_key(self, ytcfg=None, default_client='web'):
393 return self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_API_KEY'], compat_str, default_client)
394
395 def _extract_context(self, ytcfg=None, default_client='web'):
396 _get_context = lambda y: try_get(y, lambda x: x['INNERTUBE_CONTEXT'], dict)
397 context = _get_context(ytcfg)
398 if context:
399 return context
400
401 context = _get_context(self._get_default_ytcfg(default_client))
402 if not ytcfg:
403 return context
404
405 # Recreate the client context (required)
406 context['client'].update({
407 'clientVersion': self._extract_client_version(ytcfg, default_client),
408 'clientName': self._extract_client_name(ytcfg, default_client),
409 })
410 visitor_data = try_get(ytcfg, lambda x: x['VISITOR_DATA'], compat_str)
411 if visitor_data:
412 context['client']['visitorData'] = visitor_data
413 return context
414
 # Cache used by _generate_sapisidhash_header: None = cookies not inspected
 # yet, False = no usable cookie found, otherwise the cookie value.
415 _SAPISID = None
416
417 def _generate_sapisidhash_header(self, origin='https://www.youtube.com'):
418 time_now = round(time.time())
419 if self._SAPISID is None:
420 yt_cookies = self._get_cookies('https://www.youtube.com')
421 # Sometimes SAPISID cookie isn't present but __Secure-3PAPISID is.
422 # See: https://github.com/yt-dlp/yt-dlp/issues/393
423 sapisid_cookie = dict_get(
424 yt_cookies, ('__Secure-3PAPISID', 'SAPISID'))
425 if sapisid_cookie and sapisid_cookie.value:
426 self._SAPISID = sapisid_cookie.value
427 self.write_debug('Extracted SAPISID cookie')
428 # SAPISID cookie is required if not already present
429 if not yt_cookies.get('SAPISID'):
430 self.write_debug('Copying __Secure-3PAPISID cookie to SAPISID cookie')
431 self._set_cookie(
432 '.youtube.com', 'SAPISID', self._SAPISID, secure=True, expire_time=time_now + 3600)
433 else:
434 self._SAPISID = False
435 if not self._SAPISID:
436 return None
437 # SAPISIDHASH algorithm from https://stackoverflow.com/a/32065323
438 sapisidhash = hashlib.sha1(
439 f'{time_now} {self._SAPISID} {origin}'.encode('utf-8')).hexdigest()
440 return f'SAPISIDHASH {time_now}_{sapisidhash}'
441
442 def _call_api(self, ep, query, video_id, fatal=True, headers=None,
443 note='Downloading API JSON', errnote='Unable to download API page',
444 context=None, api_key=None, api_hostname=None, default_client='web'):
445
446 data = {'context': context} if context else {'context': self._extract_context(default_client=default_client)}
447 data.update(query)
448 real_headers = self.generate_api_headers(default_client=default_client)
449 real_headers.update({'content-type': 'application/json'})
450 if headers:
451 real_headers.update(headers)
452 return self._download_json(
453 'https://%s/youtubei/v1/%s' % (api_hostname or self._get_innertube_host(default_client), ep),
454 video_id=video_id, fatal=fatal, note=note, errnote=errnote,
455 data=json.dumps(data).encode('utf8'), headers=real_headers,
456 query={'key': api_key or self._extract_api_key()})
457
458 def extract_yt_initial_data(self, item_id, webpage, fatal=True):
459 data = self._search_regex(
460 (r'%s\s*%s' % (self._YT_INITIAL_DATA_RE, self._YT_INITIAL_BOUNDARY_RE),
461 self._YT_INITIAL_DATA_RE), webpage, 'yt initial data', fatal=fatal)
462 if data:
463 return self._parse_json(data, item_id, fatal=fatal)
464
465 @staticmethod
466 def _extract_session_index(*data):
467 """
468 Index of current account in account list.
469 See: https://github.com/yt-dlp/yt-dlp/pull/519
470 """
471 for ytcfg in data:
472 session_index = int_or_none(try_get(ytcfg, lambda x: x['SESSION_INDEX']))
473 if session_index is not None:
474 return session_index
475
476 # Deprecated?
477 def _extract_identity_token(self, ytcfg=None, webpage=None):
478 if ytcfg:
479 token = try_get(ytcfg, lambda x: x['ID_TOKEN'], compat_str)
480 if token:
481 return token
482 if webpage:
483 return self._search_regex(
484 r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
485 'identity token', default=None, fatal=False)
486
487 @staticmethod
488 def _extract_account_syncid(*args):
489 """
490 Extract syncId required to download private playlists of secondary channels
491 @params response and/or ytcfg
492 """
493 for data in args:
494 # ytcfg includes channel_syncid if on secondary channel
495 delegated_sid = try_get(data, lambda x: x['DELEGATED_SESSION_ID'], compat_str)
496 if delegated_sid:
497 return delegated_sid
498 sync_ids = (try_get(
499 data, (lambda x: x['responseContext']['mainAppWebResponseContext']['datasyncId'],
500 lambda x: x['DATASYNC_ID']), compat_str) or '').split('||')
501 if len(sync_ids) >= 2 and sync_ids[1]:
502 # datasyncid is of the form "channel_syncid||user_syncid" for secondary channel
503 # and just "user_syncid||" for primary channel. We only want the channel_syncid
504 return sync_ids[0]
505
506 @staticmethod
507 def _extract_visitor_data(*args):
508 """
509 Extracts visitorData from an API response or ytcfg
510 Appears to be used to track session state
511 """
512 return get_first(
513 args, (('VISITOR_DATA', ('INNERTUBE_CONTEXT', 'client', 'visitorData'), ('responseContext', 'visitorData'))),
514 expected_type=str)
515
516 @property
517 def is_authenticated(self):
518 return bool(self._generate_sapisidhash_header())
519
520 def extract_ytcfg(self, video_id, webpage):
521 if not webpage:
522 return {}
523 return self._parse_json(
524 self._search_regex(
525 r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg',
526 default='{}'), video_id, fatal=False) or {}
527
528 def generate_api_headers(
529 self, *, ytcfg=None, account_syncid=None, session_index=None,
530 visitor_data=None, identity_token=None, api_hostname=None, default_client='web'):
531
532 origin = 'https://' + (api_hostname if api_hostname else self._get_innertube_host(default_client))
533 headers = {
534 'X-YouTube-Client-Name': compat_str(
535 self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_CONTEXT_CLIENT_NAME'], default_client=default_client)),
536 'X-YouTube-Client-Version': self._extract_client_version(ytcfg, default_client),
537 'Origin': origin,
538 'X-Youtube-Identity-Token': identity_token or self._extract_identity_token(ytcfg),
539 'X-Goog-PageId': account_syncid or self._extract_account_syncid(ytcfg),
540 'X-Goog-Visitor-Id': visitor_data or self._extract_visitor_data(ytcfg)
541 }
542 if session_index is None:
543 session_index = self._extract_session_index(ytcfg)
544 if account_syncid or session_index is not None:
545 headers['X-Goog-AuthUser'] = session_index if session_index is not None else 0
546
547 auth = self._generate_sapisidhash_header(origin)
548 if auth is not None:
549 headers['Authorization'] = auth
550 headers['X-Origin'] = origin
551 return {h: v for h, v in headers.items() if v is not None}
552
553 @staticmethod
554 def _build_api_continuation_query(continuation, ctp=None):
555 query = {
556 'continuation': continuation
557 }
558 # TODO: Inconsistency with clickTrackingParams.
559 # Currently we have a fixed ctp contained within context (from ytcfg)
560 # and a ctp in root query for continuation.
561 if ctp:
562 query['clickTracking'] = {'clickTrackingParams': ctp}
563 return query
564
565 @classmethod
566 def _extract_next_continuation_data(cls, renderer):
567 next_continuation = try_get(
568 renderer, (lambda x: x['continuations'][0]['nextContinuationData'],
569 lambda x: x['continuation']['reloadContinuationData']), dict)
570 if not next_continuation:
571 return
572 continuation = next_continuation.get('continuation')
573 if not continuation:
574 return
575 ctp = next_continuation.get('clickTrackingParams')
576 return cls._build_api_continuation_query(continuation, ctp)
577
578 @classmethod
579 def _extract_continuation_ep_data(cls, continuation_ep: dict):
580 if isinstance(continuation_ep, dict):
581 continuation = try_get(
582 continuation_ep, lambda x: x['continuationCommand']['token'], compat_str)
583 if not continuation:
584 return
585 ctp = continuation_ep.get('clickTrackingParams')
586 return cls._build_api_continuation_query(continuation, ctp)
587
588 @classmethod
589 def _extract_continuation(cls, renderer):
590 next_continuation = cls._extract_next_continuation_data(renderer)
591 if next_continuation:
592 return next_continuation
593
594 contents = []
595 for key in ('contents', 'items'):
596 contents.extend(try_get(renderer, lambda x: x[key], list) or [])
597
598 for content in contents:
599 if not isinstance(content, dict):
600 continue
601 continuation_ep = try_get(
602 content, (lambda x: x['continuationItemRenderer']['continuationEndpoint'],
603 lambda x: x['continuationItemRenderer']['button']['buttonRenderer']['command']),
604 dict)
605 continuation = cls._extract_continuation_ep_data(continuation_ep)
606 if continuation:
607 return continuation
608
609 @classmethod
610 def _extract_alerts(cls, data):
611 for alert_dict in try_get(data, lambda x: x['alerts'], list) or []:
612 if not isinstance(alert_dict, dict):
613 continue
614 for alert in alert_dict.values():
615 alert_type = alert.get('type')
616 if not alert_type:
617 continue
618 message = cls._get_text(alert, 'text')
619 if message:
620 yield alert_type, message
621
622 def _report_alerts(self, alerts, expected=True, fatal=True, only_once=False):
623 errors = []
624 warnings = []
625 for alert_type, alert_message in alerts:
626 if alert_type.lower() == 'error' and fatal:
627 errors.append([alert_type, alert_message])
628 else:
629 warnings.append([alert_type, alert_message])
630
631 for alert_type, alert_message in (warnings + errors[:-1]):
632 self.report_warning('YouTube said: %s - %s' % (alert_type, alert_message), only_once=only_once)
633 if errors:
634 raise ExtractorError('YouTube said: %s' % errors[-1][1], expected=expected)
635
636 def _extract_and_report_alerts(self, data, *args, **kwargs):
637 return self._report_alerts(self._extract_alerts(data), *args, **kwargs)
638
639 def _extract_badges(self, renderer: dict):
640 badges = set()
641 for badge in try_get(renderer, lambda x: x['badges'], list) or []:
642 label = try_get(badge, lambda x: x['metadataBadgeRenderer']['label'], compat_str)
643 if label:
644 badges.add(label.lower())
645 return badges
646
647 @staticmethod
648 def _get_text(data, *path_list, max_runs=None):
649 for path in path_list or [None]:
650 if path is None:
651 obj = [data]
652 else:
653 obj = traverse_obj(data, path, default=[])
654 if not any(key is ... or isinstance(key, (list, tuple)) for key in variadic(path)):
655 obj = [obj]
656 for item in obj:
657 text = try_get(item, lambda x: x['simpleText'], compat_str)
658 if text:
659 return text
660 runs = try_get(item, lambda x: x['runs'], list) or []
661 if not runs and isinstance(item, list):
662 runs = item
663
664 runs = runs[:min(len(runs), max_runs or len(runs))]
665 text = ''.join(traverse_obj(runs, (..., 'text'), expected_type=str, default=[]))
666 if text:
667 return text
668
669 def _extract_response(self, item_id, query, note='Downloading API JSON', headers=None,
670 ytcfg=None, check_get_keys=None, ep='browse', fatal=True, api_hostname=None,
671 default_client='web'):
672 response = None
673 last_error = None
674 count = -1
675 retries = self.get_param('extractor_retries', 3)
676 if check_get_keys is None:
677 check_get_keys = []
678 while count < retries:
679 count += 1
680 if last_error:
681 self.report_warning('%s. Retrying ...' % remove_end(last_error, '.'))
682 try:
683 response = self._call_api(
684 ep=ep, fatal=True, headers=headers,
685 video_id=item_id, query=query,
686 context=self._extract_context(ytcfg, default_client),
687 api_key=self._extract_api_key(ytcfg, default_client),
688 api_hostname=api_hostname, default_client=default_client,
689 note='%s%s' % (note, ' (retry #%d)' % count if count else ''))
690 except ExtractorError as e:
691 if isinstance(e.cause, network_exceptions):
692 if isinstance(e.cause, compat_HTTPError) and not is_html(e.cause.read(512)):
693 e.cause.seek(0)
694 yt_error = try_get(
695 self._parse_json(e.cause.read().decode(), item_id, fatal=False),
696 lambda x: x['error']['message'], compat_str)
697 if yt_error:
698 self._report_alerts([('ERROR', yt_error)], fatal=False)
699 # Downloading page may result in intermittent 5xx HTTP error
700 # Sometimes a 404 is also recieved. See: https://github.com/ytdl-org/youtube-dl/issues/28289
701 # We also want to catch all other network exceptions since errors in later pages can be troublesome
702 # See https://github.com/yt-dlp/yt-dlp/issues/507#issuecomment-880188210
703 if not isinstance(e.cause, compat_HTTPError) or e.cause.code not in (403, 429):
704 last_error = error_to_compat_str(e.cause or e.msg)
705 if count < retries:
706 continue
707 if fatal:
708 raise
709 else:
710 self.report_warning(error_to_compat_str(e))
711 return
712
713 else:
714 try:
715 self._extract_and_report_alerts(response, only_once=True)
716 except ExtractorError as e:
717 # YouTube servers may return errors we want to retry on in a 200 OK response
718 # See: https://github.com/yt-dlp/yt-dlp/issues/839
719 if 'unknown error' in e.msg.lower():
720 last_error = e.msg
721 continue
722 if fatal:
723 raise
724 self.report_warning(error_to_compat_str(e))
725 return
726 if not check_get_keys or dict_get(response, check_get_keys):
727 break
728 # Youtube sometimes sends incomplete data
729 # See: https://github.com/ytdl-org/youtube-dl/issues/28194
730 last_error = 'Incomplete data received'
731 if count >= retries:
732 if fatal:
733 raise ExtractorError(last_error)
734 else:
735 self.report_warning(last_error)
736 return
737 return response
738
739 @staticmethod
740 def is_music_url(url):
741 return re.match(r'https?://music\.youtube\.com/', url) is not None
742
743 def _extract_video(self, renderer):
744 video_id = renderer.get('videoId')
745 title = self._get_text(renderer, 'title')
746 description = self._get_text(renderer, 'descriptionSnippet')
747 duration = parse_duration(self._get_text(
748 renderer, 'lengthText', ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'text')))
749 view_count_text = self._get_text(renderer, 'viewCountText') or ''
750 view_count = str_to_int(self._search_regex(
751 r'^([\d,]+)', re.sub(r'\s', '', view_count_text),
752 'view count', default=None))
753
754 uploader = self._get_text(renderer, 'ownerText', 'shortBylineText')
755
756 return {
757 '_type': 'url',
758 'ie_key': YoutubeIE.ie_key(),
759 'id': video_id,
760 'url': f'https://www.youtube.com/watch?v={video_id}',
761 'title': title,
762 'description': description,
763 'duration': duration,
764 'view_count': view_count,
765 'uploader': uploader,
766 }
767
768
769class YoutubeIE(YoutubeBaseInfoExtractor):
770 IE_DESC = 'YouTube'
771 _VALID_URL = r"""(?x)^
772 (
773 (?:https?://|//) # http(s):// or protocol-independent URL
774 (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com|
775 (?:www\.)?deturl\.com/www\.youtube\.com|
776 (?:www\.)?pwnyoutube\.com|
777 (?:www\.)?hooktube\.com|
778 (?:www\.)?yourepeat\.com|
779 tube\.majestyc\.net|
780 %(invidious)s|
781 youtube\.googleapis\.com)/ # the various hostnames, with wildcard subdomains
782 (?:.*?\#/)? # handle anchor (#/) redirect urls
783 (?: # the various things that can precede the ID:
784 (?:(?:v|embed|e|shorts)/(?!videoseries)) # v/ or embed/ or e/ or shorts/
785 |(?: # or the v= param in all its forms
786 (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
787 (?:\?|\#!?) # the params delimiter ? or # or #!
788 (?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
789 v=
790 )
791 ))
792 |(?:
793 youtu\.be| # just youtu.be/xxxx
794 vid\.plus| # or vid.plus/xxxx
795 zwearz\.com/watch| # or zwearz.com/watch/xxxx
796 %(invidious)s
797 )/
798 |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
799 )
800 )? # all until now is optional -> you can pass the naked ID
801 (?P<id>[0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
802 (?(1).+)? # if we found the ID, everything can follow
803 (?:\#|$)""" % {
804 'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
805 }
806 _PLAYER_INFO_RE = (
807 r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player',
808 r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',
809 r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$',
810 )
811 _formats = {
812 '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
813 '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
814 '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
815 '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
816 '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
817 '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
818 '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
819 '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
820 # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
821 '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
822 '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
823 '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
824 '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
825 '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
826 '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
827 '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
828 '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
829 '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
830
831
832 # 3D videos
833 '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
834 '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
835 '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
836 '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
837 '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
838 '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
839 '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
840
841 # Apple HTTP Live Streaming
842 '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
843 '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
844 '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
845 '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
846 '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
847 '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
848 '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
849 '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
850
851 # DASH mp4 video
852 '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
853 '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
854 '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
855 '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
856 '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
857 '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
858 '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
859 '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
860 '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
861 '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
862 '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
863 '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},
864
865 # Dash mp4 audio
866 '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
867 '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
868 '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
869 '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
870 '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
871 '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
872 '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},
873
874 # Dash webm
875 '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
876 '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
877 '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
878 '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
879 '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
880 '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
881 '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
882 '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
883 '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
884 '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
885 '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
886 '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
887 '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
888 '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
889 '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
890 # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
891 '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
892 '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
893 '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
894 '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
895 '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
896 '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
897
898 # Dash webm audio
899 '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
900 '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},
901
902 # Dash webm audio with opus inside
903 '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
904 '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
905 '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},
906
907 # RTMP (unnamed)
908 '_rtmp': {'protocol': 'rtmp'},
909
910 # av01 video only formats sometimes served with "unknown" codecs
911 '394': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},
912 '395': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},
913 '396': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'av01.0.01M.08'},
914 '397': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'av01.0.04M.08'},
915 '398': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'av01.0.05M.08'},
916 '399': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'av01.0.08M.08'},
917 '400': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},
918 '401': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},
919 }
# Subtitle format identifiers known to this extractor.
# NOTE(review): presumably the formats requested for each subtitle track - confirm against the subtitle extraction code.
_SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')

# NOTE(review): presumably switches off the base extractor's geo-bypass handling for this extractor - confirm in InfoExtractor.
_GEO_BYPASS = False

# Name under which this extractor is identified.
IE_NAME = 'youtube'
925 _TESTS = [
926 {
927 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
928 'info_dict': {
929 'id': 'BaW_jenozKc',
930 'ext': 'mp4',
931 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
932 'uploader': 'Philipp Hagemeister',
933 'uploader_id': 'phihag',
934 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
935 'channel': 'Philipp Hagemeister',
936 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
937 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
938 'upload_date': '20121002',
939 'description': 'md5:8fb536f4877b8a7455c2ec23794dbc22',
940 'categories': ['Science & Technology'],
941 'tags': ['youtube-dl'],
942 'duration': 10,
943 'view_count': int,
944 'like_count': int,
945 # 'dislike_count': int,
946 'availability': 'public',
947 'playable_in_embed': True,
948 'thumbnail': 'https://i.ytimg.com/vi/BaW_jenozKc/maxresdefault.jpg',
949 'live_status': 'not_live',
950 'age_limit': 0,
951 'start_time': 1,
952 'end_time': 9,
953 }
954 },
955 {
956 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
957 'note': 'Embed-only video (#1746)',
958 'info_dict': {
959 'id': 'yZIXLfi8CZQ',
960 'ext': 'mp4',
961 'upload_date': '20120608',
962 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
963 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
964 'uploader': 'SET India',
965 'uploader_id': 'setindia',
966 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
967 'age_limit': 18,
968 },
969 'skip': 'Private video',
970 },
971 {
972 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=yZIXLfi8CZQ',
973 'note': 'Use the first video ID in the URL',
974 'info_dict': {
975 'id': 'BaW_jenozKc',
976 'ext': 'mp4',
977 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
978 'uploader': 'Philipp Hagemeister',
979 'uploader_id': 'phihag',
980 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
981 'upload_date': '20121002',
982 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
983 'categories': ['Science & Technology'],
984 'tags': ['youtube-dl'],
985 'duration': 10,
986 'view_count': int,
987 'like_count': int,
988 'dislike_count': int,
989 },
990 'params': {
991 'skip_download': True,
992 },
993 },
994 {
995 'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
996 'note': '256k DASH audio (format 141) via DASH manifest',
997 'info_dict': {
998 'id': 'a9LDPn-MO4I',
999 'ext': 'm4a',
1000 'upload_date': '20121002',
1001 'uploader_id': '8KVIDEO',
1002 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
1003 'description': '',
1004 'uploader': '8KVIDEO',
1005 'title': 'UHDTV TEST 8K VIDEO.mp4'
1006 },
1007 'params': {
1008 'youtube_include_dash_manifest': True,
1009 'format': '141',
1010 },
1011 'skip': 'format 141 not served anymore',
1012 },
1013 # DASH manifest with encrypted signature
1014 {
1015 'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
1016 'info_dict': {
1017 'id': 'IB3lcPjvWLA',
1018 'ext': 'm4a',
1019 'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
1020 'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
1021 'duration': 244,
1022 'uploader': 'AfrojackVEVO',
1023 'uploader_id': 'AfrojackVEVO',
1024 'upload_date': '20131011',
1025 'abr': 129.495,
1026 },
1027 'params': {
1028 'youtube_include_dash_manifest': True,
1029 'format': '141/bestaudio[ext=m4a]',
1030 },
1031 },
1032 # Age-gate videos. See https://github.com/yt-dlp/yt-dlp/pull/575#issuecomment-888837000
1033 {
1034 'note': 'Embed allowed age-gate video',
1035 'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
1036 'info_dict': {
1037 'id': 'HtVdAasjOgU',
1038 'ext': 'mp4',
1039 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
1040 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
1041 'duration': 142,
1042 'uploader': 'The Witcher',
1043 'uploader_id': 'WitcherGame',
1044 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
1045 'upload_date': '20140605',
1046 'age_limit': 18,
1047 },
1048 },
1049 {
1050 'note': 'Age-gate video with embed allowed in public site',
1051 'url': 'https://youtube.com/watch?v=HsUATh_Nc2U',
1052 'info_dict': {
1053 'id': 'HsUATh_Nc2U',
1054 'ext': 'mp4',
1055 'title': 'Godzilla 2 (Official Video)',
1056 'description': 'md5:bf77e03fcae5529475e500129b05668a',
1057 'upload_date': '20200408',
1058 'uploader_id': 'FlyingKitty900',
1059 'uploader': 'FlyingKitty',
1060 'age_limit': 18,
1061 },
1062 },
1063 {
1064 'note': 'Age-gate video embedable only with clientScreen=EMBED',
1065 'url': 'https://youtube.com/watch?v=Tq92D6wQ1mg',
1066 'info_dict': {
1067 'id': 'Tq92D6wQ1mg',
1068 'title': '[MMD] Adios - EVERGLOW [+Motion DL]',
1069 'ext': 'mp4',
1070 'upload_date': '20191227',
1071 'uploader_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',
1072 'uploader': 'Projekt Melody',
1073 'description': 'md5:17eccca93a786d51bc67646756894066',
1074 'age_limit': 18,
1075 },
1076 },
1077 {
1078 'note': 'Non-Agegated non-embeddable video',
1079 'url': 'https://youtube.com/watch?v=MeJVWBSsPAY',
1080 'info_dict': {
1081 'id': 'MeJVWBSsPAY',
1082 'ext': 'mp4',
1083 'title': 'OOMPH! - Such Mich Find Mich (Lyrics)',
1084 'uploader': 'Herr Lurik',
1085 'uploader_id': 'st3in234',
1086 'description': 'Fan Video. Music & Lyrics by OOMPH!.',
1087 'upload_date': '20130730',
1088 },
1089 },
1090 {
1091 'note': 'Non-bypassable age-gated video',
1092 'url': 'https://youtube.com/watch?v=Cr381pDsSsA',
1093 'only_matching': True,
1094 },
1095 # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
1096 # YouTube Red ad is not captured for creator
1097 {
1098 'url': '__2ABJjxzNo',
1099 'info_dict': {
1100 'id': '__2ABJjxzNo',
1101 'ext': 'mp4',
1102 'duration': 266,
1103 'upload_date': '20100430',
1104 'uploader_id': 'deadmau5',
1105 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
1106 'creator': 'deadmau5',
1107 'description': 'md5:6cbcd3a92ce1bc676fc4d6ab4ace2336',
1108 'uploader': 'deadmau5',
1109 'title': 'Deadmau5 - Some Chords (HD)',
1110 'alt_title': 'Some Chords',
1111 },
1112 'expected_warnings': [
1113 'DASH manifest missing',
1114 ]
1115 },
1116 # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
1117 {
1118 'url': 'lqQg6PlCWgI',
1119 'info_dict': {
1120 'id': 'lqQg6PlCWgI',
1121 'ext': 'mp4',
1122 'duration': 6085,
1123 'upload_date': '20150827',
1124 'uploader_id': 'olympic',
1125 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
1126 'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
1127 'uploader': 'Olympics',
1128 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
1129 },
1130 'params': {
1131 'skip_download': 'requires avconv',
1132 }
1133 },
1134 # Non-square pixels
1135 {
1136 'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
1137 'info_dict': {
1138 'id': '_b-2C3KPAM0',
1139 'ext': 'mp4',
1140 'stretched_ratio': 16 / 9.,
1141 'duration': 85,
1142 'upload_date': '20110310',
1143 'uploader_id': 'AllenMeow',
1144 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
1145 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
1146 'uploader': '孫ᄋᄅ',
1147 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
1148 },
1149 },
1150 # url_encoded_fmt_stream_map is empty string
1151 {
1152 'url': 'qEJwOuvDf7I',
1153 'info_dict': {
1154 'id': 'qEJwOuvDf7I',
1155 'ext': 'webm',
1156 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
1157 'description': '',
1158 'upload_date': '20150404',
1159 'uploader_id': 'spbelect',
1160 'uploader': 'Наблюдатели Петербурга',
1161 },
1162 'params': {
1163 'skip_download': 'requires avconv',
1164 },
1165 'skip': 'This live event has ended.',
1166 },
1167 # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
1168 {
1169 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
1170 'info_dict': {
1171 'id': 'FIl7x6_3R5Y',
1172 'ext': 'webm',
1173 'title': 'md5:7b81415841e02ecd4313668cde88737a',
1174 'description': 'md5:116377fd2963b81ec4ce64b542173306',
1175 'duration': 220,
1176 'upload_date': '20150625',
1177 'uploader_id': 'dorappi2000',
1178 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
1179 'uploader': 'dorappi2000',
1180 'formats': 'mincount:31',
1181 },
1182 'skip': 'not actual anymore',
1183 },
1184 # DASH manifest with segment_list
1185 {
1186 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
1187 'md5': '8ce563a1d667b599d21064e982ab9e31',
1188 'info_dict': {
1189 'id': 'CsmdDsKjzN8',
1190 'ext': 'mp4',
1191 'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510
1192 'uploader': 'Airtek',
1193 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
1194 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
1195 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
1196 },
1197 'params': {
1198 'youtube_include_dash_manifest': True,
1199 'format': '135', # bestvideo
1200 },
1201 'skip': 'This live event has ended.',
1202 },
1203 {
1204 # Multifeed videos (multiple cameras), URL is for Main Camera
1205 'url': 'https://www.youtube.com/watch?v=jvGDaLqkpTg',
1206 'info_dict': {
1207 'id': 'jvGDaLqkpTg',
1208 'title': 'Tom Clancy Free Weekend Rainbow Whatever',
1209 'description': 'md5:e03b909557865076822aa169218d6a5d',
1210 },
1211 'playlist': [{
1212 'info_dict': {
1213 'id': 'jvGDaLqkpTg',
1214 'ext': 'mp4',
1215 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Main Camera)',
1216 'description': 'md5:e03b909557865076822aa169218d6a5d',
1217 'duration': 10643,
1218 'upload_date': '20161111',
1219 'uploader': 'Team PGP',
1220 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1221 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
1222 },
1223 }, {
1224 'info_dict': {
1225 'id': '3AKt1R1aDnw',
1226 'ext': 'mp4',
1227 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 2)',
1228 'description': 'md5:e03b909557865076822aa169218d6a5d',
1229 'duration': 10991,
1230 'upload_date': '20161111',
1231 'uploader': 'Team PGP',
1232 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1233 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
1234 },
1235 }, {
1236 'info_dict': {
1237 'id': 'RtAMM00gpVc',
1238 'ext': 'mp4',
1239 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 3)',
1240 'description': 'md5:e03b909557865076822aa169218d6a5d',
1241 'duration': 10995,
1242 'upload_date': '20161111',
1243 'uploader': 'Team PGP',
1244 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1245 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
1246 },
1247 }, {
1248 'info_dict': {
1249 'id': '6N2fdlP3C5U',
1250 'ext': 'mp4',
1251 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 4)',
1252 'description': 'md5:e03b909557865076822aa169218d6a5d',
1253 'duration': 10990,
1254 'upload_date': '20161111',
1255 'uploader': 'Team PGP',
1256 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1257 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
1258 },
1259 }],
1260 'params': {
1261 'skip_download': True,
1262 },
1263 'skip': 'Not multifeed anymore',
1264 },
1265 {
1266 # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
1267 'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
1268 'info_dict': {
1269 'id': 'gVfLd0zydlo',
1270 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
1271 },
1272 'playlist_count': 2,
1273 'skip': 'Not multifeed anymore',
1274 },
1275 {
1276 'url': 'https://vid.plus/FlRa-iH7PGw',
1277 'only_matching': True,
1278 },
1279 {
1280 'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
1281 'only_matching': True,
1282 },
1283 {
1284 # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
1285 # Also tests cut-off URL expansion in video description (see
1286 # https://github.com/ytdl-org/youtube-dl/issues/1892,
1287 # https://github.com/ytdl-org/youtube-dl/issues/8164)
1288 'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
1289 'info_dict': {
1290 'id': 'lsguqyKfVQg',
1291 'ext': 'mp4',
1292 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
1293 'alt_title': 'Dark Walk',
1294 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
1295 'duration': 133,
1296 'upload_date': '20151119',
1297 'uploader_id': 'IronSoulElf',
1298 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
1299 'uploader': 'IronSoulElf',
1300 'creator': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',
1301 'track': 'Dark Walk',
1302 'artist': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',
1303 'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
1304 },
1305 'params': {
1306 'skip_download': True,
1307 },
1308 },
1309 {
1310 # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
1311 'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
1312 'only_matching': True,
1313 },
1314 {
1315 # Video with yt:stretch=17:0
1316 'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
1317 'info_dict': {
1318 'id': 'Q39EVAstoRM',
1319 'ext': 'mp4',
1320 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
1321 'description': 'md5:ee18a25c350637c8faff806845bddee9',
1322 'upload_date': '20151107',
1323 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
1324 'uploader': 'CH GAMER DROID',
1325 },
1326 'params': {
1327 'skip_download': True,
1328 },
1329 'skip': 'This video does not exist.',
1330 },
1331 {
1332 # Video with incomplete 'yt:stretch=16:'
1333 'url': 'https://www.youtube.com/watch?v=FRhJzUSJbGI',
1334 'only_matching': True,
1335 },
1336 {
1337 # Video licensed under Creative Commons
1338 'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
1339 'info_dict': {
1340 'id': 'M4gD1WSo5mA',
1341 'ext': 'mp4',
1342 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
1343 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
1344 'duration': 721,
1345 'upload_date': '20150127',
1346 'uploader_id': 'BerkmanCenter',
1347 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
1348 'uploader': 'The Berkman Klein Center for Internet & Society',
1349 'license': 'Creative Commons Attribution license (reuse allowed)',
1350 },
1351 'params': {
1352 'skip_download': True,
1353 },
1354 },
1355 {
1356 # Channel-like uploader_url
1357 'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
1358 'info_dict': {
1359 'id': 'eQcmzGIKrzg',
1360 'ext': 'mp4',
1361 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
1362 'description': 'md5:13a2503d7b5904ef4b223aa101628f39',
1363 'duration': 4060,
1364 'upload_date': '20151119',
1365 'uploader': 'Bernie Sanders',
1366 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
1367 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
1368 'license': 'Creative Commons Attribution license (reuse allowed)',
1369 },
1370 'params': {
1371 'skip_download': True,
1372 },
1373 },
1374 {
1375 'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
1376 'only_matching': True,
1377 },
1378 {
1379 # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
1380 'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
1381 'only_matching': True,
1382 },
1383 {
1384 # Rental video preview
1385 'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
1386 'info_dict': {
1387 'id': 'uGpuVWrhIzE',
1388 'ext': 'mp4',
1389 'title': 'Piku - Trailer',
1390 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
1391 'upload_date': '20150811',
1392 'uploader': 'FlixMatrix',
1393 'uploader_id': 'FlixMatrixKaravan',
1394 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
1395 'license': 'Standard YouTube License',
1396 },
1397 'params': {
1398 'skip_download': True,
1399 },
1400 'skip': 'This video is not available.',
1401 },
1402 {
1403 # YouTube Red video with episode data
1404 'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
1405 'info_dict': {
1406 'id': 'iqKdEhx-dD4',
1407 'ext': 'mp4',
1408 'title': 'Isolation - Mind Field (Ep 1)',
1409 'description': 'md5:f540112edec5d09fc8cc752d3d4ba3cd',
1410 'duration': 2085,
1411 'upload_date': '20170118',
1412 'uploader': 'Vsauce',
1413 'uploader_id': 'Vsauce',
1414 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
1415 'series': 'Mind Field',
1416 'season_number': 1,
1417 'episode_number': 1,
1418 },
1419 'params': {
1420 'skip_download': True,
1421 },
1422 'expected_warnings': [
1423 'Skipping DASH manifest',
1424 ],
1425 },
1426 {
1427 # The following content has been identified by the YouTube community
1428 # as inappropriate or offensive to some audiences.
1429 'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
1430 'info_dict': {
1431 'id': '6SJNVb0GnPI',
1432 'ext': 'mp4',
1433 'title': 'Race Differences in Intelligence',
1434 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
1435 'duration': 965,
1436 'upload_date': '20140124',
1437 'uploader': 'New Century Foundation',
1438 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
1439 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
1440 },
1441 'params': {
1442 'skip_download': True,
1443 },
1444 'skip': 'This video has been removed for violating YouTube\'s policy on hate speech.',
1445 },
1446 {
1447 # itag 212
1448 'url': '1t24XAntNCY',
1449 'only_matching': True,
1450 },
1451 {
1452 # geo restricted to JP
1453 'url': 'sJL6WA-aGkQ',
1454 'only_matching': True,
1455 },
1456 {
1457 'url': 'https://invidio.us/watch?v=BaW_jenozKc',
1458 'only_matching': True,
1459 },
1460 {
1461 'url': 'https://redirect.invidious.io/watch?v=BaW_jenozKc',
1462 'only_matching': True,
1463 },
1464 {
1465 # from https://nitter.pussthecat.org/YouTube/status/1360363141947944964#m
1466 'url': 'https://redirect.invidious.io/Yh0AhrY9GjA',
1467 'only_matching': True,
1468 },
1469 {
1470 # DRM protected
1471 'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
1472 'only_matching': True,
1473 },
1474 {
1475 # Video with unsupported adaptive stream type formats
1476 'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
1477 'info_dict': {
1478 'id': 'Z4Vy8R84T1U',
1479 'ext': 'mp4',
1480 'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
1481 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
1482 'duration': 433,
1483 'upload_date': '20130923',
1484 'uploader': 'Amelia Putri Harwita',
1485 'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',
1486 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',
1487 'formats': 'maxcount:10',
1488 },
1489 'params': {
1490 'skip_download': True,
1491 'youtube_include_dash_manifest': False,
1492 },
1493 'skip': 'not actual anymore',
1494 },
1495 {
1496 # Youtube Music Auto-generated description
1497 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1498 'info_dict': {
1499 'id': 'MgNrAu2pzNs',
1500 'ext': 'mp4',
1501 'title': 'Voyeur Girl',
1502 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
1503 'upload_date': '20190312',
1504 'uploader': 'Stephen - Topic',
1505 'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',
1506 'artist': 'Stephen',
1507 'track': 'Voyeur Girl',
1508 'album': 'it\'s too much love to know my dear',
1509 'release_date': '20190313',
1510 'release_year': 2019,
1511 },
1512 'params': {
1513 'skip_download': True,
1514 },
1515 },
1516 {
1517 'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',
1518 'only_matching': True,
1519 },
1520 {
1521 # invalid -> valid video id redirection
1522 'url': 'DJztXj2GPfl',
1523 'info_dict': {
1524 'id': 'DJztXj2GPfk',
1525 'ext': 'mp4',
1526 'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',
1527 'description': 'md5:bf577a41da97918e94fa9798d9228825',
1528 'upload_date': '20090125',
1529 'uploader': 'Prochorowka',
1530 'uploader_id': 'Prochorowka',
1531 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',
1532 'artist': 'Panjabi MC',
1533 'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',
1534 'album': 'Beware of the Boys (Mundian To Bach Ke)',
1535 },
1536 'params': {
1537 'skip_download': True,
1538 },
1539 'skip': 'Video unavailable',
1540 },
1541 {
1542 # empty description results in an empty string
1543 'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',
1544 'info_dict': {
1545 'id': 'x41yOUIvK2k',
1546 'ext': 'mp4',
1547 'title': 'IMG 3456',
1548 'description': '',
1549 'upload_date': '20170613',
1550 'uploader_id': 'ElevageOrVert',
1551 'uploader': 'ElevageOrVert',
1552 },
1553 'params': {
1554 'skip_download': True,
1555 },
1556 },
1557 {
1558 # with '};' inside yt initial data (see [1])
1559 # see [2] for an example with '};' inside ytInitialPlayerResponse
1560 # 1. https://github.com/ytdl-org/youtube-dl/issues/27093
1561 # 2. https://github.com/ytdl-org/youtube-dl/issues/27216
1562 'url': 'https://www.youtube.com/watch?v=CHqg6qOn4no',
1563 'info_dict': {
1564 'id': 'CHqg6qOn4no',
1565 'ext': 'mp4',
1566 'title': 'Part 77 Sort a list of simple types in c#',
1567 'description': 'md5:b8746fa52e10cdbf47997903f13b20dc',
1568 'upload_date': '20130831',
1569 'uploader_id': 'kudvenkat',
1570 'uploader': 'kudvenkat',
1571 },
1572 'params': {
1573 'skip_download': True,
1574 },
1575 },
1576 {
1577 # another example of '};' in ytInitialData
1578 'url': 'https://www.youtube.com/watch?v=gVfgbahppCY',
1579 'only_matching': True,
1580 },
1581 {
1582 'url': 'https://www.youtube.com/watch_popup?v=63RmMXCd_bQ',
1583 'only_matching': True,
1584 },
{
    # https://github.com/ytdl-org/youtube-dl/pull/28094
    'url': 'OtqTfy26tG0',
    'info_dict': {
        'id': 'OtqTfy26tG0',
        'ext': 'mp4',
        'title': 'Burn Out',
        'description': 'md5:8d07b84dcbcbfb34bc12a56d968b6131',
        'upload_date': '20141120',
        'uploader': 'The Cinematic Orchestra - Topic',
        'uploader_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',
        'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',
        'artist': 'The Cinematic Orchestra',
        'track': 'Burn Out',
        'album': 'Every Day',
        # This key was previously misspelled 'release_data', so the None
        # expectation never checked a real field. Neither a release date
        # nor a release year is expected for this video.
        'release_date': None,
        'release_year': None,
    },
    'params': {
        'skip_download': True,
    },
},
1607 {
1608 # controversial video, only works with bpctr when authenticated with cookies
1609 'url': 'https://www.youtube.com/watch?v=nGC3D_FkCmg',
1610 'only_matching': True,
1611 },
1612 {
1613 # controversial video, requires bpctr/contentCheckOk
1614 'url': 'https://www.youtube.com/watch?v=SZJvDhaSDnc',
1615 'info_dict': {
1616 'id': 'SZJvDhaSDnc',
1617 'ext': 'mp4',
1618 'title': 'San Diego teen commits suicide after bullying over embarrassing video',
1619 'channel_id': 'UC-SJ6nODDmufqBzPBwCvYvQ',
1620 'uploader': 'CBS This Morning',
1621 'uploader_id': 'CBSThisMorning',
1622 'upload_date': '20140716',
1623 'description': 'md5:acde3a73d3f133fc97e837a9f76b53b7'
1624 }
1625 },
1626 {
1627 # restricted location, https://github.com/ytdl-org/youtube-dl/issues/28685
1628 'url': 'cBvYw8_A0vQ',
1629 'info_dict': {
1630 'id': 'cBvYw8_A0vQ',
1631 'ext': 'mp4',
1632 'title': '4K Ueno Okachimachi Street Scenes 上野御徒町歩き',
1633 'description': 'md5:ea770e474b7cd6722b4c95b833c03630',
1634 'upload_date': '20201120',
1635 'uploader': 'Walk around Japan',
1636 'uploader_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',
1637 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',
1638 },
1639 'params': {
1640 'skip_download': True,
1641 },
1642 }, {
1643 # Has multiple audio streams
1644 'url': 'WaOKSUlf4TM',
1645 'only_matching': True
1646 }, {
1647 # Requires Premium: has format 141 when requested using YTM url
1648 'url': 'https://music.youtube.com/watch?v=XclachpHxis',
1649 'only_matching': True
1650 }, {
1651 # multiple subtitles with same lang_code
1652 'url': 'https://www.youtube.com/watch?v=wsQiKKfKxug',
1653 'only_matching': True,
1654 }, {
1655 # Force use android client fallback
1656 'url': 'https://www.youtube.com/watch?v=YOelRv7fMxY',
1657 'info_dict': {
1658 'id': 'YOelRv7fMxY',
1659 'title': 'DIGGING A SECRET TUNNEL Part 1',
1660 'ext': '3gp',
1661 'upload_date': '20210624',
1662 'channel_id': 'UCp68_FLety0O-n9QU6phsgw',
1663 'uploader': 'colinfurze',
1664 'uploader_id': 'colinfurze',
1665 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCp68_FLety0O-n9QU6phsgw',
1666 'description': 'md5:b5096f56af7ccd7a555c84db81738b22'
1667 },
1668 'params': {
1669 'format': '17', # 3gp format available on android
1670 'extractor_args': {'youtube': {'player_client': ['android']}},
1671 },
1672 },
1673 {
1674 # Skip download of additional client configs (remix client config in this case)
1675 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1676 'only_matching': True,
1677 'params': {
1678 'extractor_args': {'youtube': {'player_skip': ['configs']}},
1679 },
1680 }, {
1681 # shorts
1682 'url': 'https://www.youtube.com/shorts/BGQWPY4IigY',
1683 'only_matching': True,
1684 }, {
1685 'note': 'Storyboards',
1686 'url': 'https://www.youtube.com/watch?v=5KLPxDtMqe8',
1687 'info_dict': {
1688 'id': '5KLPxDtMqe8',
1689 'ext': 'mhtml',
1690 'format_id': 'sb0',
1691 'title': 'Your Brain is Plastic',
1692 'uploader_id': 'scishow',
1693 'description': 'md5:89cd86034bdb5466cd87c6ba206cd2bc',
1694 'upload_date': '20140324',
1695 'uploader': 'SciShow',
1696 }, 'params': {'format': 'mhtml', 'skip_download': True}
1697 }
1698 ]
1699
@classmethod
def suitable(cls, url):
    """Tell whether this extractor should handle ``url``.

    A URL whose query string has a non-empty first ``list`` value is
    rejected here; every other URL is decided by the parent class.
    """
    # Function-local import, kept exactly as the original had it.
    from ..utils import parse_qs

    list_param = parse_qs(url).get('list', [None])[0]
    return False if list_param else super(YoutubeIE, cls).suitable(url)
1708
def __init__(self, *args, **kwargs):
    """Run the parent initialiser, then create the two in-memory caches."""
    super(YoutubeIE, self).__init__(*args, **kwargs)
    # _code_cache: player id -> downloaded player JS (filled by _load_player).
    # _player_cache: signature / nsig functions and decrypted nsig values.
    self._code_cache, self._player_cache = {}, {}
1713
def _extract_player_url(self, *ytcfgs, webpage=None):
    """Find the JS player URL in the given ytcfg dicts and make it absolute.

    Looks up ``PLAYER_JS_URL`` first, then ``jsUrl`` under
    ``WEB_PLAYER_CONTEXT_CONFIGS``. Returns None when nothing is found.
    ``webpage`` is accepted but not used by this method.
    """
    lookup_paths = (
        (..., 'PLAYER_JS_URL'),
        (..., 'WEB_PLAYER_CONTEXT_CONFIGS', ..., 'jsUrl'),
    )
    js_url = traverse_obj(ytcfgs, *lookup_paths, get_all=False, expected_type=compat_str)
    if not js_url:
        return None
    # Protocol-relative URL: just prepend the scheme.
    if js_url.startswith('//'):
        return 'https:' + js_url
    # Already absolute.
    if re.match(r'https?://', js_url):
        return js_url
    # Anything else is resolved against the YouTube origin.
    return compat_urlparse.urljoin('https://www.youtube.com', js_url)
1726
def _download_player_url(self, video_id, fatal=False):
    """Build a player ``base.js`` URL from the version found in the iframe API JS.

    Returns None when the iframe API script could not be downloaded or no
    8-hex-digit player version was found in it.
    """
    iframe_js = self._download_webpage(
        'https://www.youtube.com/iframe_api',
        note='Downloading iframe API JS', video_id=video_id, fatal=fatal)
    if not iframe_js:
        return None
    version = self._search_regex(
        r'player\\?/([0-9a-fA-F]{8})\\?/', iframe_js, 'player version', fatal=fatal)
    if not version:
        return None
    return f'https://www.youtube.com/s/player/{version}/player_ias.vflset/en_US/base.js'
1736
def _signature_cache_id(self, example_sig):
    """ Return a string representation of a signature """
    # The id is the length of each dot-separated part, re-joined with dots.
    part_lengths = [compat_str(len(chunk)) for chunk in example_sig.split('.')]
    return '.'.join(part_lengths)
1740
@classmethod
def _extract_player_info(cls, player_url):
    """Return the ``id`` group of the first ``_PLAYER_INFO_RE`` pattern that matches.

    Raises ExtractorError when none of the patterns match ``player_url``.
    """
    # Lazy generator: patterns are tried in order and searching stops at the first hit.
    attempts = (re.search(pattern, player_url) for pattern in cls._PLAYER_INFO_RE)
    found = next((m for m in attempts if m), None)
    if found is None:
        raise ExtractorError('Cannot identify player %r' % player_url)
    return found.group('id')
1750
def _load_player(self, video_id, player_url, fatal=True):
    """Return the player JS for ``player_url``, downloading it at most once per player id.

    Only a truthy download result is stored in ``self._code_cache``; when
    nothing was stored the method returns None.
    """
    player_id = self._extract_player_info(player_url)
    if player_id in self._code_cache:
        return self._code_cache[player_id]

    js_source = self._download_webpage(
        player_url, video_id, fatal=fatal,
        note='Downloading player ' + player_id,
        errnote='Download of %s failed' % player_url)
    if js_source:
        self._code_cache[player_id] = js_source
    return self._code_cache.get(player_id)
1761
def _extract_signature_function(self, video_id, player_url, example_sig):
    """Return a callable that descrambles signatures shaped like ``example_sig``.

    The function is looked up in the downloader's 'youtube-sigfuncs' cache
    first. On a miss it is parsed from the player JS, and the resulting
    character permutation is stored in that cache.

    Raises ExtractorError when the player code cannot be obtained.
    """
    player_id = self._extract_player_info(player_url)

    # Read from filesystem cache
    func_id = 'js_%s_%s' % (
        player_id, self._signature_cache_id(example_sig))
    # func_id is used as a cache entry name, so it must not contain path separators
    assert os.path.basename(func_id) == func_id

    cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
    if cache_spec is not None:
        # The cached spec is a list of source indices: output[k] = s[cache_spec[k]]
        return lambda s: ''.join(s[i] for i in cache_spec)

    code = self._load_player(video_id, player_url)
    if not code:
        # Previously execution fell through and failed on the unbound name `res`
        raise ExtractorError('Unable to load player code from %s' % player_url)
    res = self._parse_sig_js(code)

    # Run the function on a string of distinct characters (code points 0..n-1);
    # the code points of the output then describe the permutation.
    test_string = ''.join(map(compat_chr, range(len(example_sig))))
    cache_res = res(test_string)
    cache_spec = [ord(c) for c in cache_res]

    self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec)
    return res
1784
def _print_sig_code(self, func, example_sig):
    """Print Python code equivalent to the signature function ``func``.

    Does nothing unless the 'youtube_print_sig_code' param is set. ``func``
    is probed with a string of distinct characters, and the resulting index
    permutation is rendered as a sum of ``s[...]`` index and slice
    expressions.
    """
    if not self.get_param('youtube_print_sig_code'):
        return

    def gen_sig_code(idxs):
        """Yield ``s[...]`` expressions that together reproduce ``idxs``."""
        def _genslice(start, end, step):
            starts = '' if start == 0 else str(start)
            ends = (':%d' % (end + step)) if end + step >= 0 else ':'
            steps = '' if step == 1 else (':%d' % step)
            return 's[%s%s%s]' % (starts, ends, steps)

        # The pairwise loop below never binds `i` for fewer than two indices,
        # which used to end in an UnboundLocalError; emit them directly instead.
        if len(idxs) < 2:
            for idx in idxs:
                yield 's[%d]' % idx
            return

        step = None
        # Squelch pyflakes warnings - start will be set when step is set
        start = '(Never used)'
        for i, prev in zip(idxs[1:], idxs[:-1]):
            if step is not None:
                # Inside a run: keep going while the stride is unchanged
                if i - prev == step:
                    continue
                yield _genslice(start, prev, step)
                step = None
                continue
            if i - prev in [-1, 1]:
                # A run with stride +1 or -1 starts at `prev`
                step = i - prev
                start = prev
                continue
            else:
                yield 's[%d]' % prev
        # Flush the final element, or the run that is still open
        if step is None:
            yield 's[%d]' % i
        else:
            yield _genslice(start, i, step)

    test_string = ''.join(map(compat_chr, range(len(example_sig))))
    cache_res = func(test_string)
    cache_spec = [ord(c) for c in cache_res]
    expr_code = ' + '.join(gen_sig_code(cache_spec))
    signature_id_tuple = '(%s)' % (
        ', '.join(compat_str(len(p)) for p in example_sig.split('.')))
    code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
            '    return %s\n') % (signature_id_tuple, expr_code)
    self.to_screen('Extracted signature function:\n' + code)
1826
def _parse_sig_js(self, jscode):
    """Find the signature function in the player JS and wrap it as a Python callable.

    The patterns are tried in the order listed. The returned callable takes
    the scrambled signature string and passes it to the interpreted JS
    function as a one-element argument list.
    """
    sig_name_patterns = (
        r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bm=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(h\.s\)\)',
        r'\bc&&\(c=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(c\)\)',
        r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\);[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\)',
        r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
        r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
        # Obsolete patterns
        r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
        r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
    )
    sig_func_name = self._search_regex(
        sig_name_patterns, jscode, 'Initial JS player signature function name', group='sig')

    js_function = JSInterpreter(jscode).extract_function(sig_func_name)

    def descramble(s):
        return js_function([s])

    return descramble
1850
def _decrypt_signature(self, s, video_id, player_url):
    """Turn the encrypted s field into a working signature"""
    if player_url is None:
        raise ExtractorError('Cannot decrypt signature without player_url')

    try:
        # One descrambling function is kept per (player URL, signature shape)
        cache_key = (player_url, self._signature_cache_id(s))
        if cache_key not in self._player_cache:
            self._player_cache[cache_key] = self._extract_signature_function(
                video_id, player_url, s)
        descramble = self._player_cache[cache_key]
        self._print_sig_code(descramble, s)
        return descramble(s)
    except Exception as e:
        # Any failure is re-raised as ExtractorError carrying the full traceback text
        raise ExtractorError('Signature extraction failed: ' + traceback.format_exc(), cause=e)
1869
def _decrypt_nsig(self, s, video_id, player_url):
    """Turn the encrypted n field into a working signature"""
    if player_url is None:
        raise ExtractorError('Cannot decrypt nsig without player_url')

    # Make the player URL absolute before it is used as a cache key
    if player_url.startswith('//'):
        player_url = 'https:' + player_url
    elif not re.match(r'https?://', player_url):
        player_url = compat_urlparse.urljoin(
            'https://www.youtube.com', player_url)

    # Decrypted values are cached per input string
    value_key = ('nsig_value', s)
    if value_key in self._player_cache:
        return self._player_cache[value_key]

    try:
        # The n function itself is cached per player URL
        func_key = ('nsig', player_url)
        if func_key not in self._player_cache:
            self._player_cache[func_key] = self._extract_n_function(video_id, player_url)
        n_function = self._player_cache[func_key]
        decrypted = n_function(s)
        self._player_cache[value_key] = decrypted
        self.write_debug(f'Decrypted nsig {s} => {decrypted}')
        return decrypted
    except Exception as e:
        raise ExtractorError(traceback.format_exc(), cause=e, video_id=video_id)
1894
def _extract_n_function_name(self, jscode):
    """Return the three-character name of the n function found in ``jscode``."""
    nfunc_patterns = (
        r'\.get\("n"\)\)&&\(b=(?P<nfunc>[a-zA-Z0-9$]{3})\([a-zA-Z0-9]\)',
    )
    return self._search_regex(
        nfunc_patterns, jscode, 'Initial JS player n function name', group='nfunc')
1899
def _extract_n_function(self, video_id, player_url):
    """Return a callable that applies the player's n function to a string.

    The function code is loaded from the downloader's 'youtube-nsig' cache
    when present. Otherwise it is extracted from the player JS and stored
    there.
    """
    player_id = self._extract_player_info(player_url)
    func_code = self._downloader.cache.load('youtube-nsig', player_id)

    if not func_code:
        # Cache miss: locate the function in the player JS and remember its code
        jscode = self._load_player(video_id, player_url)
        funcname = self._extract_n_function_name(jscode)
        jsi = JSInterpreter(jscode)
        func_code = jsi.extract_function_code(funcname)
        self._downloader.cache.store('youtube-nsig', player_id, func_code)
    else:
        jsi = JSInterpreter(func_code)

    if self.get_param('youtube_print_sig_code'):
        self.to_screen(f'Extracted nsig function from {player_id}:\n{func_code[1]}\n')

    def apply_n_function(s):
        # The JS function is rebuilt from its code on every call, as in the original
        return jsi.extract_function_from_code(*func_code)([s])

    return apply_n_function
1917
def _extract_signature_timestamp(self, video_id, player_url, ytcfg=None, fatal=False):
    """
    Extract signatureTimestamp (sts)
    Required to tell API what sig/player version is in use.

    The ``STS`` entry of ``ytcfg`` is preferred; otherwise the value is
    searched for in the player JS.
    """
    sts = int_or_none(ytcfg.get('STS')) if isinstance(ytcfg, dict) else None
    if sts:
        return sts

    # Attempt to extract from player
    if player_url is None:
        error_msg = 'Cannot extract signature timestamp without player_url.'
        if fatal:
            raise ExtractorError(error_msg)
        self.report_warning(error_msg)
        return None

    code = self._load_player(video_id, player_url, fatal=fatal)
    if not code:
        return sts
    return int_or_none(self._search_regex(
        r'(?:signatureTimestamp|sts)\s*:\s*(?P<sts>[0-9]{5})', code,
        'JS player signature timestamp', group='sts', fatal=fatal))
1941
def _mark_watched(self, video_id, player_responses):
    """Request the playback-tracking URL so the video is marked as watched.

    The URL is read from ``playbackTracking.videostatsPlaybackUrl.baseUrl``
    of the first player response that has it. Its query is extended with
    ``ver=2`` and a random 16-character ``cpn``. A warning is reported, and
    nothing is requested, when no such URL is found.
    """
    playback_url = get_first(
        player_responses, ('playbackTracking', 'videostatsPlaybackUrl', 'baseUrl'),
        expected_type=url_or_none)
    if not playback_url:
        self.report_warning('Unable to mark watched')
        return
    parsed_playback_url = compat_urlparse.urlparse(playback_url)
    qs = compat_urlparse.parse_qs(parsed_playback_url.query)

    # cpn generation algorithm is reverse engineered from base.js.
    # In fact it works even with dummy cpn.
    CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
    # random.choice picks each of the 64 symbols with equal probability.
    # The earlier `random.randint(0, 256) & 63` drew from 257 values
    # (randint is inclusive), which made index 0 slightly more likely.
    cpn = ''.join(random.choice(CPN_ALPHABET) for _ in range(16))

    qs.update({
        'ver': ['2'],
        'cpn': [cpn],
    })
    playback_url = compat_urlparse.urlunparse(
        parsed_playback_url._replace(query=compat_urllib_parse_urlencode(qs, True)))

    # Best effort only: a failed request is reported but not raised
    self._download_webpage(
        playback_url, video_id, 'Marking watched',
        'Unable to mark watched', fatal=False)
1967
@staticmethod
def _extract_urls(webpage):
    """Return every YouTube embed reference found in ``webpage``.

    Three sources are scanned, in this order: generic player embeds
    (iframe / embed / object / SWFObject / ``data-video-url``), lazyYT
    ``data-youtube-id`` attributes, and the WordPress "YouTube Video
    Importer" plugin's ``data-video_id`` attributes. The first yields URLs,
    the other two yield the raw attribute values.
    """
    # Embedded YouTube player
    # The embedSWF alternative used to read `embedSWF\(?:\s*`, which demands
    # a literal ':' and therefore never matched `embedSWF("url", ...)`.
    # The old form is still accepted, so the new pattern is a strict superset.
    entries = [
        unescapeHTML(mobj.group('url'))
        for mobj in re.finditer(r'''(?x)
        (?:
            <iframe[^>]+?src=|
            data-video-url=|
            <embed[^>]+?src=|
            embedSWF(?:\(?:|\()\s*|
            <object[^>]+data=|
            new\s+SWFObject\(
        )
        (["\'])
        (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
            (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
        \1''', webpage)]

    # lazyYT YouTube embed
    entries.extend(map(
        unescapeHTML,
        re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage)))

    # Wordpress "YouTube Video Importer" plugin
    matches = re.findall(r'''(?x)<div[^>]+
        class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
        data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
    # findall returns (q1, q2, id) tuples; the id is the last group
    entries.extend(m[-1] for m in matches)

    return entries
1999
@staticmethod
def _extract_url(webpage):
    """Return the first entry produced by ``_extract_urls``, or None when there is none."""
    return next(iter(YoutubeIE._extract_urls(webpage)), None)
2004
@classmethod
def extract_id(cls, url):
    """Return the ``id`` group of ``_VALID_URL`` matched against ``url``.

    Raises ExtractorError when the URL does not match.
    """
    match = re.match(cls._VALID_URL, url, re.VERBOSE)
    if match is not None:
        return match.group('id')
    raise ExtractorError('Invalid URL: %s' % url)
2011
def _extract_chapters_from_json(self, data, duration):
    """Build chapters from the ``chapteredPlayerBarRenderer`` section of ``data``."""
    chapters_path = (
        'playerOverlays', 'playerOverlayRenderer', 'decoratedPlayerBarRenderer',
        'decoratedPlayerBarRenderer', 'playerBar', 'chapteredPlayerBarRenderer', 'chapters'
    )

    def start_seconds(chapter):
        # timeRangeStartMillis is divided by 1000 via scale
        return float_or_none(
            traverse_obj(chapter, ('chapterRenderer', 'timeRangeStartMillis')), scale=1000)

    def title_text(chapter):
        return traverse_obj(
            chapter, ('chapterRenderer', 'title', 'simpleText'), expected_type=str)

    raw_chapters = traverse_obj(data, chapters_path, expected_type=list)
    return self._extract_chapters(
        raw_chapters,
        chapter_time=start_seconds,
        chapter_title=title_text,
        duration=duration)
2026
def _extract_chapters_from_engagement_panel(self, data, duration):
    """Build chapters from the first engagement panel that yields any.

    Each ``macroMarkersListRenderer.contents`` list is tried in order.
    Returns an empty list when none of them produces chapters.
    """
    panel_contents = traverse_obj(
        data,
        ('engagementPanels', ..., 'engagementPanelSectionListRenderer', 'content', 'macroMarkersListRenderer', 'contents'),
        expected_type=list, default=[])

    def chapter_time(chapter):
        return parse_duration(self._get_text(chapter, 'timeDescription'))

    def chapter_title(chapter):
        return self._get_text(chapter, 'title')

    for contents in panel_contents:
        chapters = self._extract_chapters(
            traverse_obj(contents, (..., 'macroMarkersListItemRenderer')),
            chapter_time, chapter_title, duration)
        if chapters:
            return chapters
    return []
2042
2043 def _extract_chapters(self, chapter_list, chapter_time, chapter_title, duration):
2044 chapters = []
2045 last_chapter = {'start_time': 0}
2046 for idx, chapter in enumerate(chapter_list or []):
2047 title = chapter_title(chapter)
2048 start_time = chapter_time(chapter)
2049 if start_time is None:
2050 continue
2051 last_chapter['end_time'] = start_time
2052 if start_time < last_chapter['start_time']:
2053 if idx == 1:
2054 chapters.pop()
2055 self.report_warning('Invalid start time for chapter "%s"' % last_chapter['title'])
2056 else:
2057 self.report_warning(f'Invalid start time for chapter "{title}"')
2058 continue
2059 last_chapter = {'start_time': start_time, 'title': title}
2060 chapters.append(last_chapter)
2061 last_chapter['end_time'] = duration
2062 return chapters
2063
def _extract_yt_initial_variable(self, webpage, regex, video_id, name):
    """Find a JSON blob in ``webpage`` with ``regex`` and parse it.

    The boundary-terminated form of the regex is tried first, then the bare
    regex. When neither matches, ``'{}'`` is parsed instead.
    """
    bounded_regex = r'%s\s*%s' % (regex, self._YT_INITIAL_BOUNDARY_RE)
    raw_json = self._search_regex(
        (bounded_regex, regex), webpage, name, default='{}')
    return self._parse_json(raw_json, video_id, fatal=False)
2068
2069 @staticmethod
2070 def parse_time_text(time_text):
2071 """
2072 Parse the comment time text
2073 time_text is in the format 'X units ago (edited)'
2074 """
2075 time_text_split = time_text.split(' ')
2076 if len(time_text_split) >= 3:
2077 try:
2078 return datetime_from_str('now-%s%s' % (time_text_split[0], time_text_split[1]), precision='auto')
2079 except ValueError:
2080 return None
2081
def _extract_comment(self, comment_renderer, parent=None):
    """Convert a ``commentRenderer`` dict into a comment info dict.

    Returns None when the renderer has no ``commentId``. ``parent`` is the
    id of the parent comment; 'root' is used when it is not given.
    """
    comment_id = comment_renderer.get('commentId')
    if not comment_id:
        return

    text = self._get_text(comment_renderer, 'contentText')

    # note: timestamp is an estimate calculated from the current time and time_text
    time_text = self._get_text(comment_renderer, 'publishedTimeText') or ''
    time_text_dt = self.parse_time_text(time_text)
    # Default to None: without this, an empty or unparsable time text left
    # `timestamp` unbound and building the result raised UnboundLocalError.
    timestamp = None
    if isinstance(time_text_dt, datetime.datetime):
        timestamp = calendar.timegm(time_text_dt.timetuple())
    author = self._get_text(comment_renderer, 'authorText')
    author_id = try_get(comment_renderer,
                        lambda x: x['authorEndpoint']['browseEndpoint']['browseId'], compat_str)

    # Falls back to 0 when neither voteCount nor likeCount gives a count
    votes = parse_count(try_get(comment_renderer, (lambda x: x['voteCount']['simpleText'],
                                                   lambda x: x['likeCount']), compat_str)) or 0
    author_thumbnail = try_get(comment_renderer,
                               lambda x: x['authorThumbnail']['thumbnails'][-1]['url'], compat_str)

    author_is_uploader = try_get(comment_renderer, lambda x: x['authorIsChannelOwner'], bool)
    is_favorited = 'creatorHeart' in (try_get(
        comment_renderer, lambda x: x['actionButtons']['commentActionButtonsRenderer'], dict) or {})
    return {
        'id': comment_id,
        'text': text,
        'timestamp': timestamp,
        'time_text': time_text,
        'like_count': votes,
        'is_favorited': is_favorited,
        'author': author,
        'author_id': author_id,
        'author_thumbnail': author_thumbnail,
        'author_is_uploader': author_is_uploader,
        'parent': parent or 'root'
    }
2119
def _comment_entries(self, root_continuation_data, ytcfg, video_id, parent=None, comment_counts=None):
    """Generator yielding comment dicts, following continuations page by page.

    Calls itself recursively for reply threads, passing the parent comment's
    id as ``parent`` and sharing ``comment_counts``.

    ``comment_counts`` is a mutable 3-item list:
    [comments so far, estimated total comments, current reply thread number].
    It is created here when not supplied.
    """

    def extract_header(contents):
        # Reads the expected comment count and the sort menu from the comments
        # header; returns the continuation for the selected sort order
        # (None when none was found).
        _continuation = None
        for content in contents:
            comments_header_renderer = try_get(content, lambda x: x['commentsHeaderRenderer'])
            expected_comment_count = parse_count(self._get_text(
                comments_header_renderer, 'countText', 'commentsCount', max_runs=1))

            if expected_comment_count:
                comment_counts[1] = expected_comment_count
                self.to_screen('Downloading ~%d comments' % expected_comment_count)
            sort_mode_str = self._configuration_arg('comment_sort', [''])[0]
            comment_sort_index = int(sort_mode_str != 'top')  # 1 = new, 0 = top

            sort_menu_item = try_get(
                comments_header_renderer,
                lambda x: x['sortMenu']['sortFilterSubMenuRenderer']['subMenuItems'][comment_sort_index], dict) or {}
            sort_continuation_ep = sort_menu_item.get('serviceEndpoint') or {}

            _continuation = self._extract_continuation_ep_data(sort_continuation_ep) or self._extract_continuation(sort_menu_item)
            if not _continuation:
                continue

            sort_text = sort_menu_item.get('title')
            if isinstance(sort_text, compat_str):
                sort_text = sort_text.lower()
            else:
                sort_text = 'top comments' if comment_sort_index == 0 else 'newest first'
            self.to_screen('Sorting comments by %s' % sort_text)
            break
        return _continuation

    def extract_thread(contents):
        # Yields each comment in `contents`, followed by its replies
        # (fetched through a recursive _comment_entries call).
        if not parent:
            # Top-level page: restart the reply-thread counter
            comment_counts[2] = 0
        for content in contents:
            comment_thread_renderer = try_get(content, lambda x: x['commentThreadRenderer'])
            # NOTE(review): `dict` sits inside the getter tuple here instead of
            # being passed as the expected type; presumably
            # `try_get(obj, lambda ..., dict)` was intended - confirm against
            # try_get's signature.
            comment_renderer = try_get(
                comment_thread_renderer, (lambda x: x['comment']['commentRenderer'], dict)) or try_get(
                content, (lambda x: x['commentRenderer'], dict))

            if not comment_renderer:
                continue
            comment = self._extract_comment(comment_renderer, parent)
            if not comment:
                continue
            comment_counts[0] += 1
            yield comment
            # Attempt to get the replies
            comment_replies_renderer = try_get(
                comment_thread_renderer, lambda x: x['replies']['commentRepliesRenderer'], dict)

            if comment_replies_renderer:
                comment_counts[2] += 1
                comment_entries_iter = self._comment_entries(
                    comment_replies_renderer, ytcfg, video_id,
                    parent=comment.get('id'), comment_counts=comment_counts)

                for reply_comment in comment_entries_iter:
                    yield reply_comment

    # YouTube comments have a max depth of 2
    max_depth = int_or_none(self._configuration_arg('max_comment_depth', [''])[0]) or float('inf')
    if max_depth == 1 and parent:
        # Replies were requested to be skipped
        return
    if not comment_counts:
        # comment so far, est. total comments, current comment thread #
        comment_counts = [0, 0, 0]

    continuation = self._extract_continuation(root_continuation_data)
    if continuation and len(continuation['continuation']) < 27:
        self.write_debug('Detected old API continuation token. Generating new API compatible token.')
        continuation_token = self._generate_comment_continuation(video_id)
        continuation = self._build_api_continuation_query(continuation_token, None)

    # Surface any message renderer text as a warning, top level only
    message = self._get_text(root_continuation_data, ('contents', ..., 'messageRenderer', 'text'), max_runs=1)
    if message and not parent:
        self.report_warning(message, video_id=video_id)

    visitor_data = None
    # True until the header of the top-level comment section has been processed
    is_first_continuation = parent is None

    for page_num in itertools.count(0):
        if not continuation:
            break
        headers = self.generate_api_headers(ytcfg=ytcfg, visitor_data=visitor_data)
        comment_prog_str = '(%d/%d)' % (comment_counts[0], comment_counts[1])
        if page_num == 0:
            if is_first_continuation:
                note_prefix = 'Downloading comment section API JSON'
            else:
                note_prefix = '    Downloading comment API JSON reply thread %d %s' % (
                    comment_counts[2], comment_prog_str)
        else:
            note_prefix = '%sDownloading comment%s API JSON page %d %s' % (
                '       ' if parent else '', ' replies' if parent else '',
                page_num, comment_prog_str)

        response = self._extract_response(
            item_id=None, query=continuation,
            ep='next', ytcfg=ytcfg, headers=headers, note=note_prefix,
            check_get_keys=('onResponseReceivedEndpoints', 'continuationContents'))
        if not response:
            break
        # Keep the most recent visitorData for the headers of the next request
        visitor_data = try_get(
            response,
            lambda x: x['responseContext']['webResponseContextExtensionData']['ytConfigData']['visitorData'],
            compat_str) or visitor_data

        continuation_contents = dict_get(response, ('onResponseReceivedEndpoints', 'continuationContents'))

        # Reset; the branches below set it again when another page is available
        continuation = None
        if isinstance(continuation_contents, list):
            for continuation_section in continuation_contents:
                if not isinstance(continuation_section, dict):
                    continue
                continuation_items = try_get(
                    continuation_section,
                    (lambda x: x['reloadContinuationItemsCommand']['continuationItems'],
                     lambda x: x['appendContinuationItemsAction']['continuationItems']),
                    list) or []
                if is_first_continuation:
                    continuation = extract_header(continuation_items)
                    is_first_continuation = False
                    if continuation:
                        break
                    continue
                # NOTE(review): `count` is the index of the last yielded entry,
                # so it is 0 both for no entries and for exactly one entry;
                # the `count == 0` check below therefore also fires after a
                # single comment - confirm whether that is intended.
                count = 0
                for count, entry in enumerate(extract_thread(continuation_items)):
                    yield entry
                continuation = self._extract_continuation({'contents': continuation_items})
                if continuation:
                    # Sometimes YouTube provides a continuation without any comments
                    # In most cases we end up just downloading these with very little comments to come.
                    if count == 0:
                        if not parent:
                            self.report_warning('No comments received - assuming end of comments')
                        continuation = None
                    break

        # Deprecated response structure
        elif isinstance(continuation_contents, dict):
            known_continuation_renderers = ('itemSectionContinuation', 'commentRepliesContinuation')
            for key, continuation_renderer in continuation_contents.items():
                if key not in known_continuation_renderers:
                    continue
                if not isinstance(continuation_renderer, dict):
                    continue
                if is_first_continuation:
                    header_continuation_items = [continuation_renderer.get('header') or {}]
                    continuation = extract_header(header_continuation_items)
                    is_first_continuation = False
                    if continuation:
                        break

                # Sometimes YouTube provides a continuation without any comments
                # In most cases we end up just downloading these with very little comments to come.
                count = 0
                for count, entry in enumerate(extract_thread(continuation_renderer.get('contents') or {})):
                    yield entry
                continuation = self._extract_continuation(continuation_renderer)
                if count == 0:
                    if not parent:
                        self.report_warning('No comments received - assuming end of comments')
                    continuation = None
                break
2287
@staticmethod
def _generate_comment_continuation(video_id):
    """
    Generates initial comment section continuation token from given video id
    """
    encoded_id = base64.b64encode(bytes(video_id.encode('utf-8')))
    # Fixed base64 segments with the encoded video id placed between them
    segments = ('Eg0SCw==', encoded_id, 'GAYyJyIRIgs=', encoded_id, 'MAB4AjAAQhBjb21tZW50cy1zZWN0aW9u')
    token_ints = []
    for segment in segments:
        token_ints.extend(bytes_to_intlist(base64.b64decode(segment)))
    return base64.b64encode(intlist_to_bytes(token_ints)).decode('utf-8')
2298
def _get_comments(self, ytcfg, video_id, contents, webpage):
    """Entry for comment extraction"""
    comment_limit = int_or_none(self._configuration_arg('max_comments', [''])[0])

    # Force English regardless of account setting to prevent parsing issues
    # See: https://github.com/yt-dlp/yt-dlp/issues/532
    ytcfg = copy.deepcopy(ytcfg)
    client_context = traverse_obj(
        ytcfg, ('INNERTUBE_CONTEXT', 'client'), expected_type=dict, default={})
    client_context['hl'] = 'en'

    def _real_comment_extract(contents):
        # Pick the first item section identified as the comment section (None if absent)
        renderer = None
        for section in traverse_obj(contents, (..., 'itemSectionRenderer'), default={}):
            if section.get('sectionIdentifier') == 'comment-item-section':
                renderer = section
                break
        # `ytcfg` here is the deep copy made above
        yield from self._comment_entries(renderer, ytcfg, video_id)

    return itertools.islice(_real_comment_extract(contents), 0, comment_limit)
2314
2315 @staticmethod
2316 def _get_checkok_params():
2317 return {'contentCheckOk': True, 'racyCheckOk': True}
2318
2319 @classmethod
2320 def _generate_player_context(cls, sts=None):
2321 context = {
2322 'html5Preference': 'HTML5_PREF_WANTS',
2323 }
2324 if sts is not None:
2325 context['signatureTimestamp'] = sts
2326 return {
2327 'playbackContext': {
2328 'contentPlaybackContext': context
2329 },
2330 **cls._get_checkok_params()
2331 }
2332
@staticmethod
def _is_agegated(player_response):
    """Tell whether ``player_response`` looks age-gated.

    True when ``playabilityStatus`` has a truthy ``desktopLegacyAgeGateReason``
    or when its ``status`` / ``reason`` contains one of the known markers.
    """
    if traverse_obj(player_response, ('playabilityStatus', 'desktopLegacyAgeGateReason')):
        return True

    AGE_GATE_REASONS = (
        'confirm your age', 'age-restricted', 'inappropriate',  # reason
        'age_verification_required', 'age_check_required',  # status
    )
    candidates = traverse_obj(player_response, ('playabilityStatus', ('status', 'reason')), default=[])
    for marker in AGE_GATE_REASONS:
        for candidate in candidates:
            if marker in candidate:
                return True
    return False
2344
@staticmethod
def _is_unplayable(player_response):
    """Tell whether ``playabilityStatus.status`` equals 'UNPLAYABLE'."""
    status = traverse_obj(player_response, ('playabilityStatus', 'status'))
    return status == 'UNPLAYABLE'
2348
def _extract_player_response(self, client, video_id, master_ytcfg, player_ytcfg, player_url, initial_pr):
    """Request the player API response for ``video_id`` using ``client``.

    The signature timestamp is only looked up when ``player_url`` is given.
    Returns None when the API response is falsy.
    """
    session_index = self._extract_session_index(player_ytcfg, master_ytcfg)
    syncid = self._extract_account_syncid(player_ytcfg, master_ytcfg, initial_pr)
    if player_url:
        sts = self._extract_signature_timestamp(video_id, player_url, master_ytcfg, fatal=False)
    else:
        sts = None
    headers = self.generate_api_headers(
        ytcfg=player_ytcfg, account_syncid=syncid, session_index=session_index, default_client=client)

    yt_query = {'videoId': video_id, **self._generate_player_context(sts)}
    progress_note = 'Downloading %s player API JSON' % client.replace('_', ' ').strip()
    response = self._extract_response(
        item_id=video_id, ep='player', query=yt_query,
        ytcfg=player_ytcfg, headers=headers, fatal=True,
        default_client=client,
        note=progress_note)
    return response or None
2365
def _get_requested_clients(self, url, smuggled_data):
    """Return the ordered, de-duplicated list of innertube clients to query.

    Clients come from the 'player_client' extractor argument: 'default'
    expands to android + web, and 'all' to every client whose name does not
    start with '_', sorted by descending priority. Unknown names are
    skipped with a warning. For music URLs the ``<client>_music`` variants
    that exist in INNERTUBE_CLIENTS are appended.
    """
    requested_clients = []
    default = ['android', 'web']
    allowed_clients = sorted(
        [client for client in INNERTUBE_CLIENTS.keys() if client[:1] != '_'],
        key=lambda client: INNERTUBE_CLIENTS[client]['priority'], reverse=True)
    for client in self._configuration_arg('player_client'):
        if client in allowed_clients:
            requested_clients.append(client)
        elif client == 'default':
            requested_clients.extend(default)
        elif client == 'all':
            requested_clients.extend(allowed_clients)
        else:
            self.report_warning(f'Skipping unsupported client {client}')
    if not requested_clients:
        requested_clients = default

    if smuggled_data.get('is_music_url') or self.is_music_url(url):
        # Build the additions first: the previous code extended the list from
        # a generator iterating that same list, i.e. mutated it mid-iteration.
        music_clients = [
            f'{client}_music' for client in requested_clients
            if f'{client}_music' in INNERTUBE_CLIENTS]
        requested_clients.extend(music_clients)

    return orderedSet(requested_clients)
2389
def _extract_player_ytcfg(self, client, video_id):
    """Download and parse the ytcfg for clients that have their own config page.

    Only 'web_music' and 'web_embedded' have one; any other client yields
    an empty dict without a download.
    """
    if client == 'web_music':
        config_url = 'https://music.youtube.com'
    elif client == 'web_embedded':
        config_url = f'https://www.youtube.com/embed/{video_id}?html5=1'
    else:
        return {}
    page = self._download_webpage(
        config_url, video_id, fatal=False, note=f'Downloading {client} config')
    return self.extract_ytcfg(video_id, page) or {}
2399
def _extract_player_responses(self, clients, video_id, webpage, master_ytcfg):
    """Collect player responses for each requested client.

    Returns a ``(prs, player_url)`` tuple, where ``prs`` is the list of
    player responses gathered. If every client failed and nothing was
    collected, the last ExtractorError is re-raised; otherwise the last
    error is only reported as a warning.
    """
    initial_pr = None
    if webpage:
        initial_pr = self._extract_yt_initial_variable(
            webpage, self._YT_INITIAL_PLAYER_RESPONSE_RE,
            video_id, 'initial player response')

    original_clients = clients
    # Reversed copy used as a stack: pop() then takes clients in their
    # original order, and a client appended during the loop is tried next.
    clients = clients[::-1]
    prs = []

    def append_client(client_name):
        # Queue an extra client, but only if it is known and was not requested already
        if client_name in INNERTUBE_CLIENTS and client_name not in original_clients:
            clients.append(client_name)

    # Android player_response does not have microFormats which are needed for
    # extraction of some data. So we return the initial_pr with formats
    # stripped out even if not requested by the user
    # See: https://github.com/yt-dlp/yt-dlp/issues/501
    if initial_pr:
        pr = dict(initial_pr)
        pr['streamingData'] = None
        prs.append(pr)

    last_error = None
    tried_iframe_fallback = False
    player_url = None
    while clients:
        client = clients.pop()
        player_ytcfg = master_ytcfg if client == 'web' else {}
        if 'configs' not in self._configuration_arg('player_skip'):
            player_ytcfg = self._extract_player_ytcfg(client, video_id) or player_ytcfg

        # Once a player URL is found it is reused for the remaining clients
        player_url = player_url or self._extract_player_url(master_ytcfg, player_ytcfg, webpage=webpage)
        require_js_player = self._get_default_ytcfg(client).get('REQUIRE_JS_PLAYER')
        if 'js' in self._configuration_arg('player_skip'):
            require_js_player = False
            player_url = None

        # The iframe API fallback is attempted at most once per call
        if not player_url and not tried_iframe_fallback and require_js_player:
            player_url = self._download_player_url(video_id)
            tried_iframe_fallback = True

        try:
            # For the web client the response already embedded in the webpage is reused
            pr = initial_pr if client == 'web' and initial_pr else self._extract_player_response(
                client, video_id, player_ytcfg or master_ytcfg, player_ytcfg, player_url if require_js_player else None, initial_pr)
        except ExtractorError as e:
            # Report the previous error now and keep only the newest one
            if last_error:
                self.report_warning(last_error)
            last_error = e
            continue

        if pr:
            prs.append(pr)

        # creator clients can bypass AGE_VERIFICATION_REQUIRED if logged in
        if client.endswith('_agegate') and self._is_unplayable(pr) and self.is_authenticated:
            append_client(client.replace('_agegate', '_creator'))
        elif self._is_agegated(pr):
            append_client(f'{client}_agegate')

    if last_error:
        if not len(prs):
            raise last_error
        self.report_warning(last_error)
    return prs, player_url
2466
def _extract_formats(self, streaming_data, video_id, player_url, is_live):
    """Yield format dicts for every usable stream of the player responses.

    Direct (https) formats listed under `formats`/`adaptiveFormats` are
    yielded first, followed by formats read from the HLS and DASH manifests.

    @param streaming_data  iterable of `streamingData` dicts
    @param video_id        video id, forwarded to the signature/manifest helpers
    @param player_url      JS player URL; when falsy, formats that need a
                           decrypted signature are skipped
    @param is_live         when truthy, the DASH manifest is only read if the
                           `include_live_dash` extractor argument is set
    """
    itags, stream_ids = {}, []
    itag_qualities, res_qualities = {}, {}
    q = qualities([
        # Normally tiny is the smallest video-only formats. But
        # audio-only formats with unknown quality may get tagged as tiny
        'tiny',
        'audio_quality_ultralow', 'audio_quality_low', 'audio_quality_medium', 'audio_quality_high',  # Audio only formats
        'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres'
    ])
    streaming_formats = traverse_obj(streaming_data, (..., ('formats', 'adaptiveFormats'), ...), default=[])

    for fmt in streaming_formats:
        if fmt.get('targetDurationSec') or fmt.get('drmFamilies'):
            continue

        itag = str_or_none(fmt.get('itag'))
        audio_track = fmt.get('audioTrack') or {}
        stream_id = '%s.%s' % (itag or '', audio_track.get('id', ''))
        if stream_id in stream_ids:
            continue

        quality = fmt.get('quality')
        height = int_or_none(fmt.get('height'))
        if quality == 'tiny' or not quality:
            # `or ''` also covers an explicit null value
            quality = (fmt.get('audioQuality') or '').lower() or quality
        # The 3gp format (17) in android client has a quality of "small",
        # but is actually worse than other formats
        if itag == '17':
            quality = 'tiny'
        if quality:
            if itag:
                itag_qualities[itag] = quality
            if height:
                res_qualities[height] = quality
        # FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment
        # (adding `&sq=0` to the URL) and parsing emsg box to determine the
        # number of fragment that would subsequently requested with (`&sq=N`)
        if fmt.get('type') == 'FORMAT_STREAM_TYPE_OTF':
            continue

        fmt_url = fmt.get('url')
        if not fmt_url:
            # No plain URL: it has to be rebuilt from the signature cipher
            sc = compat_parse_qs(fmt.get('signatureCipher'))
            fmt_url = url_or_none(try_get(sc, lambda x: x['url'][0]))
            encrypted_sig = try_get(sc, lambda x: x['s'][0])
            if not (sc and fmt_url and encrypted_sig):
                continue
            if not player_url:
                continue
            signature = self._decrypt_signature(sc['s'][0], video_id, player_url)
            sp = try_get(sc, lambda x: x['sp'][0]) or 'signature'
            fmt_url += '&' + sp + '=' + signature

        query = parse_qs(fmt_url)
        throttled = False
        if query.get('n'):
            try:
                fmt_url = update_url_query(fmt_url, {
                    'n': self._decrypt_nsig(query['n'][0], video_id, player_url)})
            except ExtractorError as e:
                self.report_warning(
                    f'nsig extraction failed: You may experience throttling for some formats\n'
                    f'n = {query["n"][0]} ; player = {player_url}\n{e}', only_once=True)
                throttled = True

        if itag:
            itags[itag] = 'https'
            stream_ids.append(stream_id)

        tbr = float_or_none(
            fmt.get('averageBitrate') or fmt.get('bitrate'), 1000)
        dct = {
            'asr': int_or_none(fmt.get('audioSampleRate')),
            'filesize': int_or_none(fmt.get('contentLength')),
            'format_id': itag,
            'format_note': join_nonempty(
                '%s%s' % (audio_track.get('displayName') or '',
                          ' (default)' if audio_track.get('audioIsDefault') else ''),
                # `quality` is None when the format carries no quality
                # information at all; do not call `.replace` on it
                fmt.get('qualityLabel') or (quality or '').replace('audio_quality_', ''),
                throttled and 'THROTTLED', delim=', '),
            'source_preference': -10 if throttled else -1,
            'fps': int_or_none(fmt.get('fps')) or None,
            'height': height,
            'quality': q(quality),
            'tbr': tbr,
            'url': fmt_url,
            'width': int_or_none(fmt.get('width')),
            'language': (audio_track.get('id') or '').split('.')[0],
            'language_preference': 1 if audio_track.get('audioIsDefault') else -1,
        }
        mime_mobj = re.match(
            r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', fmt.get('mimeType') or '')
        if mime_mobj:
            dct['ext'] = mimetype2ext(mime_mobj.group(1))
            dct.update(parse_codecs(mime_mobj.group(2)))
        no_audio = dct.get('acodec') == 'none'
        no_video = dct.get('vcodec') == 'none'
        if no_audio:
            dct['vbr'] = tbr
        if no_video:
            dct['abr'] = tbr
        if no_audio or no_video:
            dct['downloader_options'] = {
                # Youtube throttles chunks >~10M
                'http_chunk_size': 10485760,
            }
            if dct.get('ext'):
                dct['container'] = dct['ext'] + '_dash'
        yield dct

    skip_manifests = self._configuration_arg('skip')
    get_dash = (
        (not is_live or self._configuration_arg('include_live_dash'))
        and 'dash' not in skip_manifests and self.get_param('youtube_include_dash_manifest', True))
    get_hls = 'hls' not in skip_manifests and self.get_param('youtube_include_hls_manifest', True)

    def process_manifest_format(f, proto, itag):
        # Returns False for a format whose itag was already seen for this
        # protocol; otherwise assigns the format id and quality and returns True
        if itag in itags:
            if itags[itag] == proto or f'{itag}-{proto}' in itags:
                return False
            itag = f'{itag}-{proto}'
        if itag:
            f['format_id'] = itag
            itags[itag] = proto

        # Reuse the quality recorded for the same itag, else for the same height
        f['quality'] = next((
            q(qdict[val])
            for val, qdict in ((f.get('format_id', '').split('-')[0], itag_qualities), (f.get('height'), res_qualities))
            if val in qdict), -1)
        return True

    for sd in streaming_data:
        hls_manifest_url = get_hls and sd.get('hlsManifestUrl')
        if hls_manifest_url:
            for f in self._extract_m3u8_formats(hls_manifest_url, video_id, 'mp4', fatal=False):
                if process_manifest_format(f, 'hls', self._search_regex(
                        r'/itag/(\d+)', f['url'], 'itag', default=None)):
                    yield f

        dash_manifest_url = get_dash and sd.get('dashManifestUrl')
        if dash_manifest_url:
            for f in self._extract_mpd_formats(dash_manifest_url, video_id, fatal=False):
                if process_manifest_format(f, 'dash', f['format_id']):
                    f['filesize'] = int_or_none(self._search_regex(
                        r'/clen/(\d+)', f.get('fragment_base_url') or f['url'], 'file size', default=None))
                    yield f
2614
def _extract_storyboard(self, player_responses, duration):
    """Yield one `mhtml` storyboard format per level of the storyboard spec.

    The spec is a `|`-separated string: the first field is the base URL
    template and every following field describes one storyboard level as
    eight `#`-separated values.

    @param player_responses  player responses to search for the spec
    @param duration          video duration in seconds, or None if unknown;
                             fragment durations are then left as None
    """
    spec = get_first(
        player_responses, ('storyboards', 'playerStoryboardSpecRenderer', 'spec'), default='').split('|')[::-1]
    # The list is reversed, so the base URL (first field) is popped off the end.
    # `str.split` never returns an empty list, hence the URL itself is tested
    base_url = spec.pop()
    if not base_url:
        return
    L = len(spec) - 1
    for i, args in enumerate(spec):
        args = args.split('#')
        counts = list(map(int_or_none, args[:5]))
        if len(args) != 8 or not all(counts):
            self.report_warning(f'Malformed storyboard {i}: {"#".join(args)}{bug_reports_message()}')
            continue
        width, height, frame_count, cols, rows = counts
        N, sigh = args[6:]

        url = base_url.replace('$L', str(L - i)).replace('$N', N) + f'&sigh={sigh}'
        # Each fragment is one image holding a `cols` x `rows` grid of frames
        fragment_count = frame_count / (cols * rows)
        # Without a known duration the fragments cannot be timed
        fragment_duration = None if duration is None else duration / fragment_count
        yield {
            'format_id': f'sb{i}',
            'format_note': 'storyboard',
            'ext': 'mhtml',
            'protocol': 'mhtml',
            'acodec': 'none',
            'vcodec': 'none',
            'url': url,
            'width': width,
            'height': height,
            'fragments': [{
                'path': url.replace('$M', str(j)),
                # The last fragment may be shorter than the others
                'duration': (
                    None if fragment_duration is None
                    else min(fragment_duration, duration - (j * fragment_duration))),
            } for j in range(math.ceil(fragment_count))],
        }
2649
def _real_extract(self, url):
    """Extract the info dict for a single video URL.

    Downloads the watch page (unless skipped via the `player_skip` extractor
    argument) and the player responses, then assembles formats, thumbnails,
    subtitles and metadata.

    Returns an info dict, or a url/playlist result when the video is
    replaced by a trailer or is a multifeed video.
    """
    url, smuggled_data = unsmuggle_url(url, {})
    video_id = self._match_id(url)

    base_url = self.http_scheme() + '//www.youtube.com/'
    webpage_url = base_url + 'watch?v=' + video_id
    webpage = None
    if 'webpage' not in self._configuration_arg('player_skip'):
        webpage = self._download_webpage(
            webpage_url + '&bpctr=9999999999&has_verified=1', video_id, fatal=False)

    master_ytcfg = self.extract_ytcfg(video_id, webpage) or self._get_default_ytcfg()

    player_responses, player_url = self._extract_player_responses(
        self._get_requested_clients(url, smuggled_data),
        video_id, webpage, master_ytcfg)

    playability_statuses = traverse_obj(
        player_responses, (..., 'playabilityStatus'), expected_type=dict, default=[])

    trailer_video_id = get_first(
        playability_statuses,
        ('errorScreen', 'playerLegacyDesktopYpcTrailerRenderer', 'trailerVideoId'),
        expected_type=str)
    if trailer_video_id:
        return self.url_result(
            trailer_video_id, self.ie_key(), trailer_video_id)

    # Without a webpage there are no meta tags to search
    search_meta = ((lambda x: self._html_search_meta(x, webpage, default=None))
                   if webpage else (lambda x: None))

    video_details = traverse_obj(
        player_responses, (..., 'videoDetails'), expected_type=dict, default=[])
    microformats = traverse_obj(
        player_responses, (..., 'microformat', 'playerMicroformatRenderer'),
        expected_type=dict, default=[])
    video_title = (
        get_first(video_details, 'title')
        or self._get_text(microformats, (..., 'title'))
        or search_meta(['og:title', 'twitter:title', 'title']))
    video_description = get_first(video_details, 'shortDescription')

    multifeed_metadata_list = get_first(
        player_responses,
        ('multicamera', 'playerLegacyMulticameraRenderer', 'metadataList'),
        expected_type=str)
    if multifeed_metadata_list and not smuggled_data.get('force_singlefeed'):
        if self.get_param('noplaylist'):
            self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
        else:
            entries = []
            feed_ids = []
            for feed in multifeed_metadata_list.split(','):
                # Unquote should take place before split on comma (,) since textual
                # fields may contain comma as well (see
                # https://github.com/ytdl-org/youtube-dl/issues/8536)
                feed_data = compat_parse_qs(
                    compat_urllib_parse_unquote_plus(feed))

                def feed_entry(name):
                    return try_get(
                        feed_data, lambda x: x[name][0], compat_str)

                feed_id = feed_entry('id')
                if not feed_id:
                    continue
                feed_title = feed_entry('title')
                title = video_title
                if feed_title:
                    title += ' (%s)' % feed_title
                entries.append({
                    '_type': 'url_transparent',
                    'ie_key': 'Youtube',
                    'url': smuggle_url(
                        '%swatch?v=%s' % (base_url, feed_data['id'][0]),
                        {'force_singlefeed': True}),
                    'title': title,
                })
                feed_ids.append(feed_id)
            self.to_screen(
                'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
                % (', '.join(feed_ids), video_id))
            return self.playlist_result(
                entries, video_id, video_title, video_description)

    live_broadcast_details = traverse_obj(microformats, (..., 'liveBroadcastDetails'))
    is_live = get_first(video_details, 'isLive')
    if is_live is None:
        is_live = get_first(live_broadcast_details, 'isLiveNow')

    streaming_data = traverse_obj(player_responses, (..., 'streamingData'), default=[])
    formats = list(self._extract_formats(streaming_data, video_id, player_url, is_live))

    if not formats:
        if not self.get_param('allow_unplayable_formats') and traverse_obj(streaming_data, (..., 'licenseInfos')):
            self.report_drm(video_id)
        pemr = get_first(
            playability_statuses,
            ('errorScreen', 'playerErrorMessageRenderer'), expected_type=dict) or {}
        reason = self._get_text(pemr, 'reason') or get_first(playability_statuses, 'reason')
        subreason = clean_html(self._get_text(pemr, 'subreason') or '')
        if subreason:
            if subreason == 'The uploader has not made this video available in your country.':
                countries = get_first(microformats, 'availableCountries')
                if not countries:
                    regions_allowed = search_meta('regionsAllowed')
                    countries = regions_allowed.split(',') if regions_allowed else None
                self.raise_geo_restricted(subreason, countries, metadata_available=True)
            # `reason` may be missing even though a subreason was given
            reason = f'{reason}. {subreason}' if reason else subreason
        if reason:
            self.raise_no_formats(reason, expected=True)

    keywords = get_first(video_details, 'keywords', expected_type=list) or []
    if not keywords and webpage:
        keywords = [
            unescapeHTML(m.group('content'))
            for m in re.finditer(self._meta_regex('og:video:tag'), webpage)]
    for keyword in keywords:
        if keyword.startswith('yt:stretch='):
            mobj = re.search(r'(\d+)\s*:\s*(\d+)', keyword)
            if mobj:
                # NB: float is intentional for forcing float division
                w, h = (float(v) for v in mobj.groups())
                if w > 0 and h > 0:
                    ratio = w / h
                    for f in formats:
                        if f.get('vcodec') != 'none':
                            f['stretched_ratio'] = ratio
                    break

    thumbnails = []
    thumbnail_dicts = traverse_obj(
        (video_details, microformats), (..., ..., 'thumbnail', 'thumbnails', ...),
        expected_type=dict, default=[])
    for thumbnail in thumbnail_dicts:
        thumbnail_url = thumbnail.get('url')
        if not thumbnail_url:
            continue
        # Sometimes youtube gives a wrong thumbnail URL. See:
        # https://github.com/yt-dlp/yt-dlp/issues/233
        # https://github.com/ytdl-org/youtube-dl/issues/28023
        if 'maxresdefault' in thumbnail_url:
            thumbnail_url = thumbnail_url.split('?')[0]
        thumbnails.append({
            'url': thumbnail_url,
            'height': int_or_none(thumbnail.get('height')),
            'width': int_or_none(thumbnail.get('width')),
        })
    thumbnail_url = search_meta(['og:image', 'twitter:image'])
    if thumbnail_url:
        thumbnails.append({
            'url': thumbnail_url,
        })
    original_thumbnails = thumbnails.copy()

    # The best resolution thumbnails sometimes does not appear in the webpage
    # See: https://github.com/ytdl-org/youtube-dl/issues/29049, https://github.com/yt-dlp/yt-dlp/issues/340
    # List of possible thumbnails - Ref: <https://stackoverflow.com/a/20542029>
    thumbnail_names = [
        'maxresdefault', 'hq720', 'sddefault', 'sd1', 'sd2', 'sd3',
        'hqdefault', 'hq1', 'hq2', 'hq3', '0',
        'mqdefault', 'mq1', 'mq2', 'mq3',
        'default', '1', '2', '3'
    ]
    n_thumbnail_names = len(thumbnail_names)
    thumbnails.extend({
        'url': 'https://i.ytimg.com/vi{webp}/{video_id}/{name}{live}.{ext}'.format(
            video_id=video_id, name=name, ext=ext,
            webp='_webp' if ext == 'webp' else '', live='_live' if is_live else ''),
    } for name in thumbnail_names for ext in ('webp', 'jpg'))
    for thumb in thumbnails:
        # Earlier names in `thumbnail_names` get a higher preference; webp beats jpg
        i = next((i for i, t in enumerate(thumbnail_names) if f'/{video_id}/{t}' in thumb['url']), n_thumbnail_names)
        thumb['preference'] = (0 if '.webp' in thumb['url'] else -1) - (2 * i)
    self._remove_duplicate_formats(thumbnails)
    self._downloader._sort_thumbnails(original_thumbnails)

    category = get_first(microformats, 'category') or search_meta('genre')
    channel_id = str_or_none(
        get_first(video_details, 'channelId')
        or get_first(microformats, 'externalChannelId')
        or search_meta('channelId'))
    duration = int_or_none(
        get_first(video_details, 'lengthSeconds')
        or get_first(microformats, 'lengthSeconds')
        or parse_duration(search_meta('duration'))) or None
    owner_profile_url = get_first(microformats, 'ownerProfileUrl')

    live_content = get_first(video_details, 'isLiveContent')
    is_upcoming = get_first(video_details, 'isUpcoming')
    if is_live is None:
        if is_upcoming or live_content is False:
            is_live = False
    if is_upcoming is None and (live_content or is_live):
        is_upcoming = False
    live_starttime = parse_iso8601(get_first(live_broadcast_details, 'startTimestamp'))
    live_endtime = parse_iso8601(get_first(live_broadcast_details, 'endTimestamp'))
    if not duration and live_endtime and live_starttime:
        duration = live_endtime - live_starttime

    formats.extend(self._extract_storyboard(player_responses, duration))

    # Source is given priority since formats that throttle are given lower source_preference
    # When throttling issue is fully fixed, remove this
    self._sort_formats(formats, ('quality', 'res', 'fps', 'hdr:12', 'source', 'codec:vp9.2', 'lang', 'proto'))

    info = {
        'id': video_id,
        'title': self._live_title(video_title) if is_live else video_title,
        'formats': formats,
        'thumbnails': thumbnails,
        # The best thumbnail that we are sure exists. Prevents unnecessary
        # URL checking if user don't care about getting the best possible thumbnail
        'thumbnail': traverse_obj(original_thumbnails, (-1, 'url')),
        'description': video_description,
        'upload_date': unified_strdate(
            get_first(microformats, 'uploadDate')
            or search_meta('uploadDate')),
        'uploader': get_first(video_details, 'author'),
        'uploader_id': self._search_regex(r'/(?:channel|user)/([^/?&#]+)', owner_profile_url, 'uploader id') if owner_profile_url else None,
        'uploader_url': owner_profile_url,
        'channel_id': channel_id,
        'channel_url': f'https://www.youtube.com/channel/{channel_id}' if channel_id else None,
        'duration': duration,
        'view_count': int_or_none(
            get_first((video_details, microformats), (..., 'viewCount'))
            or search_meta('interactionCount')),
        'average_rating': float_or_none(get_first(video_details, 'averageRating')),
        'age_limit': 18 if (
            get_first(microformats, 'isFamilySafe') is False
            or search_meta('isFamilyFriendly') == 'false'
            or search_meta('og:restrictions:age') == '18+') else 0,
        'webpage_url': webpage_url,
        'categories': [category] if category else None,
        'tags': keywords,
        'playable_in_embed': get_first(playability_statuses, 'playableInEmbed'),
        'is_live': is_live,
        'was_live': (False if is_live or is_upcoming or live_content is False
                     else None if is_live is None or is_upcoming is None
                     else live_content),
        'live_status': 'is_upcoming' if is_upcoming else None,  # rest will be set by YoutubeDL
        'release_timestamp': live_starttime,
    }

    pctr = traverse_obj(player_responses, (..., 'captions', 'playerCaptionsTracklistRenderer'), expected_type=dict)
    if pctr:
        def get_lang_code(track):
            return (remove_start(track.get('vssId') or '', '.').replace('.', '-')
                    or track.get('languageCode'))

        # Converted into dicts to remove duplicates
        captions = {
            get_lang_code(sub): sub
            for sub in traverse_obj(pctr, (..., 'captionTracks', ...), default=[])}
        translation_languages = {
            lang.get('languageCode'): self._get_text(lang.get('languageName'), max_runs=1)
            for lang in traverse_obj(pctr, (..., 'translationLanguages', ...), default=[])}

        def process_language(container, base_url, lang_code, sub_name, query):
            # Adds one entry per supported subtitle format to container[lang_code]
            lang_subs = container.setdefault(lang_code, [])
            for fmt in self._SUBTITLE_FORMATS:
                query.update({
                    'fmt': fmt,
                })
                lang_subs.append({
                    'ext': fmt,
                    'url': update_url_query(base_url, query),
                    'name': sub_name,
                })

        subtitles, automatic_captions = {}, {}
        for lang_code, caption_track in captions.items():
            base_url = caption_track.get('baseUrl')
            if not base_url:
                continue
            lang_name = self._get_text(caption_track, 'name', max_runs=1)
            if caption_track.get('kind') != 'asr':
                if not lang_code:
                    continue
                process_language(
                    subtitles, base_url, lang_code, lang_name, {})
                if not caption_track.get('isTranslatable'):
                    continue
            for trans_code, trans_name in translation_languages.items():
                if not trans_code:
                    continue
                if caption_track.get('kind') != 'asr':
                    trans_code += f'-{lang_code}'
                    trans_name += format_field(lang_name, template=' from %s')
                process_language(
                    automatic_captions, base_url, trans_code, trans_name, {'tlang': trans_code})
        info['automatic_captions'] = automatic_captions
        info['subtitles'] = subtitles

    # start/end times given in the URL fragment take precedence over the query
    parsed_url = compat_urllib_parse_urlparse(url)
    for component in [parsed_url.fragment, parsed_url.query]:
        query = compat_parse_qs(component)
        for k, v in query.items():
            for d_k, s_ks in [('start', ('start', 't')), ('end', ('end',))]:
                d_k += '_time'
                if d_k not in info and k in s_ks:
                    info[d_k] = parse_duration(query[k][0])

    # Youtube Music Auto-generated description
    if video_description:
        mobj = re.search(r'(?s)(?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?.+\nAuto-generated by YouTube\.\s*$', video_description)
        if mobj:
            release_year = mobj.group('release_year')
            release_date = mobj.group('release_date')
            if release_date:
                release_date = release_date.replace('-', '')
                if not release_year:
                    release_year = release_date[:4]
            info.update({
                # Strip the matched album text, not the group name
                'album': mobj.group('album').strip(),
                'artist': mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·')),
                'track': mobj.group('track').strip(),
                'release_date': release_date,
                'release_year': int_or_none(release_year),
            })

    initial_data = None
    if webpage:
        initial_data = self._extract_yt_initial_variable(
            webpage, self._YT_INITIAL_DATA_RE, video_id,
            'yt initial data')
    if not initial_data:
        query = {'videoId': video_id}
        query.update(self._get_checkok_params())
        initial_data = self._extract_response(
            item_id=video_id, ep='next', fatal=False,
            ytcfg=master_ytcfg, query=query,
            headers=self.generate_api_headers(ytcfg=master_ytcfg),
            note='Downloading initial data API JSON')

    try:
        # This will error if there is no livechat
        initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
        info.setdefault('subtitles', {})['live_chat'] = [{
            'url': 'https://www.youtube.com/watch?v=%s' % video_id,  # url is needed to set cookies
            'video_id': video_id,
            'ext': 'json',
            'protocol': 'youtube_live_chat' if is_live or is_upcoming else 'youtube_live_chat_replay',
        }]
    except (KeyError, IndexError, TypeError):
        pass

    # Resolved unconditionally so that `contents` is always bound, even when
    # no initial data could be obtained (it is needed by extract_comments below)
    contents = try_get(
        initial_data,
        lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'],
        list) or []

    if initial_data:
        info['chapters'] = (
            self._extract_chapters_from_json(initial_data, duration)
            or self._extract_chapters_from_engagement_panel(initial_data, duration)
            or None)

        for content in contents:
            vpir = content.get('videoPrimaryInfoRenderer')
            if vpir:
                stl = vpir.get('superTitleLink')
                if stl:
                    stl = self._get_text(stl)
                    if try_get(
                            vpir,
                            lambda x: x['superTitleIcon']['iconType']) == 'LOCATION_PIN':
                        info['location'] = stl
                    else:
                        mobj = re.search(r'(.+?)\s*S(\d+)\s*•\s*E(\d+)', stl)
                        if mobj:
                            info.update({
                                'series': mobj.group(1),
                                'season_number': int(mobj.group(2)),
                                'episode_number': int(mobj.group(3)),
                            })
                for tlb in (try_get(
                        vpir,
                        lambda x: x['videoActions']['menuRenderer']['topLevelButtons'],
                        list) or []):
                    tbr = tlb.get('toggleButtonRenderer') or {}
                    for getter, regex in [(
                            lambda x: x['defaultText']['accessibility']['accessibilityData'],
                            r'(?P<count>[\d,]+)\s*(?P<type>(?:dis)?like)'), ([
                                lambda x: x['accessibility'],
                                lambda x: x['accessibilityData']['accessibilityData'],
                            ], r'(?P<type>(?:dis)?like) this video along with (?P<count>[\d,]+) other people')]:
                        label = (try_get(tbr, getter, dict) or {}).get('label')
                        if label:
                            mobj = re.match(regex, label)
                            if mobj:
                                info[mobj.group('type') + '_count'] = str_to_int(mobj.group('count'))
                                break
                sbr_tooltip = try_get(
                    vpir, lambda x: x['sentimentBar']['sentimentBarRenderer']['tooltip'])
                if sbr_tooltip:
                    like_count, dislike_count = sbr_tooltip.split(' / ')
                    info.update({
                        'like_count': str_to_int(like_count),
                        'dislike_count': str_to_int(dislike_count),
                    })
            vsir = content.get('videoSecondaryInfoRenderer')
            if vsir:
                info['channel'] = self._get_text(vsir, ('owner', 'videoOwnerRenderer', 'title'))
                rows = try_get(
                    vsir,
                    lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows'],
                    list) or []
                # A divider line means several songs are listed; the music
                # fields are then not taken from the rows
                multiple_songs = False
                for row in rows:
                    if try_get(row, lambda x: x['metadataRowRenderer']['hasDividerLine']) is True:
                        multiple_songs = True
                        break
                for row in rows:
                    mrr = row.get('metadataRowRenderer') or {}
                    mrr_title = mrr.get('title')
                    if not mrr_title:
                        continue
                    mrr_title = self._get_text(mrr, 'title')
                    mrr_contents_text = self._get_text(mrr, ('contents', 0))
                    if mrr_title == 'License':
                        info['license'] = mrr_contents_text
                    elif not multiple_songs:
                        if mrr_title == 'Album':
                            info['album'] = mrr_contents_text
                        elif mrr_title == 'Artist':
                            info['artist'] = mrr_contents_text
                        elif mrr_title == 'Song':
                            info['track'] = mrr_contents_text

    fallbacks = {
        'channel': 'uploader',
        'channel_id': 'uploader_id',
        'channel_url': 'uploader_url',
    }
    for to, frm in fallbacks.items():
        if not info.get(to):
            info[to] = info.get(frm)

    for s_k, d_k in [('artist', 'creator'), ('track', 'alt_title')]:
        v = info.get(s_k)
        if v:
            info[d_k] = v

    is_private = get_first(video_details, 'isPrivate', expected_type=bool)
    is_unlisted = get_first(microformats, 'isUnlisted', expected_type=bool)
    is_membersonly = None
    is_premium = None
    if initial_data and is_private is not None:
        is_membersonly = False
        is_premium = False
        badge_labels = set()
        for content in contents:
            if not isinstance(content, dict):
                continue
            badge_labels.update(self._extract_badges(content.get('videoPrimaryInfoRenderer')))
        for badge_label in badge_labels:
            if badge_label.lower() == 'members only':
                is_membersonly = True
            elif badge_label.lower() == 'premium':
                is_premium = True
            elif badge_label.lower() == 'unlisted':
                is_unlisted = True

    info['availability'] = self._availability(
        is_private=is_private,
        needs_premium=is_premium,
        needs_subscription=is_membersonly,
        needs_auth=info['age_limit'] >= 18,
        is_unlisted=None if is_private is None else is_unlisted)

    info['__post_extractor'] = self.extract_comments(master_ytcfg, video_id, contents, webpage)

    self.mark_watched(video_id, player_responses)

    return info
3125
3126
3127class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):
3128
def _extract_channel_id(self, webpage):
    """Return the channel id advertised by the webpage's meta tags.

    The `channelId` meta tag is preferred; otherwise the id is parsed out of
    the first channel URL found in the og/al/twitter URL meta tags.
    """
    channel_id = self._html_search_meta(
        'channelId', webpage, 'channel id', default=None)
    if channel_id:
        return channel_id
    channel_url = self._html_search_meta(
        ('og:url', 'al:ios:url', 'al:android:url', 'al:web:url',
         'twitter:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad',
         'twitter:app:url:googleplay'), webpage, 'channel url')
    # The quantifier has to be inside the group: `(...)+` would only
    # capture the last character of the id
    return self._search_regex(
        r'https?://(?:www\.)?youtube\.com/channel/([^/?#&]+)',
        channel_url, 'channel id')
3141
@staticmethod
def _extract_basic_item_renderer(item):
    """Return the first dict value of `item` stored under a known renderer key.

    A key qualifies when it is one of the basic renderer names or has the
    form `grid...Renderer`. Returns None when nothing qualifies.
    """
    # Modified from _extract_grid_item_renderer
    basic_names = ('playlistRenderer', 'videoRenderer', 'channelRenderer', 'showRenderer')
    for name, candidate in item.items():
        if not isinstance(candidate, dict):
            continue
        if name in basic_names or (name.startswith('grid') and name.endswith('Renderer')):
            return candidate
    return None
3155
def _grid_entries(self, grid_renderer):
    """Yield an entry for every recognised item of `grid_renderer['items']`.

    An item is treated, in this order, as a playlist, a video, a channel or
    a generic navigation endpoint; items matching none of these are skipped.
    """
    for grid_item in grid_renderer['items']:
        if not isinstance(grid_item, dict):
            continue
        item_renderer = self._extract_basic_item_renderer(grid_item)
        if not isinstance(item_renderer, dict):
            continue
        item_title = self._get_text(item_renderer, 'title')

        list_id = item_renderer.get('playlistId')
        clip_id = item_renderer.get('videoId')
        owner_id = item_renderer.get('channelId')
        if list_id:
            # playlist
            yield self.url_result(
                'https://www.youtube.com/playlist?list=%s' % list_id,
                ie=YoutubeTabIE.ie_key(), video_id=list_id,
                video_title=item_title)
        elif clip_id:
            # video
            yield self._extract_video(item_renderer)
        elif owner_id:
            # channel
            yield self.url_result(
                'https://www.youtube.com/channel/%s' % owner_id,
                ie=YoutubeTabIE.ie_key(), video_title=item_title)
        else:
            # generic endpoint URL support
            endpoint_url = urljoin('https://www.youtube.com/', try_get(
                item_renderer, lambda x: x['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'],
                compat_str))
            if not endpoint_url:
                continue
            # Only the first extractor that accepts the URL is used
            matching_ie = next(
                (ie for ie in (YoutubeTabIE, YoutubePlaylistIE, YoutubeIE) if ie.suitable(endpoint_url)),
                None)
            if matching_ie is not None:
                yield self.url_result(
                    endpoint_url, ie=matching_ie.ie_key(),
                    video_id=matching_ie._match_id(endpoint_url), video_title=item_title)
3195
def _shelf_entries_from_content(self, shelf_renderer):
    """Yield the grid entries found in the `content` of a shelf renderer.

    Only `gridRenderer` and `expandedShelfContentsRenderer` contents are
    processed; anything else yields nothing.
    """
    shelf_content = shelf_renderer.get('content')
    if not isinstance(shelf_content, dict):
        return
    grid = shelf_content.get('gridRenderer') or shelf_content.get('expandedShelfContentsRenderer')
    if grid:
        # TODO: add support for nested playlists so each shelf is processed
        # as separate playlist
        # TODO: this includes only first N items
        yield from self._grid_entries(grid)
    # TODO: `horizontalListRenderer` content is not handled yet
3211
def _shelf_entries(self, shelf_renderer, skip_channels=False):
    """Yield a url result for the shelf's own URL, then the entries of its content.

    With `skip_channels` set, a shelf whose URL contains `/channels?` yields
    nothing at all.
    """
    endpoint_path = try_get(
        shelf_renderer, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        compat_str)
    shelf_url = urljoin('https://www.youtube.com', endpoint_path)
    if shelf_url:
        # Skipping links to another channels, note that checking for
        # endpoint.commandMetadata.webCommandMetadata.webPageType == WEB_PAGE_TYPE_CHANNEL
        # will not work
        if skip_channels and '/channels?' in shelf_url:
            return
        shelf_title = self._get_text(shelf_renderer, 'title')
        yield self.url_result(shelf_url, video_title=shelf_title)
    # Shelf may not contain shelf URL, fallback to extraction from content
    yield from self._shelf_entries_from_content(shelf_renderer)
3228
def _playlist_entries(self, video_list_renderer):
    """Yield a video entry for each playlist item that carries a `videoId`.

    Items are read from `video_list_renderer['contents']`; both
    `playlistVideoRenderer` and `playlistPanelVideoRenderer` are accepted.
    """
    for list_item in video_list_renderer['contents']:
        if not isinstance(list_item, dict):
            continue
        video_renderer = (
            list_item.get('playlistVideoRenderer')
            or list_item.get('playlistPanelVideoRenderer'))
        if isinstance(video_renderer, dict) and video_renderer.get('videoId'):
            yield self._extract_video(video_renderer)
3240
def _rich_entries(self, rich_grid_renderer):
    """Yield the video entry nested under `content.videoRenderer`, if it has a `videoId`."""
    video_renderer = try_get(
        rich_grid_renderer, lambda x: x['content']['videoRenderer'], dict) or {}
    if video_renderer.get('videoId'):
        yield self._extract_video(video_renderer)
3248
def _video_entry(self, video_renderer):
    """Return the extracted video for `video_renderer`, or None if it has no `videoId`."""
    if not video_renderer.get('videoId'):
        return None
    return self._extract_video(video_renderer)
3253
def _post_thread_entries(self, post_thread_renderer):
    """Yield the entries referenced by a community post.

    In order: the attached video, the attached playlist, and every video
    link in the post text other than the attached video.
    """
    post = try_get(
        post_thread_renderer, lambda x: x['post']['backstagePostRenderer'], dict)
    if not post:
        return

    # video attachment
    attached_video = try_get(
        post, lambda x: x['backstageAttachment']['videoRenderer'], dict) or {}
    video_id = attached_video.get('videoId')
    if video_id:
        attached_entry = self._extract_video(attached_video)
        if attached_entry:
            yield attached_entry

    # playlist attachment
    attached_playlist_id = try_get(
        post, lambda x: x['backstageAttachment']['playlistRenderer']['playlistId'], compat_str)
    if attached_playlist_id:
        yield self.url_result(
            'https://www.youtube.com/playlist?list=%s' % attached_playlist_id,
            ie=YoutubeTabIE.ie_key(), video_id=attached_playlist_id)

    # inline video links
    for text_run in try_get(post, lambda x: x['contentText']['runs'], list) or []:
        if not isinstance(text_run, dict):
            continue
        link_url = try_get(
            text_run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'], compat_str)
        if not link_url or not YoutubeIE.suitable(link_url):
            continue
        linked_video_id = YoutubeIE._match_id(link_url)
        # The attached video has already been yielded above
        if linked_video_id != video_id:
            yield self.url_result(link_url, ie=YoutubeIE.ie_key(), video_id=linked_video_id)
3289
def _post_thread_continuation_entries(self, post_thread_continuation):
    """Yield the entries of every post thread in a continuation response.

    Yields nothing when `contents` is not a list; items that are not dicts
    or carry no `backstagePostThreadRenderer` dict are skipped.
    """
    contents = post_thread_continuation.get('contents')
    if not isinstance(contents, list):
        return
    for content in contents:
        # Guard against malformed items, as the other entry iterators do
        if not isinstance(content, dict):
            continue
        renderer = content.get('backstagePostThreadRenderer')
        if not isinstance(renderer, dict):
            continue
        yield from self._post_thread_entries(renderer)
3300
3301 r''' # unused
3302 def _rich_grid_entries(self, contents):
3303 for content in contents:
3304 video_renderer = try_get(content, lambda x: x['richItemRenderer']['content']['videoRenderer'], dict)
3305 if video_renderer:
3306 entry = self._video_entry(video_renderer)
3307 if entry:
3308 yield entry
3309 '''
def _extract_entries(self, parent_renderer, continuation_list):
    """
    Yield the entries found under parent_renderer['contents'].
    @param parent_renderer: renderer dict with a 'contents' list
    @param continuation_list: one-element list used as an output parameter;
                              its first item is set to the extracted continuation
    """
    # continuation_list is modified in-place with continuation_list = [continuation_token]
    continuation_list[:] = [None]

    # Item-section child key -> callable returning an iterable of entries
    section_handlers = {
        'playlistVideoListRenderer': self._playlist_entries,
        'gridRenderer': self._grid_entries,
        'shelfRenderer': lambda x: self._shelf_entries(x),
        'backstagePostThreadRenderer': self._post_thread_entries,
        'videoRenderer': lambda x: [self._video_entry(x)],
        'playlistRenderer': lambda x: self._grid_entries({'items': [{'playlistRenderer': x}]}),
        'channelRenderer': lambda x: self._grid_entries({'items': [{'channelRenderer': x}]}),
    }

    for content in try_get(parent_renderer, lambda x: x['contents'], list) or []:
        if not isinstance(content, dict):
            continue
        item_section = try_get(content, lambda x: x['itemSectionRenderer'], dict)
        if not item_section:
            rich_item = content.get('richItemRenderer')
            if rich_item:
                yield from self._rich_entries(rich_item)
                continuation_list[0] = self._extract_continuation(parent_renderer)
            continue

        for section_item in try_get(item_section, lambda x: x['contents'], list) or []:
            if not isinstance(section_item, dict):
                continue
            # Only the first recognised renderer of each item is handled
            for key, renderer in section_item.items():
                if key in section_handlers:
                    yield from filter(None, section_handlers[key](renderer))
                    continuation_list[0] = self._extract_continuation(renderer)
                    break

        if not continuation_list[0]:
            continuation_list[0] = self._extract_continuation(item_section)

    if not continuation_list[0]:
        continuation_list[0] = self._extract_continuation(parent_renderer)
3353
def _entries(self, tab, item_id, ytcfg, account_syncid, visitor_data):
    """
    Yield all entries of a tab, following continuations page by page.
    @param tab: tab renderer; its 'content' holds a sectionListRenderer or richGridRenderer
    @param item_id: id used to label each page request
    @param ytcfg, account_syncid, visitor_data: passed on to generate_api_headers
    """
    # One-element list that _extract_entries fills in-place with the continuation
    continuation_list = [None]
    # The lambda reads `continuation_list` when called, so it also sees the
    # fresh lists this name is rebound to further below
    extract_entries = lambda x: self._extract_entries(x, continuation_list)
    tab_content = try_get(tab, lambda x: x['content'], dict)
    if not tab_content:
        return
    parent_renderer = (
        try_get(tab_content, lambda x: x['sectionListRenderer'], dict)
        or try_get(tab_content, lambda x: x['richGridRenderer'], dict) or {})
    for entry in extract_entries(parent_renderer):
        yield entry
    continuation = continuation_list[0]

    for page_num in itertools.count(1):
        if not continuation:
            break
        headers = self.generate_api_headers(
            ytcfg=ytcfg, account_syncid=account_syncid, visitor_data=visitor_data)
        response = self._extract_response(
            item_id='%s page %s' % (item_id, page_num),
            query=continuation, headers=headers, ytcfg=ytcfg,
            check_get_keys=('continuationContents', 'onResponseReceivedActions', 'onResponseReceivedEndpoints'))

        if not response:
            break
        # Extracting updated visitor data is required to prevent an infinite extraction loop in some cases
        # See: https://github.com/ytdl-org/youtube-dl/issues/28702
        visitor_data = self._extract_visitor_data(response) or visitor_data

        # First response shape: entries under response['continuationContents']
        known_continuation_renderers = {
            'playlistVideoListContinuation': self._playlist_entries,
            'gridContinuation': self._grid_entries,
            'itemSectionContinuation': self._post_thread_continuation_entries,
            'sectionListContinuation': extract_entries,  # for feeds
        }
        continuation_contents = try_get(
            response, lambda x: x['continuationContents'], dict) or {}
        continuation_renderer = None
        for key, value in continuation_contents.items():
            if key not in known_continuation_renderers:
                continue
            continuation_renderer = value
            # Fresh list so that a continuation from a previous page is not reused
            continuation_list = [None]
            for entry in known_continuation_renderers[key](continuation_renderer):
                yield entry
            # Only extract_entries fills continuation_list; otherwise read it from the renderer
            continuation = continuation_list[0] or self._extract_continuation(continuation_renderer)
            break
        if continuation_renderer:
            continue

        # Second response shape: items appended via appendContinuationItemsAction.
        # Maps the key of the first item to (handler, key to wrap the items under)
        known_renderers = {
            'gridPlaylistRenderer': (self._grid_entries, 'items'),
            'gridVideoRenderer': (self._grid_entries, 'items'),
            'gridChannelRenderer': (self._grid_entries, 'items'),
            'playlistVideoRenderer': (self._playlist_entries, 'contents'),
            'itemSectionRenderer': (extract_entries, 'contents'),  # for feeds
            'richItemRenderer': (extract_entries, 'contents'),  # for hashtag
            'backstagePostThreadRenderer': (self._post_thread_continuation_entries, 'contents')
        }
        on_response_received = dict_get(response, ('onResponseReceivedActions', 'onResponseReceivedEndpoints'))
        continuation_items = try_get(
            on_response_received, lambda x: x[0]['appendContinuationItemsAction']['continuationItems'], list)
        continuation_item = try_get(continuation_items, lambda x: x[0], dict) or {}
        video_items_renderer = None
        for key, value in continuation_item.items():
            if key not in known_renderers:
                continue
            # Wrap the whole item list under the key the handler expects
            video_items_renderer = {known_renderers[key][1]: continuation_items}
            continuation_list = [None]
            for entry in known_renderers[key][0](video_items_renderer):
                yield entry
            continuation = continuation_list[0] or self._extract_continuation(video_items_renderer)
            break
        if video_items_renderer:
            continue
        # Neither response shape was recognised: stop paging
        break
3430
@staticmethod
def _extract_selected_tab(tabs):
    """
    Return the renderer of the first tab whose 'selected' is True.
    Raises ExtractorError if there is no such tab.
    """
    renderers = (
        dict_get(tab, ('tabRenderer', 'expandableTabRenderer')) or {}
        for tab in tabs)
    selected = next(
        (renderer for renderer in renderers if renderer.get('selected') is True), None)
    if selected is None:
        raise ExtractorError('Unable to find selected tab')
    return selected
3439
@classmethod
def _extract_uploader(cls, data):
    """
    Return a dict with the 'uploader', 'uploader_id' and 'uploader_url' found in
    the video owner of the playlist sidebar; fields that are None are left out.
    """
    sidebar = cls._extract_sidebar_info_renderer(data, 'playlistSidebarSecondaryInfoRenderer') or {}
    owner = try_get(
        sidebar, lambda x: x['videoOwner']['videoOwnerRenderer']['title']['runs'][0], dict)
    if not owner:
        return {}

    owner_id = try_get(
        owner, lambda x: x['navigationEndpoint']['browseEndpoint']['browseId'], compat_str)
    owner_path = try_get(
        owner, lambda x: x['navigationEndpoint']['browseEndpoint']['canonicalBaseUrl'], compat_str)
    fields = (
        ('uploader', owner.get('text')),
        ('uploader_id', owner_id),
        ('uploader_url', urljoin('https://www.youtube.com/', owner_path)),
    )
    return {name: value for name, value in fields if value is not None}
3454
def _extract_from_tabs(self, item_id, ytcfg, data, tabs):
    """
    Build the playlist result for the selected tab of a channel/playlist page.
    Metadata comes from the channel metadata renderer when present, else from the
    playlist metadata renderer; entries come from _entries on the selected tab.
    """
    selected_tab = self._extract_selected_tab(tabs)

    channel_name = channel_url = channel_id = None
    metadata_renderer = try_get(
        data, lambda x: x['metadata']['channelMetadataRenderer'], dict)
    if metadata_renderer:
        channel_name = metadata_renderer.get('title')
        channel_url = metadata_renderer.get('channelUrl')
        channel_id = metadata_renderer.get('externalId')
    else:
        metadata_renderer = try_get(
            data, lambda x: x['metadata']['playlistMetadataRenderer'], dict)

    playlist_id = title = description = None
    tags = []
    raw_thumbnails = []
    if metadata_renderer:
        title = metadata_renderer.get('title')
        description = metadata_renderer.get('description', '')
        playlist_id = channel_id
        tags = metadata_renderer.get('keywords', '').split()
        raw_thumbnails = try_get(metadata_renderer, lambda x: x['avatar']['thumbnails'], list)
        if not raw_thumbnails:
            # No avatar: fall back to the playlist thumbnail from the sidebar
            raw_thumbnails = try_get(
                self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer'),
                lambda x: x['thumbnailRenderer']['playlistVideoThumbnailRenderer']['thumbnail']['thumbnails'],
                list) or []

    thumbnails = []
    for thumb in raw_thumbnails:
        thumb_url = url_or_none(thumb.get('url')) if isinstance(thumb, dict) else None
        if thumb_url:
            thumbnails.append({
                'url': thumb_url,
                'width': int_or_none(thumb.get('width')),
                'height': int_or_none(thumb.get('height')),
            })

    if playlist_id is None:
        playlist_id = item_id
    if title is None:
        hashtag = try_get(data, lambda x: x['header']['hashtagHeaderRenderer']['hashtag']['simpleText'])
        title = hashtag or playlist_id
    for tab_field in ('title', 'expandedText'):
        title += format_field(selected_tab, tab_field, ' - %s')

    metadata = {
        'playlist_id': playlist_id,
        'playlist_title': title,
        'playlist_description': description,
        'uploader': channel_name,
        'uploader_id': channel_id,
        'uploader_url': channel_url,
        'thumbnails': thumbnails,
        'tags': tags,
    }
    availability = self._extract_availability(data)
    if availability:
        metadata['availability'] = availability
    if not channel_id:
        metadata.update(self._extract_uploader(data))
    # The channel_* fields mirror the (possibly just updated) uploader_* fields
    metadata['channel'] = metadata['uploader']
    metadata['channel_id'] = metadata['uploader_id']
    metadata['channel_url'] = metadata['uploader_url']

    entries = self._entries(
        selected_tab, playlist_id, ytcfg,
        self._extract_account_syncid(ytcfg, data),
        self._extract_visitor_data(data, ytcfg))
    return self.playlist_result(entries, **metadata)
3529
def _extract_mix_playlist(self, playlist, playlist_id, data, ytcfg):
    """
    Yield the videos of a Mix playlist, requesting further pages from the 'next' endpoint.
    Stops when a page has no entries, when nothing follows the last yielded video,
    or when the first video of the Mix shows up again.
    @param playlist: initial playlist renderer, passed to _playlist_entries
    @param playlist_id: id sent as 'playlistId' in every page query
    @param data: initial response; used for the account syncid and visitor data
    @param ytcfg: ytcfg used for the API headers and requests
    """
    first_id = last_id = response = None
    for page_num in itertools.count(1):
        videos = list(self._playlist_entries(playlist))
        if not videos:
            return
        # Resume right after the last video yielded from the previous page
        start = next((i for i, v in enumerate(videos) if v['id'] == last_id), -1) + 1
        if start >= len(videos):
            return
        for video in videos[start:]:
            if video['id'] == first_id:
                self.to_screen('First video %s found again; Assuming end of Mix' % first_id)
                return
            yield video
        first_id = first_id or videos[0]['id']
        last_id = videos[-1]['id']
        # The last item may have no watchEndpoint; fall back to an empty dict
        # so that the defaults in the query below are used instead of failing
        watch_endpoint = try_get(
            playlist, lambda x: x['contents'][-1]['playlistPanelVideoRenderer']['navigationEndpoint']['watchEndpoint'],
            dict) or {}
        headers = self.generate_api_headers(
            ytcfg=ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
            visitor_data=self._extract_visitor_data(response, data, ytcfg))
        query = {
            'playlistId': playlist_id,
            'videoId': watch_endpoint.get('videoId') or last_id,
            'index': watch_endpoint.get('index') or len(videos),
            'params': watch_endpoint.get('params') or 'OAE%3D'
        }
        response = self._extract_response(
            item_id='%s page %d' % (playlist_id, page_num),
            query=query, ep='next', headers=headers, ytcfg=ytcfg,
            check_get_keys='contents'
        )
        playlist = try_get(
            response, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
3564
def _extract_from_playlist(self, item_id, url, data, playlist, ytcfg):
    """
    Return the result for an inline playlist.
    If the playlist's own URL differs from `url`, a url_result for YoutubeTabIE is
    returned; otherwise the playlist is extracted as a Mix.
    """
    title = playlist.get('title')
    if not title:
        title = try_get(data, lambda x: x['titleText']['simpleText'], compat_str)
    playlist_id = playlist.get('playlistId') or item_id

    # Delegating everything except mix playlists to regular tab-based playlist URL
    relative_url = try_get(
        playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        compat_str)
    playlist_url = urljoin(url, relative_url)
    if not playlist_url or playlist_url == url:
        return self.playlist_result(
            self._extract_mix_playlist(playlist, playlist_id, data, ytcfg),
            playlist_id=playlist_id, playlist_title=title)

    return self.url_result(
        playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
        video_title=title)
3582
def _extract_availability(self, data):
    """
    Gets the availability of a given playlist/tab.
    Note: Unless YouTube tells us explicitly, we do not assume it is public
    @param data: response
    """
    sidebar = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer') or {}
    labels = self._extract_badges(sidebar)

    # Personal playlists, when authenticated, have a dropdown visibility selector instead of a badge
    dropdown_entries = try_get(
        sidebar, lambda x: x['privacyForm']['dropdownFormFieldRenderer']['dropdown']['dropdownRenderer']['entries'], list) or []
    for dropdown_entry in dropdown_entries:
        if not try_get(dropdown_entry, lambda x: x['privacyDropdownItemRenderer']['isSelected'], bool):
            continue
        selected_label = self._get_text(dropdown_entry, ('privacyDropdownItemRenderer', 'label'))
        if selected_label:
            labels.add(selected_label.lower())
            break

    is_private = is_unlisted = None
    for label in labels:
        if label == 'public':
            is_unlisted = is_private = False
        elif label == 'private':
            is_private = True
        elif label == 'unlisted':
            is_unlisted = True
    return self._availability(is_private, False, False, False, is_unlisted)
3614
@staticmethod
def _extract_sidebar_info_renderer(data, info_renderer, expected_type=dict):
    """
    Return the first non-empty `info_renderer` of type `expected_type` among
    the playlist sidebar items of `data`, or None if there is none.
    """
    sidebar_items = try_get(
        data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list) or []
    candidates = (
        try_get(sidebar_item, lambda x: x[info_renderer], expected_type)
        for sidebar_item in sidebar_items)
    return next(filter(None, candidates), None)
3623
def _reload_with_unavailable_videos(self, item_id, data, ytcfg):
    """
    Get playlist with unavailable videos if the 'show unavailable videos' button exists.
    Returns None when the page has no primary sidebar info renderer.
    """
    sidebar = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer')
    if not sidebar:
        return

    # Use the endpoint of the first 'show unavailable videos' menu item, if there is one
    browse_endpoint = {}
    for menu_item in try_get(sidebar, lambda x: x['menu']['menuRenderer']['items'], list) or []:
        if not isinstance(menu_item, dict):
            continue
        nav_item = menu_item.get('menuNavigationItemRenderer')
        label = try_get(nav_item, lambda x: x['text']['simpleText'], compat_str)
        if label and label.lower() == 'show unavailable videos':
            browse_endpoint = try_get(
                nav_item, lambda x: x['navigationEndpoint']['browseEndpoint'], dict) or {}
            break
    browse_id = browse_endpoint.get('browseId')
    params = browse_endpoint.get('params')

    headers = self.generate_api_headers(
        ytcfg=ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
        visitor_data=self._extract_visitor_data(data, ytcfg))
    # Defaults are used when no endpoint was found above
    query = {
        'params': params or 'wgYCCAA=',
        'browseId': browse_id or 'VL%s' % item_id
    }
    return self._extract_response(
        item_id=item_id, headers=headers, query=query,
        check_get_keys='contents', fatal=False, ytcfg=ytcfg,
        note='Downloading API JSON with unavailable videos')
3659
def _extract_webpage(self, url, item_id, fatal=True):
    """
    Download the webpage and extract its initial data, retrying on some failures.
    @param url: URL to download
    @param item_id: id used for the download and extraction messages
    @param fatal: if False, errors are reported as warnings instead of raised
    @returns (webpage, data); both are None if nothing could be downloaded
    """
    retries = self.get_param('extractor_retries', 3)
    # Starts at -1 so that the first attempt is number 0 and retries are numbered from 1
    count = -1
    webpage = data = last_error = None
    while count < retries:
        count += 1
        # Sometimes youtube returns a webpage with incomplete ytInitialData
        # See: https://github.com/yt-dlp/yt-dlp/issues/116
        if last_error:
            self.report_warning('%s. Retrying ...' % last_error)
        try:
            webpage = self._download_webpage(
                url, item_id,
                note='Downloading webpage%s' % (' (retry #%d)' % count if count else '',))
            data = self.extract_yt_initial_data(item_id, webpage or '', fatal=fatal) or {}
        except ExtractorError as e:
            # Network errors are retried, except for HTTP 403 and 429
            if isinstance(e.cause, network_exceptions):
                if not isinstance(e.cause, compat_HTTPError) or e.cause.code not in (403, 429):
                    last_error = error_to_compat_str(e.cause or e.msg)
                    if count < retries:
                        continue
            # Not retryable, or out of retries
            if fatal:
                raise
            self.report_warning(error_to_compat_str(e))
            break
        else:
            # Alerts are never retried
            try:
                self._extract_and_report_alerts(data)
            except ExtractorError as e:
                if fatal:
                    raise
                self.report_warning(error_to_compat_str(e))
                break

            # Data holding either of these keys is considered complete
            if dict_get(data, ('contents', 'currentVideoEndpoint')):
                break

            last_error = 'Incomplete yt initial data received'
            if count >= retries:
                if fatal:
                    raise ExtractorError(last_error)
                self.report_warning(last_error)
                break

    return webpage, data
3705
def _extract_data(self, url, item_id, ytcfg=None, fatal=True, webpage_fatal=False, default_client='web'):
    """
    Return (data, ytcfg) for a tab URL.
    Data is taken from the webpage unless 'webpage' is in the 'skip' extractor
    argument; if that yields nothing, the API is queried via _extract_tab_endpoint.
    """
    data = None
    if 'webpage' not in self._configuration_arg('skip'):
        webpage, data = self._extract_webpage(url, item_id, fatal=webpage_fatal)
        ytcfg = ytcfg or self.extract_ytcfg(item_id, webpage)
    if data:
        return data, ytcfg

    if not ytcfg and self.is_authenticated:
        msg = 'Playlists that require authentication may not extract correctly without a successful webpage download.'
        authcheck_enabled = 'authcheck' not in self._configuration_arg('skip')
        if authcheck_enabled and fatal:
            hint = (
                ' If you are not downloading private content, or your cookies are only for the first account and channel,'
                ' pass "--extractor-args youtubetab:skip=authcheck" to skip this check')
            raise ExtractorError(msg + hint, expected=True)
        self.report_warning(msg, only_once=True)
    data = self._extract_tab_endpoint(url, item_id, ytcfg, fatal=fatal, default_client=default_client)
    return data, ytcfg
3722
def _extract_tab_endpoint(self, url, item_id, ytcfg=None, fatal=True, default_client='web'):
    """
    Resolve `url` through the 'navigation/resolve_url' endpoint and return the
    response of the browse/next endpoint it points to.
    If the URL cannot be resolved, raises ExtractorError when fatal, else warns.
    """
    headers = self.generate_api_headers(ytcfg=ytcfg, default_client=default_client)
    resolved = self._extract_response(
        item_id=item_id, query={'url': url}, check_get_keys='endpoint', headers=headers, ytcfg=ytcfg, fatal=fatal,
        ep='navigation/resolve_url', note='Downloading API parameters API JSON', default_client=default_client)

    # (key in the resolved endpoint, API endpoint to query with its value)
    for endpoint_key, api_endpoint in (('browseEndpoint', 'browse'), ('watchEndpoint', 'next')):
        endpoint_params = try_get(resolved, lambda x: x['endpoint'][endpoint_key], dict)
        if not endpoint_params:
            continue
        return self._extract_response(
            item_id=item_id, query=endpoint_params, ep=api_endpoint, headers=headers,
            ytcfg=ytcfg, fatal=fatal, default_client=default_client,
            check_get_keys=('contents', 'currentVideoEndpoint'))

    err_note = 'Failed to resolve url (does the playlist exist?)'
    if not fatal:
        self.report_warning(err_note, item_id)
        return
    raise ExtractorError(err_note, expected=True)
3740
@staticmethod
def _smuggle_data(entries, data):
    """Yield each entry, smuggling `data` into its 'url' whenever `data` is non-empty."""
    for item in entries:
        if not data:
            yield item
            continue
        item['url'] = smuggle_url(item['url'], data)
        yield item
3747
_SEARCH_PARAMS = None

def _search_results(self, query, params=NO_DEFAULT):
    """
    Yield the entries of a search for `query`, page by page.
    @param params: search params; defaults to self._SEARCH_PARAMS when not given
    """
    if params is NO_DEFAULT:
        params = self._SEARCH_PARAMS
    request = {'query': query}
    if params:
        request['params'] = params

    continuation_list = [None]
    page_num = 0
    while True:
        page_num += 1
        # Merge the continuation of the previous page (if any) into the request
        request.update(continuation_list[0] or {})
        search = self._extract_response(
            item_id='query "%s" page %s' % (query, page_num), ep='search', query=request,
            check_get_keys=('contents', 'onResponseReceivedCommands'))
        # The first page and the continuation pages keep their results in different places
        result_paths = (
            lambda x: x['contents']['twoColumnSearchResultsRenderer']['primaryContents']['sectionListRenderer']['contents'],
            lambda x: x['onResponseReceivedCommands'][0]['appendContinuationItemsAction']['continuationItems'])
        slr_contents = try_get(search, result_paths, list)
        yield from self._extract_entries({'contents': slr_contents}, continuation_list)
        if not continuation_list[0]:
            break
3770
3771
3772class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
3773 IE_DESC = 'YouTube Tabs'
3774 _VALID_URL = r'''(?x:
3775 https?://
3776 (?:\w+\.)?
3777 (?:
3778 youtube(?:kids)?\.com|
3779 %(invidious)s
3780 )/
3781 (?:
3782 (?P<channel_type>channel|c|user|browse)/|
3783 (?P<not_channel>
3784 feed/|hashtag/|
3785 (?:playlist|watch)\?.*?\blist=
3786 )|
3787 (?!(?:%(reserved_names)s)\b) # Direct URLs
3788 )
3789 (?P<id>[^/?\#&]+)
3790 )''' % {
3791 'reserved_names': YoutubeBaseInfoExtractor._RESERVED_NAMES,
3792 'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
3793 }
3794 IE_NAME = 'youtube:tab'
3795
3796 _TESTS = [{
3797 'note': 'playlists, multipage',
3798 'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid',
3799 'playlist_mincount': 94,
3800 'info_dict': {
3801 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
3802 'title': 'Игорь Клейнер - Playlists',
3803 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
3804 'uploader': 'Игорь Клейнер',
3805 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
3806 },
3807 }, {
3808 'note': 'playlists, multipage, different order',
3809 'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
3810 'playlist_mincount': 94,
3811 'info_dict': {
3812 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
3813 'title': 'Игорь Клейнер - Playlists',
3814 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
3815 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
3816 'uploader': 'Игорь Клейнер',
3817 },
3818 }, {
3819 'note': 'playlists, series',
3820 'url': 'https://www.youtube.com/c/3blue1brown/playlists?view=50&sort=dd&shelf_id=3',
3821 'playlist_mincount': 5,
3822 'info_dict': {
3823 'id': 'UCYO_jab_esuFRV4b17AJtAw',
3824 'title': '3Blue1Brown - Playlists',
3825 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
3826 'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
3827 'uploader': '3Blue1Brown',
3828 },
3829 }, {
3830 'note': 'playlists, singlepage',
3831 'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
3832 'playlist_mincount': 4,
3833 'info_dict': {
3834 'id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
3835 'title': 'ThirstForScience - Playlists',
3836 'description': 'md5:609399d937ea957b0f53cbffb747a14c',
3837 'uploader': 'ThirstForScience',
3838 'uploader_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
3839 }
3840 }, {
3841 'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',
3842 'only_matching': True,
3843 }, {
3844 'note': 'basic, single video playlist',
3845 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
3846 'info_dict': {
3847 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
3848 'uploader': 'Sergey M.',
3849 'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
3850 'title': 'youtube-dl public playlist',
3851 },
3852 'playlist_count': 1,
3853 }, {
3854 'note': 'empty playlist',
3855 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
3856 'info_dict': {
3857 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
3858 'uploader': 'Sergey M.',
3859 'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
3860 'title': 'youtube-dl empty playlist',
3861 },
3862 'playlist_count': 0,
3863 }, {
3864 'note': 'Home tab',
3865 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/featured',
3866 'info_dict': {
3867 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3868 'title': 'lex will - Home',
3869 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3870 'uploader': 'lex will',
3871 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3872 },
3873 'playlist_mincount': 2,
3874 }, {
3875 'note': 'Videos tab',
3876 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos',
3877 'info_dict': {
3878 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3879 'title': 'lex will - Videos',
3880 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3881 'uploader': 'lex will',
3882 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3883 },
3884 'playlist_mincount': 975,
3885 }, {
3886 'note': 'Videos tab, sorted by popular',
3887 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos?view=0&sort=p&flow=grid',
3888 'info_dict': {
3889 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3890 'title': 'lex will - Videos',
3891 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3892 'uploader': 'lex will',
3893 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3894 },
3895 'playlist_mincount': 199,
3896 }, {
3897 'note': 'Playlists tab',
3898 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/playlists',
3899 'info_dict': {
3900 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3901 'title': 'lex will - Playlists',
3902 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3903 'uploader': 'lex will',
3904 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3905 },
3906 'playlist_mincount': 17,
3907 }, {
3908 'note': 'Community tab',
3909 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community',
3910 'info_dict': {
3911 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3912 'title': 'lex will - Community',
3913 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3914 'uploader': 'lex will',
3915 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3916 },
3917 'playlist_mincount': 18,
3918 }, {
3919 'note': 'Channels tab',
3920 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels',
3921 'info_dict': {
3922 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3923 'title': 'lex will - Channels',
3924 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3925 'uploader': 'lex will',
3926 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3927 },
3928 'playlist_mincount': 12,
3929 }, {
3930 'note': 'Search tab',
3931 'url': 'https://www.youtube.com/c/3blue1brown/search?query=linear%20algebra',
3932 'playlist_mincount': 40,
3933 'info_dict': {
3934 'id': 'UCYO_jab_esuFRV4b17AJtAw',
3935 'title': '3Blue1Brown - Search - linear algebra',
3936 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
3937 'uploader': '3Blue1Brown',
3938 'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
3939 },
3940 }, {
3941 'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',
3942 'only_matching': True,
3943 }, {
3944 'url': 'https://www.youtubekids.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
3945 'only_matching': True,
3946 }, {
3947 'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
3948 'only_matching': True,
3949 }, {
3950 'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
3951 'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
3952 'info_dict': {
3953 'title': '29C3: Not my department',
3954 'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
3955 'uploader': 'Christiaan008',
3956 'uploader_id': 'UCEPzS1rYsrkqzSLNp76nrcg',
3957 'description': 'md5:a14dc1a8ef8307a9807fe136a0660268',
3958 },
3959 'playlist_count': 96,
3960 }, {
3961 'note': 'Large playlist',
3962 'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
3963 'info_dict': {
3964 'title': 'Uploads from Cauchemar',
3965 'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
3966 'uploader': 'Cauchemar',
3967 'uploader_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',
3968 },
3969 'playlist_mincount': 1123,
3970 }, {
3971 'note': 'even larger playlist, 8832 videos',
3972 'url': 'http://www.youtube.com/user/NASAgovVideo/videos',
3973 'only_matching': True,
3974 }, {
3975 'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
3976 'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
3977 'info_dict': {
3978 'title': 'Uploads from Interstellar Movie',
3979 'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
3980 'uploader': 'Interstellar Movie',
3981 'uploader_id': 'UCXw-G3eDE9trcvY2sBMM_aA',
3982 },
3983 'playlist_mincount': 21,
3984 }, {
3985 'note': 'Playlist with "show unavailable videos" button',
3986 'url': 'https://www.youtube.com/playlist?list=UUTYLiWFZy8xtPwxFwX9rV7Q',
3987 'info_dict': {
3988 'title': 'Uploads from Phim Siêu Nhân Nhật Bản',
3989 'id': 'UUTYLiWFZy8xtPwxFwX9rV7Q',
3990 'uploader': 'Phim Siêu Nhân Nhật Bản',
3991 'uploader_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',
3992 },
3993 'playlist_mincount': 200,
3994 }, {
3995 'note': 'Playlist with unavailable videos in page 7',
3996 'url': 'https://www.youtube.com/playlist?list=UU8l9frL61Yl5KFOl87nIm2w',
3997 'info_dict': {
3998 'title': 'Uploads from BlankTV',
3999 'id': 'UU8l9frL61Yl5KFOl87nIm2w',
4000 'uploader': 'BlankTV',
4001 'uploader_id': 'UC8l9frL61Yl5KFOl87nIm2w',
4002 },
4003 'playlist_mincount': 1000,
4004 }, {
4005 'note': 'https://github.com/ytdl-org/youtube-dl/issues/21844',
4006 'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
4007 'info_dict': {
4008 'title': 'Data Analysis with Dr Mike Pound',
4009 'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
4010 'uploader_id': 'UC9-y-6csu5WGm29I7JiwpnA',
4011 'uploader': 'Computerphile',
4012 'description': 'md5:7f567c574d13d3f8c0954d9ffee4e487',
4013 },
4014 'playlist_mincount': 11,
4015 }, {
4016 'url': 'https://invidio.us/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
4017 'only_matching': True,
4018 }, {
4019 'note': 'Playlist URL that does not actually serve a playlist',
4020 'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
4021 'info_dict': {
4022 'id': 'FqZTN594JQw',
4023 'ext': 'webm',
4024 'title': "Smiley's People 01 detective, Adventure Series, Action",
4025 'uploader': 'STREEM',
4026 'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
4027 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
4028 'upload_date': '20150526',
4029 'license': 'Standard YouTube License',
4030 'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
4031 'categories': ['People & Blogs'],
4032 'tags': list,
4033 'view_count': int,
4034 'like_count': int,
4035 'dislike_count': int,
4036 },
4037 'params': {
4038 'skip_download': True,
4039 },
4040 'skip': 'This video is not available.',
4041 'add_ie': [YoutubeIE.ie_key()],
4042 }, {
4043 'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',
4044 'only_matching': True,
4045 }, {
4046 'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
4047 'only_matching': True,
4048 }, {
4049 'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',
4050 'info_dict': {
4051 'id': '3yImotZU3tw', # This will keep changing
4052 'ext': 'mp4',
4053 'title': compat_str,
4054 'uploader': 'Sky News',
4055 'uploader_id': 'skynews',
4056 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/skynews',
4057 'upload_date': r're:\d{8}',
4058 'description': compat_str,
4059 'categories': ['News & Politics'],
4060 'tags': list,
4061 'like_count': int,
4062 'dislike_count': int,
4063 },
4064 'params': {
4065 'skip_download': True,
4066 },
4067 'expected_warnings': ['Downloading just video ', 'Ignoring subtitle tracks found in '],
4068 }, {
4069 'url': 'https://www.youtube.com/user/TheYoungTurks/live',
4070 'info_dict': {
4071 'id': 'a48o2S1cPoo',
4072 'ext': 'mp4',
4073 'title': 'The Young Turks - Live Main Show',
4074 'uploader': 'The Young Turks',
4075 'uploader_id': 'TheYoungTurks',
4076 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
4077 'upload_date': '20150715',
4078 'license': 'Standard YouTube License',
4079 'description': 'md5:438179573adcdff3c97ebb1ee632b891',
4080 'categories': ['News & Politics'],
4081 'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
4082 'like_count': int,
4083 'dislike_count': int,
4084 },
4085 'params': {
4086 'skip_download': True,
4087 },
4088 'only_matching': True,
4089 }, {
4090 'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
4091 'only_matching': True,
4092 }, {
4093 'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
4094 'only_matching': True,
4095 }, {
4096 'note': 'A channel that is not live. Should raise error',
4097 'url': 'https://www.youtube.com/user/numberphile/live',
4098 'only_matching': True,
4099 }, {
4100 'url': 'https://www.youtube.com/feed/trending',
4101 'only_matching': True,
4102 }, {
4103 'url': 'https://www.youtube.com/feed/library',
4104 'only_matching': True,
4105 }, {
4106 'url': 'https://www.youtube.com/feed/history',
4107 'only_matching': True,
4108 }, {
4109 'url': 'https://www.youtube.com/feed/subscriptions',
4110 'only_matching': True,
4111 }, {
4112 'url': 'https://www.youtube.com/feed/watch_later',
4113 'only_matching': True,
4114 }, {
4115 'note': 'Recommended - redirects to home page.',
4116 'url': 'https://www.youtube.com/feed/recommended',
4117 'only_matching': True,
4118 }, {
4119 'note': 'inline playlist with not always working continuations',
4120 'url': 'https://www.youtube.com/watch?v=UC6u0Tct-Fo&list=PL36D642111D65BE7C',
4121 'only_matching': True,
4122 }, {
4123 'url': 'https://www.youtube.com/course',
4124 'only_matching': True,
4125 }, {
4126 'url': 'https://www.youtube.com/zsecurity',
4127 'only_matching': True,
4128 }, {
4129 'url': 'http://www.youtube.com/NASAgovVideo/videos',
4130 'only_matching': True,
4131 }, {
4132 'url': 'https://www.youtube.com/TheYoungTurks/live',
4133 'only_matching': True,
4134 }, {
4135 'url': 'https://www.youtube.com/hashtag/cctv9',
4136 'info_dict': {
4137 'id': 'cctv9',
4138 'title': '#cctv9',
4139 },
4140 'playlist_mincount': 350,
4141 }, {
4142 'url': 'https://www.youtube.com/watch?list=PLW4dVinRY435CBE_JD3t-0SRXKfnZHS1P&feature=youtu.be&v=M9cJMXmQ_ZU',
4143 'only_matching': True,
4144 }, {
4145 'note': 'Requires Premium: should request additional YTM-info webpage (and have format 141) for videos in playlist',
4146 'url': 'https://music.youtube.com/playlist?list=PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
4147 'only_matching': True
4148 }, {
4149 'note': '/browse/ should redirect to /channel/',
4150 'url': 'https://music.youtube.com/browse/UC1a8OFewdjuLq6KlF8M_8Ng',
4151 'only_matching': True
4152 }, {
4153 'note': 'VLPL, should redirect to playlist?list=PL...',
4154 'url': 'https://music.youtube.com/browse/VLPLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
4155 'info_dict': {
4156 'id': 'PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
4157 'uploader': 'NoCopyrightSounds',
4158 'description': 'Providing you with copyright free / safe music for gaming, live streaming, studying and more!',
4159 'uploader_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',
4160 'title': 'NCS Releases',
4161 },
4162 'playlist_mincount': 166,
4163 }, {
4164 'note': 'Topic, should redirect to playlist?list=UU...',
4165 'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',
4166 'info_dict': {
4167 'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',
4168 'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
4169 'title': 'Uploads from Royalty Free Music - Topic',
4170 'uploader': 'Royalty Free Music - Topic',
4171 },
4172 'expected_warnings': [
4173 'A channel/user page was given',
4174 'The URL does not have a videos tab',
4175 ],
4176 'playlist_mincount': 101,
4177 }, {
4178 'note': 'Topic without a UU playlist',
4179 'url': 'https://www.youtube.com/channel/UCtFRv9O2AHqOZjjynzrv-xg',
4180 'info_dict': {
4181 'id': 'UCtFRv9O2AHqOZjjynzrv-xg',
4182 'title': 'UCtFRv9O2AHqOZjjynzrv-xg',
4183 },
4184 'expected_warnings': [
4185 'A channel/user page was given',
4186 'The URL does not have a videos tab',
4187 'Falling back to channel URL',
4188 ],
4189 'playlist_mincount': 9,
4190 }, {
4191 'note': 'Youtube music Album',
4192 'url': 'https://music.youtube.com/browse/MPREb_gTAcphH99wE',
4193 'info_dict': {
4194 'id': 'OLAK5uy_l1m0thk3g31NmIIz_vMIbWtyv7eZixlH0',
4195 'title': 'Album - Royalty Free Music Library V2 (50 Songs)',
4196 },
4197 'playlist_count': 50,
4198 }, {
4199 'note': 'unlisted single video playlist',
4200 'url': 'https://www.youtube.com/playlist?list=PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
4201 'info_dict': {
4202 'uploader_id': 'UC9zHu_mHU96r19o-wV5Qs1Q',
4203 'uploader': 'colethedj',
4204 'id': 'PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
4205 'title': 'yt-dlp unlisted playlist test',
4206 'availability': 'unlisted'
4207 },
4208 'playlist_count': 1,
4209 }, {
4210 'note': 'API Fallback: Recommended - redirects to home page. Requires visitorData',
4211 'url': 'https://www.youtube.com/feed/recommended',
4212 'info_dict': {
4213 'id': 'recommended',
4214 'title': 'recommended',
4215 },
4216 'playlist_mincount': 50,
4217 'params': {
4218 'skip_download': True,
4219 'extractor_args': {'youtubetab': {'skip': ['webpage']}}
4220 },
4221 }, {
4222 'note': 'API Fallback: /videos tab, sorted by oldest first',
4223 'url': 'https://www.youtube.com/user/theCodyReeder/videos?view=0&sort=da&flow=grid',
4224 'info_dict': {
4225 'id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',
4226 'title': 'Cody\'sLab - Videos',
4227 'description': 'md5:d083b7c2f0c67ee7a6c74c3e9b4243fa',
4228 'uploader': 'Cody\'sLab',
4229 'uploader_id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',
4230 },
4231 'playlist_mincount': 650,
4232 'params': {
4233 'skip_download': True,
4234 'extractor_args': {'youtubetab': {'skip': ['webpage']}}
4235 },
4236 }, {
4237 'note': 'API Fallback: Topic, should redirect to playlist?list=UU...',
4238 'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',
4239 'info_dict': {
4240 'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',
4241 'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
4242 'title': 'Uploads from Royalty Free Music - Topic',
4243 'uploader': 'Royalty Free Music - Topic',
4244 },
4245 'expected_warnings': [
4246 'A channel/user page was given',
4247 'The URL does not have a videos tab',
4248 ],
4249 'playlist_mincount': 101,
4250 'params': {
4251 'skip_download': True,
4252 'extractor_args': {'youtubetab': {'skip': ['webpage']}}
4253 },
4254 }]
4255
@classmethod
def suitable(cls, url):
    """Return False for URLs YoutubeIE accepts; otherwise defer to the parent check."""
    # Single-video URLs take precedence over the tab extractor
    if YoutubeIE.suitable(url):
        return False
    return super(YoutubeTabIE, cls).suitable(url)
4260
def _real_extract(self, url):
    """Unsmuggle the URL, run the real extraction and re-smuggle data into the entries.

    Flags the smuggled data with 'is_music_url' when the URL is a music URL,
    so that the flag travels with every entry of the result.
    """
    url, smuggled_data = unsmuggle_url(url, {})
    if self.is_music_url(url):
        smuggled_data['is_music_url'] = True

    result = self.__real_extract(url, smuggled_data)
    entries = result.get('entries')
    if entries:
        # Propagate the smuggled data to each entry of the result
        result['entries'] = self._smuggle_data(entries, smuggled_data)
    return result
4269
# Splits a URL into: `pre` (the part matched by _VALID_URL), an optional
# `tab` ("/<word>", only attempted when the `channel_type` group matched;
# that group is expected to be defined inside _VALID_URL) and `post`
# (everything that remains).
_url_re = re.compile(
    r'(?P<pre>%s)(?(channel_type)(?P<tab>/\w+))?(?P<post>.*)$' % _VALID_URL)
4271
def __real_extract(self, url, smuggled_data):
    """Normalise the URL, fetch its data and dispatch to the matching result builder.

    The URL is first rewritten (host forced to www.youtube.com, music
    channel/album ids mapped to playlists, bare channel pages redirected to
    "/videos"), then the page data is extracted and returned as a tab
    listing, a playlist, or a single-video url_result.

    @param url            URL to extract (already unsmuggled)
    @param smuggled_data  dict of smuggled data; only 'is_music_url' is read here
    Raises ExtractorError when the page cannot be recognised, when an album
    cannot be resolved, or when a "/live" tab did not redirect to a video.
    """
    item_id = self._match_id(url)
    # Always query www.youtube.com, whichever host the URL was given with
    parsed_url = compat_urlparse.urlparse(url)
    url = compat_urlparse.urlunparse(parsed_url._replace(netloc='www.youtube.com'))
    compat_opts = self.get_param('compat_opts', [])

    def get_mobj(target):
        # Named groups of _url_re, with unmatched groups normalised to ''
        groups = self._url_re.match(target).groupdict()
        return {name: '' if value is None else value for name, value in groups.items()}

    def get_tabs(response):
        return try_get(
            response, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)

    mobj = get_mobj(url)
    pre = mobj['pre']
    # Youtube returns incomplete data if tabname is not lower case
    tab = mobj['tab'].lower()
    post = mobj['post']
    is_channel = not mobj['not_channel']

    if is_channel and smuggled_data.get('is_music_url'):
        id_prefix = item_id[:2]
        if id_prefix == 'VL':
            # Youtube music VL channels have an equivalent playlist
            item_id = item_id[2:]
            pre = 'https://www.youtube.com/playlist?list=%s' % item_id
            tab, post, is_channel = '', '', False
        elif id_prefix == 'MP':
            # Resolve albums (/[channel/browse]/MP...) to their equivalent playlist
            mdata = self._extract_tab_endpoint(
                'https://music.youtube.com/channel/%s' % item_id, item_id, default_client='web_music')
            murl = traverse_obj(
                mdata, ('microformat', 'microformatDataRenderer', 'urlCanonical'),
                get_all=False, expected_type=compat_str)
            if not murl:
                raise ExtractorError('Failed to resolve album to playlist.')
            return self.url_result(murl, ie=YoutubeTabIE.ie_key())
        elif mobj['channel_type'] == 'browse':
            # Youtube music /browse/ should be changed to /channel/
            pre = 'https://www.youtube.com/channel/%s' % item_id

    if is_channel and not tab and 'no-youtube-channel-redirect' not in compat_opts:
        # Home URLs should redirect to /videos/
        self.report_warning(
            'A channel/user page was given. All the channel\'s videos will be downloaded. '
            'To download only the videos in the home page, add a "/featured" to the URL')
        tab = '/videos'

    url = pre + tab + post
    mobj = get_mobj(url)

    # Handle both video/playlist URLs
    qs = parse_qs(url)
    video_id = qs.get('v', [None])[0]
    playlist_id = qs.get('list', [None])[0]

    if not video_id and mobj['not_channel'].startswith('watch'):
        if not playlist_id:
            # If there is neither video or playlist ids, youtube redirects to home page, which is undesirable
            raise ExtractorError('Unable to recognize tab page')
        # Common mistake: https://www.youtube.com/watch?list=playlist_id
        self.report_warning('A video URL was given without video ID. Trying to download playlist %s' % playlist_id)
        url = 'https://www.youtube.com/playlist?list=%s' % playlist_id
        mobj = get_mobj(url)

    if video_id and playlist_id:
        if self.get_param('noplaylist'):
            self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
            return self.url_result(
                f'https://www.youtube.com/watch?v={video_id}', ie=YoutubeIE.ie_key(), video_id=video_id)
        self.to_screen('Downloading playlist %s; add --no-playlist to just download video %s' % (playlist_id, video_id))

    data, ytcfg = self._extract_data(url, item_id)

    tabs = get_tabs(data)
    if tabs:
        tab_name = self._extract_selected_tab(tabs).get('title', '')
        if 'no-youtube-channel-redirect' not in compat_opts:
            requested_tab = mobj['tab']
            if requested_tab == '/live':
                # Live tab should have redirected to the video
                raise ExtractorError('The channel is not currently live', expected=True)
            if requested_tab == '/videos' and tab_name.lower() != requested_tab[1:]:
                if not mobj['not_channel'] and item_id[:2] == 'UC':
                    # Topic channels don't have /videos. Use the equivalent playlist instead
                    self.report_warning(
                        'The URL does not have a %s tab. Trying to redirect to playlist UU%s instead'
                        % (requested_tab[1:], item_id[2:]))
                    pl_id = 'UU%s' % item_id[2:]
                    pl_url = 'https://www.youtube.com/playlist?list=%s%s' % (pl_id, mobj['post'])
                    try:
                        data, ytcfg = self._extract_data(pl_url, pl_id, ytcfg=ytcfg, fatal=True)
                    except ExtractorError:
                        self.report_warning('The playlist gave error. Falling back to channel URL')
                    else:
                        # Only switch over to the playlist once its data was fetched
                        item_id, url = pl_id, pl_url
                else:
                    self.report_warning(
                        'The URL does not have a %s tab. %s is being downloaded instead'
                        % (requested_tab[1:], tab_name))

    self.write_debug('Final URL: %s' % url)

    # YouTube sometimes provides a button to reload playlist with unavailable videos.
    if 'no-youtube-unavailable-videos' not in compat_opts:
        data = self._reload_with_unavailable_videos(item_id, data, ytcfg) or data
    self._extract_and_report_alerts(data, only_once=True)

    # `data` may have been replaced above, so look the tabs up again
    tabs = get_tabs(data)
    if tabs:
        return self._extract_from_tabs(item_id, ytcfg, data, tabs)

    playlist = try_get(
        data, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
    if playlist:
        return self._extract_from_playlist(item_id, url, data, playlist, ytcfg)

    video_id = try_get(
        data, lambda x: x['currentVideoEndpoint']['watchEndpoint']['videoId'],
        compat_str) or video_id
    if not video_id:
        raise ExtractorError('Unable to recognize tab page')

    if mobj['tab'] != '/live':  # live tab is expected to redirect to video
        self.report_warning('Unable to recognize playlist. Downloading just video %s' % video_id)
    return self.url_result(
        f'https://www.youtube.com/watch?v={video_id}', ie=YoutubeIE.ie_key(), video_id=video_id)
4383
4384
class YoutubePlaylistIE(InfoExtractor):
    """Accepts bare playlist ids and playlist URLs and hands them over to YoutubeTabIE."""

    IE_NAME = 'youtube:playlist'
    IE_DESC = 'YouTube playlists'
    _VALID_URL = r'''(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        (?:
                            (?:
                                youtube(?:kids)?\.com|
                                %(invidious)s
                            )
                            /.*?\?.*?\blist=
                        )?
                        (?P<id>%(playlist_id)s)
                     )''' % {
        'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE,
        'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
    }
    _TESTS = [{
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
            'uploader': 'Wickydoo',
            'uploader_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
            'description': 'md5:8fa6f52abb47a9552002fa3ddfc57fc2',
        },
        'playlist_mincount': 29,
    }, {
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        },
        'playlist_count': 2,
        'skip': 'This playlist is private',
    }, {
        'note': 'embedded',
        'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA15',
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
            'uploader': 'milan',
            'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
        }
    }, {
        'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
        'playlist_mincount': 654,
        'info_dict': {
            'title': '2018 Chinese New Singles (11/6 updated)',
            'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
            'uploader': 'LBK',
            'uploader_id': 'UC21nz3_MesPLqtDqwdvnoxA',
            'description': 'md5:da521864744d60a198e3a88af4db0d9d',
        }
    }, {
        'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
        'only_matching': True,
    }, {
        # music album playlist
        'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Reject URLs claimed by YoutubeTabIE and URLs carrying a video id (`v=`)."""
        if YoutubeTabIE.suitable(url):
            return False
        # NOTE(review): the function-local import is kept deliberately; it looks
        # like this method is meant to stay self-contained - confirm before
        # replacing it with the module-level import.
        from ..utils import parse_qs
        if parse_qs(url).get('v', [None])[0]:
            return False
        return super(YoutubePlaylistIE, cls).suitable(url)

    def _real_extract(self, url):
        """Rebuild the input as a www.youtube.com/playlist URL and delegate to YoutubeTabIE."""
        playlist_id = self._match_id(url)
        is_music_url = YoutubeBaseInfoExtractor.is_music_url(url)
        # Keep the original query string; a bare id has none, so build one from the id
        query = parse_qs(url) or {'list': playlist_id}
        playlist_url = update_url_query('https://www.youtube.com/playlist', query)
        if is_music_url:
            playlist_url = smuggle_url(playlist_url, {'is_music_url': True})
        return self.url_result(playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
4470
4471
class YoutubeYtBeIE(InfoExtractor):
    """Turns youtu.be links that carry a `list=` parameter into watch URLs for YoutubeTabIE."""

    IE_DESC = 'youtu.be'
    _VALID_URL = r'https?://youtu\.be/(?P<id>[0-9A-Za-z_-]{11})/*?.*?\blist=(?P<playlist_id>%(playlist_id)s)' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TESTS = [{
        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
        'info_dict': {
            'id': 'yeWKywCrFtk',
            'ext': 'mp4',
            'title': 'Small Scale Baler and Braiding Rugs',
            'uploader': 'Backus-Page House Museum',
            'uploader_id': 'backuspagemuseum',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
            'upload_date': '20161008',
            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
            'categories': ['Nonprofits & Activism'],
            'tags': list,
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
    }, {
        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build the equivalent watch?v=...&list=... URL and delegate to YoutubeTabIE."""
        video_id, playlist_id = self._match_valid_url(url).group('id', 'playlist_id')
        watch_url = update_url_query('https://www.youtube.com/watch', {
            'v': video_id,
            'list': playlist_id,
            'feature': 'youtu.be',
        })
        return self.url_result(watch_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
4510
4511
class YoutubeYtUserIE(InfoExtractor):
    """Maps the "ytuser:<name>" shorthand to that user's /videos page."""

    IE_DESC = 'YouTube user videos; "ytuser:" prefix'
    _VALID_URL = r'ytuser:(?P<id>.+)'
    _TESTS = [{
        'url': 'ytuser:phihag',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the user's videos URL."""
        user_id = self._match_id(url)
        videos_url = 'https://www.youtube.com/user/%s/videos' % user_id
        return self.url_result(videos_url, ie=YoutubeTabIE.ie_key(), video_id=user_id)
4525
4526
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    """Maps the ":ytfav" keyword (and its spelling variants) to the "LL" playlist."""

    IE_DESC = 'YouTube liked videos; ":ytfav" keyword (requires cookies)'
    IE_NAME = 'youtube:favorites'
    _LOGIN_REQUIRED = True
    _VALID_URL = r':ytfav(?:ou?rite)?s?'
    _TESTS = [{
        'url': ':ytfav',
        'only_matching': True,
    }, {
        'url': ':ytfavorites',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the playlist URL for list id "LL"."""
        liked_url = 'https://www.youtube.com/playlist?list=LL'
        return self.url_result(liked_url, ie=YoutubeTabIE.ie_key())
4544
4545
class YoutubeSearchIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor):
    """Search extractor registered under the "ytsearch" key."""

    IE_NAME = 'youtube:search'
    IE_DESC = 'YouTube search'
    _SEARCH_KEY = 'ytsearch'
    # Videos only
    _SEARCH_PARAMS = 'EgIQAQ%3D%3D'
    _TESTS = []
4552
4553
class YoutubeSearchDateIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor):
    """Search extractor registered under the "ytsearchdate" key."""

    IE_NAME = '%s:date' % YoutubeSearchIE.IE_NAME
    IE_DESC = 'YouTube search, newest videos first'
    _SEARCH_KEY = 'ytsearchdate'
    # Videos only, sorted by date
    _SEARCH_PARAMS = 'CAISAhAB'
4559
4560
class YoutubeSearchURLIE(YoutubeTabBaseInfoExtractor):
    """Handles youtube.com/results URLs, taking the query and `sp` value from the query string."""

    IE_NAME = YoutubeSearchIE.IE_NAME + '_url'
    IE_DESC = 'YouTube search URLs with sorting and filter support'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?:[^&]+)(?:[&]|$)'
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
        }
    }, {
        'url': 'https://www.youtube.com/results?search_query=python&sp=EgIQAg%253D%253D',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'python',
            'title': 'python',
        }
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return a playlist of search results; the query serves as both id and title."""
        qs = parse_qs(url)
        # `search_query` wins over `q` when both are present
        query = (qs.get('search_query') or qs.get('q'))[0]
        search_params = qs.get('sp', (None,))[0]
        entries = self._search_results(query, search_params)
        return self.playlist_result(entries, query, query)
4589
4590
class YoutubeFeedsInfoExtractor(YoutubeTabIE):
    """
    Common base for the feed extractors.
    A subclass has to provide the _FEED_NAME property; it is used to build
    both IE_NAME and the feed URL that is handed to YoutubeTabIE.
    """
    _TESTS = []
    _LOGIN_REQUIRED = True

    @property
    def IE_NAME(self):
        return 'youtube:%s' % self._FEED_NAME

    def _real_extract(self, url):
        feed_url = 'https://www.youtube.com/feed/%s' % self._FEED_NAME
        return self.url_result(feed_url, ie=YoutubeTabIE.ie_key())
4607
4608
class YoutubeWatchLaterIE(InfoExtractor):
    """Maps the ":ytwatchlater" keyword to the "WL" playlist."""

    IE_DESC = 'Youtube watch later list; ":ytwatchlater" keyword (requires cookies)'
    IE_NAME = 'youtube:watchlater'
    _VALID_URL = r':ytwatchlater'
    _TESTS = [{
        'url': ':ytwatchlater',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the playlist URL for list id "WL"."""
        watch_later_url = 'https://www.youtube.com/playlist?list=WL'
        return self.url_result(watch_later_url, ie=YoutubeTabIE.ie_key())
4621
4622
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for "recommended"; matches the bare youtube.com home URL and ":ytrec"."""

    _FEED_NAME = 'recommended'
    # Unlike the feed base class, this feed does not set the login requirement
    _LOGIN_REQUIRED = False
    IE_DESC = 'YouTube recommended videos; ":ytrec" keyword'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/?(?:[?#]|$)|:ytrec(?:ommended)?'
    _TESTS = [{
        'url': ':ytrec',
        'only_matching': True,
    }, {
        'url': ':ytrecommended',
        'only_matching': True,
    }, {
        'url': 'https://youtube.com',
        'only_matching': True,
    }]
4638
4639
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for "subscriptions"; matches ":ytsub", ":ytsubs", ":ytsubscription(s)"."""

    _FEED_NAME = 'subscriptions'
    IE_DESC = 'YouTube subscriptions feed; ":ytsubs" keyword (requires cookies)'
    _VALID_URL = r':ytsub(?:scription)?s?'
    _TESTS = [{
        'url': ':ytsubs',
        'only_matching': True,
    }, {
        'url': ':ytsubscriptions',
        'only_matching': True,
    }]
4651
4652
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for "history"; matches ":ythis" and ":ythistory"."""

    _FEED_NAME = 'history'
    IE_DESC = 'Youtube watch history; ":ythis" keyword (requires cookies)'
    _VALID_URL = r':ythis(?:tory)?'
    _TESTS = [{
        'url': ':ythistory',
        'only_matching': True,
    }]
4661
4662
class YoutubeTruncatedURLIE(InfoExtractor):
    """Catches watch/attribution_link URLs that end before any video id.

    Such URLs typically result from an unquoted "&" on the command line;
    extraction always fails with an explanatory (expected) error.
    """

    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError explaining how to quote the URL."""
        # The suggested command names this program (yt-dlp), so that the
        # example can be pasted as-is.
        raise ExtractorError(
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like yt-dlp '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply yt-dlp BaW_jenozKc .',
            expected=True)
4710
4711
class YoutubeClipIE(InfoExtractor):
    """Matches youtube.com/clip/ URLs, warns that clips are unsupported and defers to 'Generic'."""

    IE_DESC = False  # Do not list
    IE_NAME = 'youtube:clip'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/clip/'

    def _real_extract(self, url):
        """Emit the warning and pass the unchanged URL on to the 'Generic' extractor."""
        self.report_warning(
            'YouTube clips are not currently supported. The entire video will be downloaded instead')
        return self.url_result(url, ie='Generic')
4720
4721
class YoutubeTruncatedIDIE(InfoExtractor):
    """Catches watch URLs whose `v=` value is only 1 to 10 characters long."""

    IE_DESC = False  # Do not list
    IE_NAME = 'youtube:truncated_id'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError naming the incomplete id and the URL."""
        video_id = self._match_id(url)
        message = 'Incomplete YouTube ID %s. URL %s looks truncated.' % (video_id, url)
        raise ExtractorError(message, expected=True)