]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/youtube.py
[iPrima] Fix extractor (#1541)
[yt-dlp.git] / yt_dlp / extractor / youtube.py
CommitLineData
c5e8d7af 1# coding: utf-8
c5e8d7af 2
78caa52a
PH
3from __future__ import unicode_literals
4
2d6659b9 5import base64
d92f5d5a 6import calendar
109dd3b2 7import copy
fe93e2c4 8import datetime
a5c56234 9import hashlib
0ca96d48 10import itertools
c5e8d7af 11import json
c4417ddb 12import os.path
d77ab8e2 13import random
c5e8d7af 14import re
8a784c74 15import time
e0df6211 16import traceback
c5e8d7af 17
b05654f0 18from .common import InfoExtractor, SearchInfoExtractor
4bb4a188 19from ..compat import (
edf3e38e 20 compat_chr,
29f7c58a 21 compat_HTTPError,
c5e8d7af 22 compat_parse_qs,
545cc85d 23 compat_str,
7fd002c0 24 compat_urllib_parse_unquote_plus,
15707c7e 25 compat_urllib_parse_urlencode,
7c80519c 26 compat_urllib_parse_urlparse,
7c61bd36 27 compat_urlparse,
4bb4a188 28)
545cc85d 29from ..jsinterp import JSInterpreter
4bb4a188 30from ..utils import (
2d6659b9 31 bytes_to_intlist,
c5e8d7af 32 clean_html,
d92f5d5a 33 datetime_from_str,
11f9be09 34 dict_get,
358de58c 35 error_to_compat_str,
c5e8d7af 36 ExtractorError,
2d30521a 37 float_or_none,
11f9be09 38 format_field,
dd27fd17 39 int_or_none,
2d6659b9 40 intlist_to_bytes,
641ad5d8 41 is_html,
34921b43 42 join_nonempty,
94278f72 43 mimetype2ext,
9c0d7f49 44 network_exceptions,
11f9be09 45 orderedSet,
6310acf5 46 parse_codecs,
49bd8c66 47 parse_count,
7c80519c 48 parse_duration,
7ea65411 49 parse_iso8601,
4dfbf869 50 parse_qs,
dca3ff4a 51 qualities,
c0ac49bc 52 remove_end,
3995d37d 53 remove_start,
cf7e015f 54 smuggle_url,
dbdaaa23 55 str_or_none,
c93d53f5 56 str_to_int,
7c365c21 57 traverse_obj,
556dbe7f 58 try_get,
c5e8d7af
PH
59 unescapeHTML,
60 unified_strdate,
cf7e015f 61 unsmuggle_url,
8bdd16b4 62 update_url_query,
21c340b8 63 url_or_none,
fe93e2c4 64 urljoin,
7c365c21 65 variadic,
c5e8d7af
PH
66)
67
5f6a1245 68
# Any client whose name starts with "_" cannot be explicitly requested by the user.
# Entries that omit INNERTUBE_API_KEY, INNERTUBE_HOST or REQUIRE_JS_PLAYER rely on
# the defaults applied by build_innertube_clients().
INNERTUBE_CLIENTS = {
    # --- web family ---
    'web': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB',
                'clientVersion': '2.20210622.10.00',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 1,
    },
    'web_embedded': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB_EMBEDDED_PLAYER',
                'clientVersion': '1.20210620.0.1',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 56,
    },
    'web_music': {
        'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
        'INNERTUBE_HOST': 'music.youtube.com',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB_REMIX',
                'clientVersion': '1.20210621.00.00',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 67,
    },
    'web_creator': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB_CREATOR',
                'clientVersion': '1.20210621.00.00',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 62,
    },

    # --- android family ---
    'android': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID',
                'clientVersion': '16.20',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 3,
        'REQUIRE_JS_PLAYER': False,
    },
    'android_embedded': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID_EMBEDDED_PLAYER',
                'clientVersion': '16.20',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 55,
        'REQUIRE_JS_PLAYER': False,
    },
    'android_music': {
        'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
        'INNERTUBE_HOST': 'music.youtube.com',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID_MUSIC',
                'clientVersion': '4.32',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 21,
        'REQUIRE_JS_PLAYER': False,
    },
    'android_creator': {
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID_CREATOR',
                'clientVersion': '21.24.100',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 14,
        'REQUIRE_JS_PLAYER': False,
    },

    # --- ios family ---
    # ios has HLS live streams
    # See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/680
    'ios': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS',
                'clientVersion': '16.20',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 5,
        'REQUIRE_JS_PLAYER': False,
    },
    'ios_embedded': {
        'INNERTUBE_API_KEY': 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS_MESSAGES_EXTENSION',
                'clientVersion': '16.20',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 66,
        'REQUIRE_JS_PLAYER': False,
    },
    'ios_music': {
        'INNERTUBE_API_KEY': 'AIzaSyDK3iBpDP9nHVTk2qL73FLJICfOC3c51Og',
        'INNERTUBE_HOST': 'music.youtube.com',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS_MUSIC',
                'clientVersion': '4.32',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 26,
        'REQUIRE_JS_PLAYER': False,
    },
    'ios_creator': {
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS_CREATOR',
                'clientVersion': '21.24.100',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 15,
        'REQUIRE_JS_PLAYER': False,
    },

    # --- mobile web ---
    # mweb has 'ultralow' formats
    # See: https://github.com/yt-dlp/yt-dlp/pull/557
    'mweb': {
        'INNERTUBE_API_KEY': 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'MWEB',
                'clientVersion': '2.20210721.07.00',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 2,
    },
}
215
216
def build_innertube_clients():
    """
    Finalise INNERTUBE_CLIENTS in place.

    For every configured client: fill in the default API key, host,
    REQUIRE_JS_PLAYER flag and 'hl' language, and compute a 'priority' from the
    client's family (the part of the name before the first underscore).
    Each base client additionally gets a '<name>_agegate' deep copy that uses
    the EMBED client screen and a thirdParty embed URL.
    """
    base_clients = ('android', 'web', 'ios', 'mweb')
    third_party = {
        'embedUrl': 'https://google.com',  # Can be any valid URL
    }
    # NOTE(review): qualities() presumably ranks later entries higher, hence the reversal - confirm
    rank = qualities(base_clients[::-1])
    defaults = (
        ('INNERTUBE_API_KEY', 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8'),
        ('INNERTUBE_HOST', 'www.youtube.com'),
        ('REQUIRE_JS_PLAYER', True),
    )

    # Iterate over a snapshot: the agegate variants are inserted during the loop
    for name, cfg in list(INNERTUBE_CLIENTS.items()):
        for key, fallback in defaults:
            cfg.setdefault(key, fallback)
        cfg['INNERTUBE_CONTEXT']['client'].setdefault('hl', 'en')
        family = name.split('_', 1)[0]
        cfg['priority'] = 10 * rank(family)

        if name in base_clients:
            agegate_cfg = copy.deepcopy(cfg)
            INNERTUBE_CLIENTS[f'{name}_agegate'] = agegate_cfg
            agegate_context = agegate_cfg['INNERTUBE_CONTEXT']
            agegate_context['client']['clientScreen'] = 'EMBED'
            agegate_context['thirdParty'] = third_party
            agegate_cfg['priority'] -= 1
        elif name.endswith('_embedded'):
            cfg['INNERTUBE_CONTEXT']['thirdParty'] = third_party
            cfg['priority'] -= 2
        else:
            cfg['priority'] -= 3


build_innertube_clients()
244
245
de7f3446 246class YoutubeBaseInfoExtractor(InfoExtractor):
b2e8bc1b 247 """Provide base functions for Youtube extractors"""
e00eb564 248
3462ffa8 249 _RESERVED_NAMES = (
3cd786db 250 r'channel|c|user|playlist|watch|w|v|embed|e|watch_popup|clip|'
3619f78d 251 r'shorts|movies|results|shared|hashtag|trending|feed|feeds|'
252 r'browse|oembed|get_video_info|iframe_api|s/player|'
cd7c66cf 253 r'storefront|oops|index|account|reporthistory|t/terms|about|upload|signin|logout')
3462ffa8 254
3619f78d 255 _PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)'
256
b2e8bc1b 257 _NETRC_MACHINE = 'youtube'
3619f78d 258
b2e8bc1b
JMF
259 # If True it will raise an error if no login info is provided
260 _LOGIN_REQUIRED = False
261
def _login(self):
    """
    Check the login prerequisites for YouTube.

    When _LOGIN_REQUIRED is set and neither the 'cookiefile' nor the
    'cookiesfrombrowser' param is provided, raise_login_required() is called.
    Username/password credentials are not used for logging in; if a username
    is configured, only a warning pointing at cookie login is reported.
    """
    if self._LOGIN_REQUIRED:
        cookies_configured = any(
            self.get_param(param) is not None
            for param in ('cookiefile', 'cookiesfrombrowser'))
        if not cookies_configured:
            self.raise_login_required(
                'Login details are needed to download this content', method='cookies')

    username, _ = self._get_login_info()
    if not username:
        return
    self.report_warning(f'Cannot login to YouTube using username and password. {self._LOGIN_HINTS["cookies"]}')
b2e8bc1b 276
def _initialize_consent(self):
    """
    Make sure a 'YES' CONSENT cookie exists for .youtube.com.

    Nothing is done when a '__Secure-3PSID' cookie is present or when the
    existing CONSENT cookie already contains 'YES'. Otherwise the numeric id of
    a 'PENDING+<id>' consent value is reused, or a random 3-digit id is picked.
    """
    cookies = self._get_cookies('https://www.youtube.com/')
    if cookies.get('__Secure-3PSID'):
        return

    consent = cookies.get('CONSENT')
    consent_id = None
    if consent:
        if 'YES' in consent.value:
            return
        consent_id = self._search_regex(
            r'PENDING\+(\d+)', consent.value, 'consent', default=None)

    consent_id = consent_id or random.randint(100, 999)
    self._set_cookie('.youtube.com', 'CONSENT', 'YES+cb.20210328-17-p0.en+FX+%s' % consent_id)
8d81f3e3 291
def _real_initialize(self):
    """Set up the consent cookie first, then run the login check."""
    for step in (self._initialize_consent, self._login):
        step()
c5e8d7af 295
a0566bbf 296 _YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+?})\s*;'
29f7c58a 297 _YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*=\s*({.+?})\s*;'
298 _YT_INITIAL_BOUNDARY_RE = r'(?:var\s+meta|</script|\n)'
a0566bbf 299
def _get_default_ytcfg(self, client='web'):
    """Return a deep copy of the INNERTUBE_CLIENTS entry for *client* (KeyError if unknown)."""
    builtin_cfg = INNERTUBE_CLIENTS[client]
    return copy.deepcopy(builtin_cfg)
109dd3b2 302
def _get_innertube_host(self, client='web'):
    """Return the 'INNERTUBE_HOST' configured for *client* in INNERTUBE_CLIENTS."""
    client_cfg = INNERTUBE_CLIENTS[client]
    return client_cfg['INNERTUBE_HOST']
109dd3b2 305
def _ytcfg_get_safe(self, ytcfg, getter, expected_type=None, default_client='web'):
    """
    try_get() on *ytcfg*, falling back to the default config of *default_client*.

    The fallback is used whenever the value from *ytcfg* is falsy.
    """
    value = try_get(ytcfg, getter, expected_type)
    if value:
        return value
    return try_get(self._get_default_ytcfg(default_client), getter, expected_type)
310
def _extract_client_name(self, ytcfg, default_client='web'):
    """Return the client name from *ytcfg*, or from the default config of *default_client*."""
    name_getters = (
        lambda cfg: cfg['INNERTUBE_CLIENT_NAME'],
        lambda cfg: cfg['INNERTUBE_CONTEXT']['client']['clientName'],
    )
    return self._ytcfg_get_safe(ytcfg, name_getters, compat_str, default_client)
109dd3b2 315
def _extract_client_version(self, ytcfg, default_client='web'):
    """Return the client version from *ytcfg*, or from the default config of *default_client*."""
    version_getters = (
        lambda cfg: cfg['INNERTUBE_CLIENT_VERSION'],
        lambda cfg: cfg['INNERTUBE_CONTEXT']['client']['clientVersion'],
    )
    return self._ytcfg_get_safe(ytcfg, version_getters, compat_str, default_client)
109dd3b2 320
def _extract_api_key(self, ytcfg=None, default_client='web'):
    """Return 'INNERTUBE_API_KEY' from *ytcfg*, or from the default config of *default_client*."""
    def api_key_of(cfg):
        return cfg['INNERTUBE_API_KEY']

    return self._ytcfg_get_safe(ytcfg, api_key_of, compat_str, default_client)
323
def _extract_context(self, ytcfg=None, default_client='web'):
    """
    Return the innertube context dict to send with API requests.

    A non-empty 'INNERTUBE_CONTEXT' found in *ytcfg* is returned as-is.
    Otherwise a copy of the default context of *default_client* is used; when
    *ytcfg* is given, its client name/version and VISITOR_DATA are merged in.
    """
    def context_of(cfg):
        return try_get(cfg, lambda x: x['INNERTUBE_CONTEXT'], dict)

    supplied = context_of(ytcfg)
    if supplied:
        return supplied

    context = context_of(self._get_default_ytcfg(default_client))
    if ytcfg:
        client = context['client']
        # Recreate the client context (required)
        client.update({
            'clientVersion': self._extract_client_version(ytcfg, default_client),
            'clientName': self._extract_client_name(ytcfg, default_client),
        })
        visitor_data = try_get(ytcfg, lambda x: x['VISITOR_DATA'], compat_str)
        if visitor_data:
            client['visitorData'] = visitor_data
    return context
343
cf87314d 344 _SAPISID = None
345
def _generate_sapisidhash_header(self, origin='https://www.youtube.com'):
    """
    Build the 'SAPISIDHASH <time>_<sha1>' authorization value for *origin*.

    The SAPISID value is read from the YouTube cookies on first use and cached
    in self._SAPISID (False when no usable cookie exists).
    Returns None when no SAPISID is available.
    """
    time_now = round(time.time())

    if self._SAPISID is None:
        cookie_jar = self._get_cookies('https://www.youtube.com')
        # Sometimes SAPISID cookie isn't present but __Secure-3PAPISID is.
        # See: https://github.com/yt-dlp/yt-dlp/issues/393
        cookie = dict_get(cookie_jar, ('__Secure-3PAPISID', 'SAPISID'))
        if not (cookie and cookie.value):
            self._SAPISID = False
        else:
            self._SAPISID = cookie.value
            self.write_debug('Extracted SAPISID cookie')
            # SAPISID cookie is required if not already present
            if not cookie_jar.get('SAPISID'):
                self.write_debug('Copying __Secure-3PAPISID cookie to SAPISID cookie')
                self._set_cookie(
                    '.youtube.com', 'SAPISID', self._SAPISID, secure=True, expire_time=time_now + 3600)

    if not self._SAPISID:
        return None

    # SAPISIDHASH algorithm from https://stackoverflow.com/a/32065323
    payload = f'{time_now} {self._SAPISID} {origin}'.encode('utf-8')
    sapisidhash = hashlib.sha1(payload).hexdigest()
    return f'SAPISIDHASH {time_now}_{sapisidhash}'
a5c56234
M
370
def _call_api(self, ep, query, video_id, fatal=True, headers=None,
              note='Downloading API JSON', errnote='Unable to download API page',
              context=None, api_key=None, api_hostname=None, default_client='web'):
    """
    Call the innertube endpoint *ep* and return the parsed JSON response.

    @param ep              Endpoint name appended to 'https://<host>/youtubei/v1/'
    @param query           Dict merged into the JSON request body (after 'context')
    @param video_id        Identifier passed on to _download_json
    @param headers         Extra headers; they override the generated API headers
    @param context         Innertube context; defaults to that of *default_client*
    @param api_key         API key; defaults to the key of *default_client*
    @param api_hostname    Host to call; defaults to the host of *default_client*
    @param default_client  Client whose defaults are used for anything not given
    """
    data = {'context': context or self._extract_context(default_client=default_client)}
    data.update(query)

    real_headers = self.generate_api_headers(default_client=default_client)
    real_headers.update({'content-type': 'application/json'})
    if headers:
        real_headers.update(headers)

    # The fallback key must come from the same client as the context, headers
    # and host; not passing default_client here silently used the 'web' key.
    key = api_key or self._extract_api_key(default_client=default_client)
    return self._download_json(
        'https://%s/youtubei/v1/%s' % (api_hostname or self._get_innertube_host(default_client), ep),
        video_id=video_id, fatal=fatal, note=note, errnote=errnote,
        data=json.dumps(data).encode('utf8'), headers=real_headers,
        query={'key': key})
386
def extract_yt_initial_data(self, item_id, webpage, fatal=True):
    """
    Locate the ytInitialData JSON in *webpage* and return it parsed.

    The boundary-anchored pattern is tried before the bare one.
    Returns None when nothing was matched (only possible if *fatal* is false).
    """
    patterns = (
        r'%s\s*%s' % (self._YT_INITIAL_DATA_RE, self._YT_INITIAL_BOUNDARY_RE),
        self._YT_INITIAL_DATA_RE)
    raw_json = self._search_regex(patterns, webpage, 'yt initial data', fatal=fatal)
    if not raw_json:
        return None
    return self._parse_json(raw_json, item_id, fatal=fatal)
0c148415 393
@staticmethod
def _extract_session_index(*data):
    """
    Index of current account in account list.
    See: https://github.com/yt-dlp/yt-dlp/pull/519

    Returns the first 'SESSION_INDEX' among the given ytcfgs that converts to
    an int, or None if there is none.
    """
    candidates = (
        int_or_none(try_get(ytcfg, lambda x: x['SESSION_INDEX']))
        for ytcfg in data)
    return next((index for index in candidates if index is not None), None)
404
# Deprecated?
def _extract_identity_token(self, ytcfg=None, webpage=None):
    """Return 'ID_TOKEN' from *ytcfg* if present, else search for it in *webpage*; None if not found."""
    token = try_get(ytcfg, lambda x: x['ID_TOKEN'], compat_str) if ytcfg else None
    if token:
        return token
    if not webpage:
        return None
    return self._search_regex(
        r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
        'identity token', default=None, fatal=False)
a1c5d2ca
M
415
@staticmethod
def _extract_account_syncid(*args):
    """
    Extract syncId required to download private playlists of secondary channels
    @params response and/or ytcfg
    @returns the first sync id found among the arguments, else None
    """
    datasync_getters = (
        lambda x: x['responseContext']['mainAppWebResponseContext']['datasyncId'],
        lambda x: x['DATASYNC_ID'])

    for source in args:
        # ytcfg includes channel_syncid if on secondary channel
        delegated_sid = try_get(source, lambda x: x['DELEGATED_SESSION_ID'], compat_str)
        if delegated_sid:
            return delegated_sid

        datasync_id = try_get(source, datasync_getters, compat_str) or ''
        parts = datasync_id.split('||')
        if len(parts) < 2 or not parts[1]:
            continue
        # datasyncid is of the form "channel_syncid||user_syncid" for secondary channel
        # and just "user_syncid||" for primary channel. We only want the channel_syncid
        return parts[0]
    return None
a1c5d2ca 434
@staticmethod
def _extract_visitor_data(*args):
    """
    Extracts visitorData from an API response or ytcfg
    Appears to be used to track session state
    """
    visitor_data_paths = (
        'VISITOR_DATA',
        ('INNERTUBE_CONTEXT', 'client', 'visitorData'),
        ('responseContext', 'visitorData'),
    )
    return traverse_obj(
        args, (..., visitor_data_paths),
        expected_type=compat_str, get_all=False)
444
@property
def is_authenticated(self):
    """True when a SAPISIDHASH authorization header can be generated."""
    auth_header = self._generate_sapisidhash_header()
    return bool(auth_header)
448
def extract_ytcfg(self, video_id, webpage):
    """Return the dict passed to ytcfg.set(...) in *webpage*, or {} if missing or unparsable."""
    if not webpage:
        return {}
    raw_ytcfg = self._search_regex(
        r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg',
        default='{}')
    parsed = self._parse_json(raw_ytcfg, video_id, fatal=False)
    return parsed or {}
456
def generate_api_headers(
        self, *, ytcfg=None, account_syncid=None, session_index=None,
        visitor_data=None, identity_token=None, api_hostname=None, default_client='web'):
    """
    Build the HTTP headers for an innertube API request.

    Values that are not passed explicitly are taken from *ytcfg*, with the
    client name/version falling back to the defaults of *default_client*.
    'Authorization' and 'X-Origin' are added only when a SAPISIDHASH can be
    generated. Headers whose value is None are left out of the result.
    """
    host = api_hostname if api_hostname else self._get_innertube_host(default_client)
    origin = 'https://' + host

    client_name = self._ytcfg_get_safe(
        ytcfg, lambda x: x['INNERTUBE_CONTEXT_CLIENT_NAME'], default_client=default_client)
    headers = {
        'X-YouTube-Client-Name': compat_str(client_name),
        'X-YouTube-Client-Version': self._extract_client_version(ytcfg, default_client),
        'Origin': origin,
        'X-Youtube-Identity-Token': identity_token or self._extract_identity_token(ytcfg),
        'X-Goog-PageId': account_syncid or self._extract_account_syncid(ytcfg),
        'X-Goog-Visitor-Id': visitor_data or self._extract_visitor_data(ytcfg)
    }

    if session_index is None:
        session_index = self._extract_session_index(ytcfg)
    if session_index is not None:
        headers['X-Goog-AuthUser'] = session_index
    elif account_syncid:
        # An account sync id without a known session index defaults to 0
        headers['X-Goog-AuthUser'] = 0

    auth = self._generate_sapisidhash_header(origin)
    if auth is not None:
        headers['Authorization'] = auth
        headers['X-Origin'] = origin

    return dict((name, value) for name, value in headers.items() if value is not None)
29f7c58a 481
2d6659b9 482 @staticmethod
483 def _build_api_continuation_query(continuation, ctp=None):
484 query = {
485 'continuation': continuation
486 }
487 # TODO: Inconsistency with clickTrackingParams.
488 # Currently we have a fixed ctp contained within context (from ytcfg)
489 # and a ctp in root query for continuation.
490 if ctp:
491 query['clickTracking'] = {'clickTrackingParams': ctp}
492 return query
493
@classmethod
def _extract_next_continuation_data(cls, renderer):
    """
    Build a continuation query from the renderer's nextContinuationData or
    reloadContinuationData; returns None when no continuation token is found.
    """
    data_getters = (
        lambda x: x['continuations'][0]['nextContinuationData'],
        lambda x: x['continuation']['reloadContinuationData'])
    continuation_data = try_get(renderer, data_getters, dict) or {}
    token = continuation_data.get('continuation')
    if not token:
        return None
    return cls._build_api_continuation_query(
        token, continuation_data.get('clickTrackingParams'))
2d6659b9 506
@classmethod
def _extract_continuation_ep_data(cls, continuation_ep: dict):
    """Build a continuation query from a continuation endpoint dict; None if it has no token."""
    if not isinstance(continuation_ep, dict):
        return None
    token = try_get(
        continuation_ep, lambda x: x['continuationCommand']['token'], compat_str)
    if not token:
        return None
    return cls._build_api_continuation_query(
        token, continuation_ep.get('clickTrackingParams'))
2d6659b9 516
@classmethod
def _extract_continuation(cls, renderer):
    """
    Return the continuation query for *renderer*, or None.

    The renderer's own continuation data is preferred; otherwise the first
    usable continuationItemRenderer among its 'contents' and 'items' is used.
    """
    direct = cls._extract_next_continuation_data(renderer)
    if direct:
        return direct

    entries = [
        entry
        for key in ('contents', 'items')
        for entry in try_get(renderer, lambda x, k=key: x[k], list) or []]

    endpoint_getters = (
        lambda x: x['continuationItemRenderer']['continuationEndpoint'],
        lambda x: x['continuationItemRenderer']['button']['buttonRenderer']['command'])
    for entry in entries:
        if not isinstance(entry, dict):
            continue
        query = cls._extract_continuation_ep_data(try_get(entry, endpoint_getters, dict))
        if query:
            return query
    return None
537
@classmethod
def _extract_alerts(cls, data):
    """
    Yield (alert_type, message) pairs for the alerts contained in *data*.

    Entries of data['alerts'] that are not dicts, alert renderers that are not
    dicts, and alerts without a type or without text are skipped.
    """
    for alert_dict in try_get(data, lambda x: x['alerts'], list) or []:
        if not isinstance(alert_dict, dict):
            continue
        for alert in alert_dict.values():
            # Malformed responses may carry non-dict values here; calling
            # .get() on them would raise AttributeError
            if not isinstance(alert, dict):
                continue
            alert_type = alert.get('type')
            if not alert_type:
                continue
            message = cls._get_text(alert, 'text')
            if message:
                yield alert_type, message
550
def _report_alerts(self, alerts, expected=True, fatal=True, only_once=False):
    """
    Report (alert_type, message) pairs.

    With *fatal* set, the last alert of type 'error' (case-insensitive) is
    raised as ExtractorError; every other alert is reported as a warning.
    Without *fatal*, all alerts are reported as warnings.
    """
    errors, warnings = [], []
    for alert_type, alert_message in alerts:
        is_fatal_error = alert_type.lower() == 'error' and fatal
        (errors if is_fatal_error else warnings).append([alert_type, alert_message])

    # Only the final error is raised; earlier ones are downgraded to warnings
    final_error = errors.pop() if errors else None
    for alert_type, alert_message in warnings + errors:
        self.report_warning('YouTube said: %s - %s' % (alert_type, alert_message), only_once=only_once)
    if final_error is not None:
        raise ExtractorError('YouTube said: %s' % final_error[1], expected=expected)
564
def _extract_and_report_alerts(self, data, *args, **kwargs):
    """Extract the alerts from *data* and hand them to _report_alerts() with the given options."""
    alerts = self._extract_alerts(data)
    return self._report_alerts(alerts, *args, **kwargs)
567
def _extract_badges(self, renderer: dict):
    """Return the set of lower-cased metadataBadgeRenderer labels found in renderer['badges']."""
    labels = (
        try_get(badge, lambda x: x['metadataBadgeRenderer']['label'], compat_str)
        for badge in try_get(renderer, lambda x: x['badges'], list) or [])
    return {label.lower() for label in labels if label}
575
@staticmethod
def _get_text(data, *path_list, max_runs=None):
    """
    Return the first non-empty text found in *data* under any of *path_list*.

    Each candidate object is read either from its 'simpleText' or by joining
    the 'text' of its 'runs' (a bare list is treated as the runs themselves),
    limited to the first *max_runs* runs when given. With no paths, *data*
    itself is the candidate. Returns None when no text is found.
    """
    def yields_many(path):
        # Paths with `...` or list/tuple keys are treated as producing a list of results
        return any(key is ... or isinstance(key, (list, tuple)) for key in variadic(path))

    for path in path_list or [None]:
        if path is None:
            candidates = [data]
        else:
            found = traverse_obj(data, path, default=[])
            candidates = found if yields_many(path) else [found]

        for item in candidates:
            simple_text = try_get(item, lambda x: x['simpleText'], compat_str)
            if simple_text:
                return simple_text

            runs = try_get(item, lambda x: x['runs'], list) or []
            if not runs and isinstance(item, list):
                runs = item

            limit = min(len(runs), max_runs or len(runs))
            joined = ''.join(traverse_obj(runs[:limit], (..., 'text'), expected_type=str, default=[]))
            if joined:
                return joined
    return None
47193e02 597
def _extract_response(self, item_id, query, note='Downloading API JSON', headers=None,
                      ytcfg=None, check_get_keys=None, ep='browse', fatal=True, api_hostname=None,
                      default_client='web'):
    """
    Call the API endpoint *ep* with retries and return the JSON response.

    Up to 'extractor_retries' (default 3) retries are made on network errors
    (except HTTP 403/429), on "unknown error" alerts inside a 200 response,
    and when none of *check_get_keys* is present in the response.

    When the request ultimately fails, the error is raised if *fatal* is
    true; otherwise a warning is reported and None is returned.
    """
    response = None
    last_error = None
    count = -1
    retries = self.get_param('extractor_retries', 3)
    if check_get_keys is None:
        check_get_keys = []
    while count < retries:
        count += 1
        if last_error:
            self.report_warning('%s. Retrying ...' % remove_end(last_error, '.'))
        try:
            response = self._call_api(
                ep=ep, fatal=True, headers=headers,
                video_id=item_id, query=query,
                context=self._extract_context(ytcfg, default_client),
                api_key=self._extract_api_key(ytcfg, default_client),
                api_hostname=api_hostname, default_client=default_client,
                note='%s%s' % (note, ' (retry #%d)' % count if count else ''))
        except ExtractorError as e:
            if isinstance(e.cause, network_exceptions):
                if isinstance(e.cause, compat_HTTPError) and not is_html(e.cause.read(512)):
                    # Non-HTML error body: surface the API's own error message as a warning
                    e.cause.seek(0)
                    yt_error = try_get(
                        self._parse_json(e.cause.read().decode(), item_id, fatal=False),
                        lambda x: x['error']['message'], compat_str)
                    if yt_error:
                        self._report_alerts([('ERROR', yt_error)], fatal=False)
                # Downloading page may result in intermittent 5xx HTTP error
                # Sometimes a 404 is also received. See: https://github.com/ytdl-org/youtube-dl/issues/28289
                # We also want to catch all other network exceptions since errors in later pages can be troublesome
                # See https://github.com/yt-dlp/yt-dlp/issues/507#issuecomment-880188210
                if not isinstance(e.cause, compat_HTTPError) or e.cause.code not in (403, 429):
                    last_error = error_to_compat_str(e.cause or e.msg)
                    if count < retries:
                        continue
            if fatal:
                raise
            else:
                self.report_warning(error_to_compat_str(e))
                return

        else:
            try:
                self._extract_and_report_alerts(response, only_once=True)
            except ExtractorError as e:
                # YouTube servers may return errors we want to retry on in a 200 OK response
                # See: https://github.com/yt-dlp/yt-dlp/issues/839
                if 'unknown error' in e.msg.lower():
                    last_error = e.msg
                    # Only retry while attempts remain; once they are exhausted,
                    # fall through to the failure handling below instead of
                    # leaving the loop and returning the error response
                    if count < retries:
                        continue
                if fatal:
                    raise
                self.report_warning(error_to_compat_str(e))
                return
            if not check_get_keys or dict_get(response, check_get_keys):
                break
            # Youtube sometimes sends incomplete data
            # See: https://github.com/ytdl-org/youtube-dl/issues/28194
            last_error = 'Incomplete data received'
            if count >= retries:
                if fatal:
                    raise ExtractorError(last_error)
                else:
                    self.report_warning(last_error)
                    return
    return response
667
9297939e 668 @staticmethod
669 def is_music_url(url):
670 return re.match(r'https?://music\.youtube\.com/', url) is not None
671
def _extract_video(self, renderer):
    """
    Convert a video renderer dict into a 'url' result pointing at YoutubeIE.

    Title, description, duration, view count and uploader are taken from the
    renderer's text fields; anything that is missing ends up as None.
    """
    video_id = renderer.get('videoId')
    title = self._get_text(renderer, 'title')
    description = self._get_text(renderer, 'descriptionSnippet')

    length_text = self._get_text(
        renderer, 'lengthText', ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'text'))
    duration = parse_duration(length_text)

    # Drop all whitespace, then keep only the leading digit/comma group
    compact_views = re.sub(r'\s', '', self._get_text(renderer, 'viewCountText') or '')
    leading_number = self._search_regex(
        r'^([\d,]+)', compact_views, 'view count', default=None)
    view_count = str_to_int(leading_number)

    uploader = self._get_text(renderer, 'ownerText', 'shortBylineText')

    return {
        '_type': 'url',
        'ie_key': YoutubeIE.ie_key(),
        'id': video_id,
        'url': f'https://www.youtube.com/watch?v={video_id}',
        'title': title,
        'description': description,
        'duration': duration,
        'view_count': view_count,
        'uploader': uploader,
    }
696
0c148415 697
360e1ca5 698class YoutubeIE(YoutubeBaseInfoExtractor):
96565c7e 699 IE_DESC = 'YouTube'
bc2ca1bb 700 _INVIDIOUS_SITES = (
701 # invidious-redirect websites
702 r'(?:www\.)?redirect\.invidious\.io',
703 r'(?:(?:www|dev)\.)?invidio\.us',
704 # Invidious instances taken from https://github.com/iv-org/documentation/blob/master/Invidious-Instances.md
705 r'(?:www\.)?invidious\.pussthecat\.org',
bc2ca1bb 706 r'(?:www\.)?invidious\.zee\.li',
bc2ca1bb 707 r'(?:www\.)?invidious\.ethibox\.fr',
bc2ca1bb 708 r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',
bc2ca1bb 709 # youtube-dl invidious instances list
710 r'(?:(?:www|no)\.)?invidiou\.sh',
711 r'(?:(?:www|fi)\.)?invidious\.snopyta\.org',
712 r'(?:www\.)?invidious\.kabi\.tk',
bc2ca1bb 713 r'(?:www\.)?invidious\.mastodon\.host',
714 r'(?:www\.)?invidious\.zapashcanon\.fr',
ed807c18 715 r'(?:www\.)?(?:invidious(?:-us)?|piped)\.kavin\.rocks',
201c1459 716 r'(?:www\.)?invidious\.tinfoil-hat\.net',
717 r'(?:www\.)?invidious\.himiko\.cloud',
718 r'(?:www\.)?invidious\.reallyancient\.tech',
bc2ca1bb 719 r'(?:www\.)?invidious\.tube',
720 r'(?:www\.)?invidiou\.site',
721 r'(?:www\.)?invidious\.site',
722 r'(?:www\.)?invidious\.xyz',
723 r'(?:www\.)?invidious\.nixnet\.xyz',
201c1459 724 r'(?:www\.)?invidious\.048596\.xyz',
bc2ca1bb 725 r'(?:www\.)?invidious\.drycat\.fr',
201c1459 726 r'(?:www\.)?inv\.skyn3t\.in',
bc2ca1bb 727 r'(?:www\.)?tube\.poal\.co',
728 r'(?:www\.)?tube\.connect\.cafe',
729 r'(?:www\.)?vid\.wxzm\.sx',
730 r'(?:www\.)?vid\.mint\.lgbt',
201c1459 731 r'(?:www\.)?vid\.puffyan\.us',
bc2ca1bb 732 r'(?:www\.)?yewtu\.be',
733 r'(?:www\.)?yt\.elukerio\.org',
734 r'(?:www\.)?yt\.lelux\.fi',
735 r'(?:www\.)?invidious\.ggc-project\.de',
736 r'(?:www\.)?yt\.maisputain\.ovh',
201c1459 737 r'(?:www\.)?ytprivate\.com',
738 r'(?:www\.)?invidious\.13ad\.de',
bc2ca1bb 739 r'(?:www\.)?invidious\.toot\.koeln',
740 r'(?:www\.)?invidious\.fdn\.fr',
741 r'(?:www\.)?watch\.nettohikari\.com',
ed807c18 742 r'(?:www\.)?invidious\.namazso\.eu',
743 r'(?:www\.)?invidious\.silkky\.cloud',
744 r'(?:www\.)?invidious\.exonip\.de',
745 r'(?:www\.)?invidious\.riverside\.rocks',
746 r'(?:www\.)?invidious\.blamefran\.net',
747 r'(?:www\.)?invidious\.moomoo\.de',
748 r'(?:www\.)?ytb\.trom\.tf',
749 r'(?:www\.)?yt\.cyberhost\.uk',
bc2ca1bb 750 r'(?:www\.)?kgg2m7yk5aybusll\.onion',
751 r'(?:www\.)?qklhadlycap4cnod\.onion',
752 r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',
753 r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',
754 r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',
755 r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',
756 r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',
757 r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',
ed807c18 758 r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',
759 r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',
760 r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',
761 r'(?:www\.)?hpniueoejy4opn7bc4ftgazyqjoeqwlvh2uiku2xqku6zpoa4bf5ruid\.onion',
bc2ca1bb 762 )
cb7dfeea 763 _VALID_URL = r"""(?x)^
c5e8d7af 764 (
edb53e2d 765 (?:https?://|//) # http(s):// or protocol-independent URL
bc2ca1bb 766 (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com|
767 (?:www\.)?deturl\.com/www\.youtube\.com|
768 (?:www\.)?pwnyoutube\.com|
769 (?:www\.)?hooktube\.com|
770 (?:www\.)?yourepeat\.com|
771 tube\.majestyc\.net|
772 %(invidious)s|
773 youtube\.googleapis\.com)/ # the various hostnames, with wildcard subdomains
c5e8d7af
PH
774 (?:.*?\#/)? # handle anchor (#/) redirect urls
775 (?: # the various things that can precede the ID:
8fc54b12 776 (?:(?:v|embed|e|shorts)/(?!videoseries)) # v/ or embed/ or e/ or shorts/
c5e8d7af 777 |(?: # or the v= param in all its forms
f7000f3a 778 (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
c5e8d7af 779 (?:\?|\#!?) # the params delimiter ? or # or #!
040ac686 780 (?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
c5e8d7af
PH
781 v=
782 )
f4b05232 783 ))
cbaed4bb
S
784 |(?:
785 youtu\.be| # just youtu.be/xxxx
6d4fc66b
S
786 vid\.plus| # or vid.plus/xxxx
787 zwearz\.com/watch| # or zwearz.com/watch/xxxx
bc2ca1bb 788 %(invidious)s
cbaed4bb 789 )/
edb53e2d 790 |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
f4b05232 791 )
c5e8d7af 792 )? # all until now is optional -> you can pass the naked ID
201c1459 793 (?P<id>[0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
c5e8d7af 794 (?(1).+)? # if we found the ID, everything can follow
9297939e 795 (?:\#|$)""" % {
bc2ca1bb 796 'invidious': '|'.join(_INVIDIOUS_SITES),
797 }
e40c758c 798 _PLAYER_INFO_RE = (
cc2db878 799 r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player',
800 r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',
545cc85d 801 r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$',
e40c758c 802 )
# Static table of known properties per YouTube format, keyed by itag (as a
# string). Each value is a dict of format fields (container extension,
# dimensions, codecs, audio bitrate, ...). Only the fields that are known for
# a given itag are listed; 3D and HLS entries carry a negative 'preference'.
_formats = {
    '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
    '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
    '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
    '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
    '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
    '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
    '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
    '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
    # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
    '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
    '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
    '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
    '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
    '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
    '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
    '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
    '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
    '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},


    # 3D videos
    '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
    '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
    '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
    '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
    '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
    '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
    '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},

    # Apple HTTP Live Streaming
    '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
    '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
    '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
    '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
    '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
    '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
    '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
    '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},

    # DASH mp4 video
    '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
    '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
    '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
    '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
    '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
    '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'},  # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
    '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
    '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
    '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
    '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
    '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
    '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},

    # DASH mp4 audio
    '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
    '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
    '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
    '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
    '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
    '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
    '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},

    # DASH webm video
    '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
    '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
    '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
    '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
    '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
    '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
    '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
    '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
    '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
    '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
    '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
    '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
    '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
    '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
    '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
    # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
    '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
    '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
    '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
    '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
    '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
    '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},

    # DASH webm audio
    '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
    '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},

    # DASH webm audio with opus inside
    '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
    '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
    '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},

    # RTMP (unnamed)
    '_rtmp': {'protocol': 'rtmp'},

    # av01 video only formats sometimes served with "unknown" codecs
    '394': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},
    '395': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},
    '396': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'av01.0.01M.08'},
    '397': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'av01.0.04M.08'},
    '398': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'av01.0.05M.08'},
    '399': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'av01.0.08M.08'},
    '400': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},
    '401': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},
}
# Subtitle format identifiers known to this extractor.
# NOTE(review): presumably these are the values requested from YouTube's
# caption endpoint -- confirm against the subtitle extraction code.
_SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')

# NOTE(review): looks like this switches off the base extractor's geo-bypass
# handling for this extractor -- confirm against InfoExtractor.
_GEO_BYPASS = False

# Name of this extractor.
IE_NAME = 'youtube'
2eb88d95
PH
917 _TESTS = [
918 {
2d3d2997 919 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
4bc3a23e
PH
920 'info_dict': {
921 'id': 'BaW_jenozKc',
922 'ext': 'mp4',
3867038a 923 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
4bc3a23e
PH
924 'uploader': 'Philipp Hagemeister',
925 'uploader_id': 'phihag',
ec85ded8 926 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
dd4c4492
S
927 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
928 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
4bc3a23e 929 'upload_date': '20121002',
3867038a 930 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
4bc3a23e 931 'categories': ['Science & Technology'],
3867038a 932 'tags': ['youtube-dl'],
556dbe7f 933 'duration': 10,
dbdaaa23 934 'view_count': int,
3e7c1224
PH
935 'like_count': int,
936 'dislike_count': int,
7c80519c 937 'start_time': 1,
297a564b 938 'end_time': 9,
2eb88d95 939 }
0e853ca4 940 },
fccd3771 941 {
4bc3a23e
PH
942 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
943 'note': 'Embed-only video (#1746)',
944 'info_dict': {
945 'id': 'yZIXLfi8CZQ',
946 'ext': 'mp4',
947 'upload_date': '20120608',
948 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
949 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
950 'uploader': 'SET India',
94bfcd23 951 'uploader_id': 'setindia',
ec85ded8 952 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
94bfcd23 953 'age_limit': 18,
545cc85d 954 },
955 'skip': 'Private video',
fccd3771 956 },
11b56058 957 {
8bdd16b4 958 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=yZIXLfi8CZQ',
11b56058
PM
959 'note': 'Use the first video ID in the URL',
960 'info_dict': {
961 'id': 'BaW_jenozKc',
962 'ext': 'mp4',
3867038a 963 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
11b56058
PM
964 'uploader': 'Philipp Hagemeister',
965 'uploader_id': 'phihag',
ec85ded8 966 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
11b56058 967 'upload_date': '20121002',
3867038a 968 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
11b56058 969 'categories': ['Science & Technology'],
3867038a 970 'tags': ['youtube-dl'],
556dbe7f 971 'duration': 10,
dbdaaa23 972 'view_count': int,
11b56058
PM
973 'like_count': int,
974 'dislike_count': int,
34a7de29
S
975 },
976 'params': {
977 'skip_download': True,
978 },
11b56058 979 },
dd27fd17 980 {
2d3d2997 981 'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
4bc3a23e
PH
982 'note': '256k DASH audio (format 141) via DASH manifest',
983 'info_dict': {
984 'id': 'a9LDPn-MO4I',
985 'ext': 'm4a',
986 'upload_date': '20121002',
987 'uploader_id': '8KVIDEO',
ec85ded8 988 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
4bc3a23e
PH
989 'description': '',
990 'uploader': '8KVIDEO',
991 'title': 'UHDTV TEST 8K VIDEO.mp4'
4919603f 992 },
4bc3a23e
PH
993 'params': {
994 'youtube_include_dash_manifest': True,
995 'format': '141',
4919603f 996 },
de3c7fe0 997 'skip': 'format 141 not served anymore',
dd27fd17 998 },
8bdd16b4 999 # DASH manifest with encrypted signature
1000 {
1001 'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
1002 'info_dict': {
1003 'id': 'IB3lcPjvWLA',
1004 'ext': 'm4a',
1005 'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
1006 'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
1007 'duration': 244,
1008 'uploader': 'AfrojackVEVO',
1009 'uploader_id': 'AfrojackVEVO',
1010 'upload_date': '20131011',
cc2db878 1011 'abr': 129.495,
8bdd16b4 1012 },
1013 'params': {
1014 'youtube_include_dash_manifest': True,
1015 'format': '141/bestaudio[ext=m4a]',
1016 },
1017 },
65c2fde2 1018 # Age-gate videos. See https://github.com/yt-dlp/yt-dlp/pull/575#issuecomment-888837000
c522adb1 1019 {
65c2fde2 1020 'note': 'Embed allowed age-gate video',
2d3d2997 1021 'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
c522adb1
JMF
1022 'info_dict': {
1023 'id': 'HtVdAasjOgU',
1024 'ext': 'mp4',
1025 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
ec85ded8 1026 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
556dbe7f 1027 'duration': 142,
c522adb1
JMF
1028 'uploader': 'The Witcher',
1029 'uploader_id': 'WitcherGame',
ec85ded8 1030 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
c522adb1 1031 'upload_date': '20140605',
34952f09 1032 'age_limit': 18,
c522adb1
JMF
1033 },
1034 },
65c2fde2 1035 {
1036 'note': 'Age-gate video with embed allowed in public site',
1037 'url': 'https://youtube.com/watch?v=HsUATh_Nc2U',
1038 'info_dict': {
1039 'id': 'HsUATh_Nc2U',
1040 'ext': 'mp4',
1041 'title': 'Godzilla 2 (Official Video)',
1042 'description': 'md5:bf77e03fcae5529475e500129b05668a',
1043 'upload_date': '20200408',
1044 'uploader_id': 'FlyingKitty900',
1045 'uploader': 'FlyingKitty',
1046 'age_limit': 18,
1047 },
1048 },
1049 {
1050 'note': 'Age-gate video embedable only with clientScreen=EMBED',
1051 'url': 'https://youtube.com/watch?v=Tq92D6wQ1mg',
1052 'info_dict': {
1053 'id': 'Tq92D6wQ1mg',
1054 'title': '[MMD] Adios - EVERGLOW [+Motion DL]',
3619f78d 1055 'ext': 'mp4',
1056 'upload_date': '20191227',
65c2fde2 1057 'uploader_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',
1058 'uploader': 'Projekt Melody',
1059 'description': 'md5:17eccca93a786d51bc67646756894066',
1060 'age_limit': 18,
1061 },
1062 },
1063 {
1064 'note': 'Non-Agegated non-embeddable video',
1065 'url': 'https://youtube.com/watch?v=MeJVWBSsPAY',
1066 'info_dict': {
1067 'id': 'MeJVWBSsPAY',
1068 'ext': 'mp4',
1069 'title': 'OOMPH! - Such Mich Find Mich (Lyrics)',
1070 'uploader': 'Herr Lurik',
1071 'uploader_id': 'st3in234',
1072 'description': 'Fan Video. Music & Lyrics by OOMPH!.',
1073 'upload_date': '20130730',
1074 },
1075 },
1076 {
1077 'note': 'Non-bypassable age-gated video',
1078 'url': 'https://youtube.com/watch?v=Cr381pDsSsA',
1079 'only_matching': True,
1080 },
8bdd16b4 1081 # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
1082 # YouTube Red ad is not captured for creator
1083 {
1084 'url': '__2ABJjxzNo',
1085 'info_dict': {
1086 'id': '__2ABJjxzNo',
1087 'ext': 'mp4',
1088 'duration': 266,
1089 'upload_date': '20100430',
1090 'uploader_id': 'deadmau5',
1091 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
545cc85d 1092 'creator': 'deadmau5',
1093 'description': 'md5:6cbcd3a92ce1bc676fc4d6ab4ace2336',
8bdd16b4 1094 'uploader': 'deadmau5',
1095 'title': 'Deadmau5 - Some Chords (HD)',
545cc85d 1096 'alt_title': 'Some Chords',
8bdd16b4 1097 },
1098 'expected_warnings': [
1099 'DASH manifest missing',
1100 ]
1101 },
067aa17e 1102 # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
e52a40ab
PH
1103 {
1104 'url': 'lqQg6PlCWgI',
1105 'info_dict': {
1106 'id': 'lqQg6PlCWgI',
1107 'ext': 'mp4',
556dbe7f 1108 'duration': 6085,
90227264 1109 'upload_date': '20150827',
cbe2bd91 1110 'uploader_id': 'olympic',
ec85ded8 1111 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
cbe2bd91 1112 'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
11f9be09 1113 'uploader': 'Olympics',
cbe2bd91
PH
1114 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
1115 },
1116 'params': {
1117 'skip_download': 'requires avconv',
e52a40ab 1118 }
cbe2bd91 1119 },
6271f1ca
PH
1120 # Non-square pixels
1121 {
1122 'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
1123 'info_dict': {
1124 'id': '_b-2C3KPAM0',
1125 'ext': 'mp4',
1126 'stretched_ratio': 16 / 9.,
556dbe7f 1127 'duration': 85,
6271f1ca
PH
1128 'upload_date': '20110310',
1129 'uploader_id': 'AllenMeow',
ec85ded8 1130 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
6271f1ca 1131 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
eb6793ba 1132 'uploader': '孫ᄋᄅ',
6271f1ca
PH
1133 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
1134 },
06b491eb
S
1135 },
1136 # url_encoded_fmt_stream_map is empty string
1137 {
1138 'url': 'qEJwOuvDf7I',
1139 'info_dict': {
1140 'id': 'qEJwOuvDf7I',
f57b7835 1141 'ext': 'webm',
06b491eb
S
1142 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
1143 'description': '',
1144 'upload_date': '20150404',
1145 'uploader_id': 'spbelect',
1146 'uploader': 'Наблюдатели Петербурга',
1147 },
1148 'params': {
1149 'skip_download': 'requires avconv',
e323cf3f
S
1150 },
1151 'skip': 'This live event has ended.',
06b491eb 1152 },
067aa17e 1153 # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
da77d856
S
1154 {
1155 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
1156 'info_dict': {
1157 'id': 'FIl7x6_3R5Y',
eb6793ba 1158 'ext': 'webm',
da77d856
S
1159 'title': 'md5:7b81415841e02ecd4313668cde88737a',
1160 'description': 'md5:116377fd2963b81ec4ce64b542173306',
556dbe7f 1161 'duration': 220,
da77d856
S
1162 'upload_date': '20150625',
1163 'uploader_id': 'dorappi2000',
ec85ded8 1164 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
da77d856 1165 'uploader': 'dorappi2000',
eb6793ba 1166 'formats': 'mincount:31',
da77d856 1167 },
eb6793ba 1168 'skip': 'not actual anymore',
2ee8f5d8 1169 },
8a1a26ce
YCH
1170 # DASH manifest with segment_list
1171 {
1172 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
1173 'md5': '8ce563a1d667b599d21064e982ab9e31',
1174 'info_dict': {
1175 'id': 'CsmdDsKjzN8',
1176 'ext': 'mp4',
17ee98e1 1177 'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510
8a1a26ce
YCH
1178 'uploader': 'Airtek',
1179 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
1180 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
1181 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
1182 },
1183 'params': {
1184 'youtube_include_dash_manifest': True,
1185 'format': '135', # bestvideo
be49068d
S
1186 },
1187 'skip': 'This live event has ended.',
2ee8f5d8 1188 },
cf7e015f
S
1189 {
1190 # Multifeed videos (multiple cameras), URL is for Main Camera
545cc85d 1191 'url': 'https://www.youtube.com/watch?v=jvGDaLqkpTg',
cf7e015f 1192 'info_dict': {
545cc85d 1193 'id': 'jvGDaLqkpTg',
1194 'title': 'Tom Clancy Free Weekend Rainbow Whatever',
1195 'description': 'md5:e03b909557865076822aa169218d6a5d',
cf7e015f
S
1196 },
1197 'playlist': [{
1198 'info_dict': {
545cc85d 1199 'id': 'jvGDaLqkpTg',
cf7e015f 1200 'ext': 'mp4',
545cc85d 1201 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Main Camera)',
1202 'description': 'md5:e03b909557865076822aa169218d6a5d',
1203 'duration': 10643,
1204 'upload_date': '20161111',
1205 'uploader': 'Team PGP',
1206 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1207 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
1208 },
1209 }, {
1210 'info_dict': {
545cc85d 1211 'id': '3AKt1R1aDnw',
cf7e015f 1212 'ext': 'mp4',
545cc85d 1213 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 2)',
1214 'description': 'md5:e03b909557865076822aa169218d6a5d',
1215 'duration': 10991,
1216 'upload_date': '20161111',
1217 'uploader': 'Team PGP',
1218 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1219 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
1220 },
1221 }, {
1222 'info_dict': {
545cc85d 1223 'id': 'RtAMM00gpVc',
cf7e015f 1224 'ext': 'mp4',
545cc85d 1225 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 3)',
1226 'description': 'md5:e03b909557865076822aa169218d6a5d',
1227 'duration': 10995,
1228 'upload_date': '20161111',
1229 'uploader': 'Team PGP',
1230 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1231 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
1232 },
1233 }, {
1234 'info_dict': {
545cc85d 1235 'id': '6N2fdlP3C5U',
cf7e015f 1236 'ext': 'mp4',
545cc85d 1237 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 4)',
1238 'description': 'md5:e03b909557865076822aa169218d6a5d',
1239 'duration': 10990,
1240 'upload_date': '20161111',
1241 'uploader': 'Team PGP',
1242 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1243 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
1244 },
1245 }],
1246 'params': {
1247 'skip_download': True,
1248 },
65c2fde2 1249 'skip': 'Not multifeed anymore',
cbaed4bb 1250 },
f9f49d87 1251 {
067aa17e 1252 # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
f9f49d87
S
1253 'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
1254 'info_dict': {
1255 'id': 'gVfLd0zydlo',
1256 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
1257 },
1258 'playlist_count': 2,
be49068d 1259 'skip': 'Not multifeed anymore',
f9f49d87 1260 },
cbaed4bb 1261 {
2d3d2997 1262 'url': 'https://vid.plus/FlRa-iH7PGw',
cbaed4bb 1263 'only_matching': True,
0e49d9a6 1264 },
6d4fc66b 1265 {
2d3d2997 1266 'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
6d4fc66b
S
1267 'only_matching': True,
1268 },
0e49d9a6 1269 {
067aa17e 1270 # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
a8776b10 1271 # Also tests cut-off URL expansion in video description (see
067aa17e
S
1272 # https://github.com/ytdl-org/youtube-dl/issues/1892,
1273 # https://github.com/ytdl-org/youtube-dl/issues/8164)
0e49d9a6
LL
1274 'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
1275 'info_dict': {
1276 'id': 'lsguqyKfVQg',
1277 'ext': 'mp4',
1278 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
11f9be09 1279 'alt_title': 'Dark Walk',
0e49d9a6 1280 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
556dbe7f 1281 'duration': 133,
0e49d9a6
LL
1282 'upload_date': '20151119',
1283 'uploader_id': 'IronSoulElf',
ec85ded8 1284 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
0e49d9a6 1285 'uploader': 'IronSoulElf',
11f9be09 1286 'creator': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',
1287 'track': 'Dark Walk',
1288 'artist': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',
92bc97d3 1289 'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
0e49d9a6
LL
1290 },
1291 'params': {
1292 'skip_download': True,
1293 },
1294 },
61f92af1 1295 {
067aa17e 1296 # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
61f92af1
S
1297 'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
1298 'only_matching': True,
1299 },
313dfc45
LL
1300 {
1301 # Video with yt:stretch=17:0
1302 'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
1303 'info_dict': {
1304 'id': 'Q39EVAstoRM',
1305 'ext': 'mp4',
1306 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
1307 'description': 'md5:ee18a25c350637c8faff806845bddee9',
1308 'upload_date': '20151107',
1309 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
1310 'uploader': 'CH GAMER DROID',
1311 },
1312 'params': {
1313 'skip_download': True,
1314 },
be49068d 1315 'skip': 'This video does not exist.',
313dfc45 1316 },
201c1459 1317 {
1318 # Video with incomplete 'yt:stretch=16:'
1319 'url': 'https://www.youtube.com/watch?v=FRhJzUSJbGI',
1320 'only_matching': True,
1321 },
7caf9830
S
1322 {
1323 # Video licensed under Creative Commons
1324 'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
1325 'info_dict': {
1326 'id': 'M4gD1WSo5mA',
1327 'ext': 'mp4',
1328 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
1329 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
556dbe7f 1330 'duration': 721,
7caf9830
S
1331 'upload_date': '20150127',
1332 'uploader_id': 'BerkmanCenter',
ec85ded8 1333 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
556dbe7f 1334 'uploader': 'The Berkman Klein Center for Internet & Society',
7caf9830
S
1335 'license': 'Creative Commons Attribution license (reuse allowed)',
1336 },
1337 'params': {
1338 'skip_download': True,
1339 },
1340 },
fd050249
S
1341 {
1342 # Channel-like uploader_url
1343 'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
1344 'info_dict': {
1345 'id': 'eQcmzGIKrzg',
1346 'ext': 'mp4',
1347 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
545cc85d 1348 'description': 'md5:13a2503d7b5904ef4b223aa101628f39',
556dbe7f 1349 'duration': 4060,
fd050249 1350 'upload_date': '20151119',
eb6793ba 1351 'uploader': 'Bernie Sanders',
fd050249 1352 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
ec85ded8 1353 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
fd050249
S
1354 'license': 'Creative Commons Attribution license (reuse allowed)',
1355 },
1356 'params': {
1357 'skip_download': True,
1358 },
1359 },
040ac686
S
1360 {
1361 'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
1362 'only_matching': True,
7f29cf54
S
1363 },
1364 {
067aa17e 1365 # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
7f29cf54
S
1366 'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
1367 'only_matching': True,
6496ccb4
S
1368 },
1369 {
1370 # Rental video preview
1371 'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
1372 'info_dict': {
1373 'id': 'uGpuVWrhIzE',
1374 'ext': 'mp4',
1375 'title': 'Piku - Trailer',
1376 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
1377 'upload_date': '20150811',
1378 'uploader': 'FlixMatrix',
1379 'uploader_id': 'FlixMatrixKaravan',
ec85ded8 1380 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
6496ccb4
S
1381 'license': 'Standard YouTube License',
1382 },
1383 'params': {
1384 'skip_download': True,
1385 },
eb6793ba 1386 'skip': 'This video is not available.',
022a5d66 1387 },
12afdc2a
S
1388 {
1389 # YouTube Red video with episode data
1390 'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
1391 'info_dict': {
1392 'id': 'iqKdEhx-dD4',
1393 'ext': 'mp4',
1394 'title': 'Isolation - Mind Field (Ep 1)',
545cc85d 1395 'description': 'md5:f540112edec5d09fc8cc752d3d4ba3cd',
556dbe7f 1396 'duration': 2085,
12afdc2a
S
1397 'upload_date': '20170118',
1398 'uploader': 'Vsauce',
1399 'uploader_id': 'Vsauce',
1400 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
12afdc2a
S
1401 'series': 'Mind Field',
1402 'season_number': 1,
1403 'episode_number': 1,
1404 },
1405 'params': {
1406 'skip_download': True,
1407 },
1408 'expected_warnings': [
1409 'Skipping DASH manifest',
1410 ],
1411 },
c7121fa7
S
1412 {
1413 # The following content has been identified by the YouTube community
1414 # as inappropriate or offensive to some audiences.
1415 'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
1416 'info_dict': {
1417 'id': '6SJNVb0GnPI',
1418 'ext': 'mp4',
1419 'title': 'Race Differences in Intelligence',
1420 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
1421 'duration': 965,
1422 'upload_date': '20140124',
1423 'uploader': 'New Century Foundation',
1424 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
1425 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
c7121fa7
S
1426 },
1427 'params': {
1428 'skip_download': True,
1429 },
545cc85d 1430 'skip': 'This video has been removed for violating YouTube\'s policy on hate speech.',
c7121fa7 1431 },
022a5d66
S
1432 {
1433 # itag 212
1434 'url': '1t24XAntNCY',
1435 'only_matching': True,
fd5c4aab
S
1436 },
1437 {
1438 # geo restricted to JP
1439 'url': 'sJL6WA-aGkQ',
1440 'only_matching': True,
1441 },
cd5a74a2
S
1442 {
1443 'url': 'https://invidio.us/watch?v=BaW_jenozKc',
1444 'only_matching': True,
1445 },
bc2ca1bb 1446 {
1447 'url': 'https://redirect.invidious.io/watch?v=BaW_jenozKc',
1448 'only_matching': True,
1449 },
1450 {
1451 # from https://nitter.pussthecat.org/YouTube/status/1360363141947944964#m
1452 'url': 'https://redirect.invidious.io/Yh0AhrY9GjA',
1453 'only_matching': True,
1454 },
825cd268
RA
1455 {
1456 # DRM protected
1457 'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
1458 'only_matching': True,
4fe54c12
S
1459 },
1460 {
1461 # Video with unsupported adaptive stream type formats
1462 'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
1463 'info_dict': {
1464 'id': 'Z4Vy8R84T1U',
1465 'ext': 'mp4',
1466 'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
1467 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
1468 'duration': 433,
1469 'upload_date': '20130923',
1470 'uploader': 'Amelia Putri Harwita',
1471 'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',
1472 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',
1473 'formats': 'maxcount:10',
1474 },
1475 'params': {
1476 'skip_download': True,
1477 'youtube_include_dash_manifest': False,
1478 },
5429d6a9 1479 'skip': 'not actual anymore',
5caabd3c 1480 },
1481 {
822b9d9c 1482 # Youtube Music Auto-generated description
5caabd3c 1483 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1484 'info_dict': {
1485 'id': 'MgNrAu2pzNs',
1486 'ext': 'mp4',
1487 'title': 'Voyeur Girl',
1488 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
1489 'upload_date': '20190312',
5429d6a9
S
1490 'uploader': 'Stephen - Topic',
1491 'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',
5caabd3c 1492 'artist': 'Stephen',
1493 'track': 'Voyeur Girl',
1494 'album': 'it\'s too much love to know my dear',
1495 'release_date': '20190313',
1496 'release_year': 2019,
1497 },
1498 'params': {
1499 'skip_download': True,
1500 },
1501 },
66b48727
RA
1502 {
1503 'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',
1504 'only_matching': True,
1505 },
011e75e6
S
1506 {
1507 # invalid -> valid video id redirection
1508 'url': 'DJztXj2GPfl',
1509 'info_dict': {
1510 'id': 'DJztXj2GPfk',
1511 'ext': 'mp4',
1512 'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',
1513 'description': 'md5:bf577a41da97918e94fa9798d9228825',
1514 'upload_date': '20090125',
1515 'uploader': 'Prochorowka',
1516 'uploader_id': 'Prochorowka',
1517 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',
1518 'artist': 'Panjabi MC',
1519 'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',
1520 'album': 'Beware of the Boys (Mundian To Bach Ke)',
1521 },
1522 'params': {
1523 'skip_download': True,
1524 },
545cc85d 1525 'skip': 'Video unavailable',
ea74e00b
DP
1526 },
1527 {
1528 # empty description results in an empty string
1529 'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',
1530 'info_dict': {
1531 'id': 'x41yOUIvK2k',
1532 'ext': 'mp4',
1533 'title': 'IMG 3456',
1534 'description': '',
1535 'upload_date': '20170613',
1536 'uploader_id': 'ElevageOrVert',
1537 'uploader': 'ElevageOrVert',
1538 },
1539 'params': {
1540 'skip_download': True,
1541 },
1542 },
a0566bbf 1543 {
29f7c58a 1544 # with '};' inside yt initial data (see [1])
1545 # see [2] for an example with '};' inside ytInitialPlayerResponse
1546 # 1. https://github.com/ytdl-org/youtube-dl/issues/27093
1547 # 2. https://github.com/ytdl-org/youtube-dl/issues/27216
a0566bbf 1548 'url': 'https://www.youtube.com/watch?v=CHqg6qOn4no',
1549 'info_dict': {
1550 'id': 'CHqg6qOn4no',
1551 'ext': 'mp4',
1552 'title': 'Part 77 Sort a list of simple types in c#',
1553 'description': 'md5:b8746fa52e10cdbf47997903f13b20dc',
1554 'upload_date': '20130831',
1555 'uploader_id': 'kudvenkat',
1556 'uploader': 'kudvenkat',
1557 },
1558 'params': {
1559 'skip_download': True,
1560 },
1561 },
29f7c58a 1562 {
1563 # another example of '};' in ytInitialData
1564 'url': 'https://www.youtube.com/watch?v=gVfgbahppCY',
1565 'only_matching': True,
1566 },
1567 {
1568 'url': 'https://www.youtube.com/watch_popup?v=63RmMXCd_bQ',
1569 'only_matching': True,
1570 },
545cc85d 1571 {
cc2db878 1572 # https://github.com/ytdl-org/youtube-dl/pull/28094
1573 'url': 'OtqTfy26tG0',
1574 'info_dict': {
1575 'id': 'OtqTfy26tG0',
1576 'ext': 'mp4',
1577 'title': 'Burn Out',
1578 'description': 'md5:8d07b84dcbcbfb34bc12a56d968b6131',
1579 'upload_date': '20141120',
1580 'uploader': 'The Cinematic Orchestra - Topic',
1581 'uploader_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',
1582 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',
1583 'artist': 'The Cinematic Orchestra',
1584 'track': 'Burn Out',
1585 'album': 'Every Day',
1586 'release_data': None,
1587 'release_year': None,
1588 },
1589 'params': {
1590 'skip_download': True,
1591 },
545cc85d 1592 },
bc2ca1bb 1593 {
1594 # controversial video, only works with bpctr when authenticated with cookies
1595 'url': 'https://www.youtube.com/watch?v=nGC3D_FkCmg',
1596 'only_matching': True,
1597 },
a1a7907b 1598 {
1599 # controversial video, requires bpctr/contentCheckOk
1600 'url': 'https://www.youtube.com/watch?v=SZJvDhaSDnc',
1601 'info_dict': {
1602 'id': 'SZJvDhaSDnc',
1603 'ext': 'mp4',
1604 'title': 'San Diego teen commits suicide after bullying over embarrassing video',
1605 'channel_id': 'UC-SJ6nODDmufqBzPBwCvYvQ',
1606 'uploader': 'CBS This Morning',
11f9be09 1607 'uploader_id': 'CBSThisMorning',
a1a7907b 1608 'upload_date': '20140716',
1609 'description': 'md5:acde3a73d3f133fc97e837a9f76b53b7'
1610 }
1611 },
f7ad7160 1612 {
1613 # restricted location, https://github.com/ytdl-org/youtube-dl/issues/28685
1614 'url': 'cBvYw8_A0vQ',
1615 'info_dict': {
1616 'id': 'cBvYw8_A0vQ',
1617 'ext': 'mp4',
1618 'title': '4K Ueno Okachimachi Street Scenes 上野御徒町歩き',
1619 'description': 'md5:ea770e474b7cd6722b4c95b833c03630',
1620 'upload_date': '20201120',
1621 'uploader': 'Walk around Japan',
1622 'uploader_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',
1623 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',
1624 },
1625 'params': {
1626 'skip_download': True,
1627 },
0fb983f6 1628 }, {
1629 # Has multiple audio streams
1630 'url': 'WaOKSUlf4TM',
1631 'only_matching': True
9297939e 1632 }, {
1633 # Requires Premium: has format 141 when requested using YTM url
1634 'url': 'https://music.youtube.com/watch?v=XclachpHxis',
1635 'only_matching': True
1636 }, {
120916da 1637 # multiple subtitles with same lang_code
1638 'url': 'https://www.youtube.com/watch?v=wsQiKKfKxug',
1639 'only_matching': True,
109dd3b2 1640 }, {
1641 # Force use android client fallback
1642 'url': 'https://www.youtube.com/watch?v=YOelRv7fMxY',
1643 'info_dict': {
1644 'id': 'YOelRv7fMxY',
11f9be09 1645 'title': 'DIGGING A SECRET TUNNEL Part 1',
109dd3b2 1646 'ext': '3gp',
1647 'upload_date': '20210624',
1648 'channel_id': 'UCp68_FLety0O-n9QU6phsgw',
1649 'uploader': 'colinfurze',
11f9be09 1650 'uploader_id': 'colinfurze',
109dd3b2 1651 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCp68_FLety0O-n9QU6phsgw',
11f9be09 1652 'description': 'md5:b5096f56af7ccd7a555c84db81738b22'
109dd3b2 1653 },
1654 'params': {
1655 'format': '17', # 3gp format available on android
1656 'extractor_args': {'youtube': {'player_client': ['android']}},
1657 },
120916da 1658 },
109dd3b2 1659 {
1660 # Skip download of additional client configs (remix client config in this case)
1661 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1662 'only_matching': True,
1663 'params': {
1664 'extractor_args': {'youtube': {'player_skip': ['configs']}},
1665 },
8fc54b12 1666 }, {
1667 # shorts
1668 'url': 'https://www.youtube.com/shorts/BGQWPY4IigY',
1669 'only_matching': True,
1670 },
2eb88d95
PH
1671 ]
1672
@classmethod
def suitable(cls, url):
    """Return whether this extractor should handle ``url``.

    URLs whose query string carries a non-empty ``list`` parameter are
    rejected here; everything else is decided by the parent class.
    """
    from ..utils import parse_qs

    list_values = parse_qs(url).get('list', [None])
    return False if list_values[0] else super(YoutubeIE, cls).suitable(url)
1681
e0df6211
PH
def __init__(self, *args, **kwargs):
    """Initialise the extractor and its per-instance caches."""
    super(YoutubeIE, self).__init__(*args, **kwargs)
    # Decoded signature / nsig functions and values, keyed by tuples
    self._player_cache = {}
    # Downloaded player JS source, keyed by player id
    self._code_cache = {}
e0df6211 1686
def _extract_player_url(self, *ytcfgs, webpage=None):
    """Return an absolute player JS URL found in any of ``ytcfgs``.

    Looks up ``PLAYER_JS_URL`` first, then ``jsUrl`` inside
    ``WEB_PLAYER_CONTEXT_CONFIGS``. Returns None when neither yields a
    string. ``webpage`` is accepted but not used by this method.
    """
    candidate = traverse_obj(
        ytcfgs, (..., 'PLAYER_JS_URL'), (..., 'WEB_PLAYER_CONTEXT_CONFIGS', ..., 'jsUrl'),
        get_all=False, expected_type=compat_str)
    if not candidate:
        return
    # Protocol-relative URL
    if candidate.startswith('//'):
        return 'https:' + candidate
    # Already absolute
    if re.match(r'https?://', candidate):
        return candidate
    # Relative path: resolve against the main site
    return compat_urlparse.urljoin('https://www.youtube.com', candidate)
1699
def _download_player_url(self, video_id, fatal=False):
    """Build a player JS URL from the version advertised by the iframe API.

    Downloads ``/iframe_api`` and searches it for an 8-hex-digit player
    version. Returns None when the download or the search yields nothing.
    """
    iframe_js = self._download_webpage(
        'https://www.youtube.com/iframe_api',
        note='Downloading iframe API JS', video_id=video_id, fatal=fatal)
    if not iframe_js:
        return None
    version = self._search_regex(
        r'player\\?/([0-9a-fA-F]{8})\\?/', iframe_js, 'player version', fatal=fatal)
    if not version:
        return None
    return f'https://www.youtube.com/s/player/{version}/player_ias.vflset/en_US/base.js'
1709
60064c53
PH
def _signature_cache_id(self, example_sig):
    """Return the dot-joined lengths of the dot-separated parts of a signature."""
    part_lengths = map(len, example_sig.split('.'))
    return '.'.join(map(compat_str, part_lengths))
60064c53 1713
e40c758c
S
@classmethod
def _extract_player_info(cls, player_url):
    """Return the ``id`` group of the first ``_PLAYER_INFO_RE`` pattern matching ``player_url``.

    Raises ExtractorError when no pattern matches.
    """
    attempts = (re.search(pattern, player_url) for pattern in cls._PLAYER_INFO_RE)
    id_match = next((m for m in attempts if m), None)
    if id_match is None:
        raise ExtractorError('Cannot identify player %r' % player_url)
    return id_match.group('id')
e40c758c 1723
def _load_player(self, video_id, player_url, fatal=True):
    """Return the player JS source for ``player_url``, downloading it at most once.

    Successful downloads are stored in ``self._code_cache`` under the
    player id. Returns None when the download produced nothing.
    """
    player_id = self._extract_player_info(player_url)
    if player_id in self._code_cache:
        return self._code_cache[player_id]

    code = self._download_webpage(
        player_url, video_id, fatal=fatal,
        note='Downloading player ' + player_id,
        errnote='Download of %s failed' % player_url)
    if not code:
        # Failed downloads are not cached, so a later call retries
        return None
    self._code_cache[player_id] = code
    return code
109dd3b2 1734
def _extract_signature_function(self, video_id, player_url, example_sig):
    """Return a callable that transforms an encrypted signature string.

    The transformation is looked up in the 'youtube-sigfuncs' cache of the
    downloader first, stored there as a list of character indices. On a
    cache miss the function is parsed out of the player JS, probed with a
    test string to derive that index list, and the list is stored.
    Returns None if no player code could be loaded.
    """
    player_id = self._extract_player_info(player_url)

    # Read from filesystem cache
    func_id = 'js_%s_%s' % (player_id, self._signature_cache_id(example_sig))
    assert os.path.basename(func_id) == func_id

    cache = self._downloader.cache
    cache_spec = cache.load('youtube-sigfuncs', func_id)
    if cache_spec is not None:
        return lambda s: ''.join(s[i] for i in cache_spec)

    code = self._load_player(video_id, player_url)
    if not code:
        return None

    res = self._parse_sig_js(code)
    # Feed a string of distinct characters through the function; the code
    # points of the output are the source indices of each output position
    probe = ''.join(map(compat_chr, range(len(example_sig))))
    index_spec = [ord(c) for c in res(probe)]
    cache.store('youtube-sigfuncs', func_id, index_spec)
    return res
83799698 1757
def _print_sig_code(self, func, example_sig):
    """Print Python code equivalent to the signature function ``func``.

    Does nothing unless the 'youtube_print_sig_code' param is set.
    ``func`` is probed with a string of distinct characters of the same
    length as ``example_sig``; the resulting index list is compressed into
    slice expressions and printed via ``to_screen``.
    """
    if not self.get_param('youtube_print_sig_code'):
        return

    def gen_sig_code(idxs):
        # Yields 's[...]' expressions; runs of indices that change by a
        # constant +1/-1 step are collapsed into one slice expression.
        def _genslice(start, end, step):
            starts = '' if start == 0 else str(start)
            # A negative stop would be read as "from the end", so omit it
            ends = (':%d' % (end + step)) if end + step >= 0 else ':'
            steps = '' if step == 1 else (':%d' % step)
            return 's[%s%s%s]' % (starts, ends, steps)

        step = None
        # Squelch pyflakes warnings - start will be set when step is set
        start = '(Never used)'
        for i, prev in zip(idxs[1:], idxs[:-1]):
            if step is not None:
                if i - prev == step:
                    # Still inside the current run
                    continue
                yield _genslice(start, prev, step)
                step = None
                continue
            if i - prev in [-1, 1]:
                # A new run begins at prev
                step = i - prev
                start = prev
                continue
            else:
                yield 's[%d]' % prev
        # NOTE(review): `i` is only bound by the loop above, so this appears
        # to need at least two indices in idxs - confirm callers guarantee it
        if step is None:
            yield 's[%d]' % i
        else:
            yield _genslice(start, i, step)

    test_string = ''.join(map(compat_chr, range(len(example_sig))))
    cache_res = func(test_string)
    cache_spec = [ord(c) for c in cache_res]
    expr_code = ' + '.join(gen_sig_code(cache_spec))
    signature_id_tuple = '(%s)' % (
        ', '.join(compat_str(len(p)) for p in example_sig.split('.')))
    code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
            ' return %s\n') % (signature_id_tuple, expr_code)
    self.to_screen('Extracted signature function:\n' + code)
edf3e38e 1799
e0df6211
PH
def _parse_sig_js(self, jscode):
    """Locate the signature function in the player JS and wrap it as a Python callable.

    The function name is searched with the patterns below, in order. The
    returned callable takes the signature string and passes it to the
    interpreted JS function as its single argument.
    """
    name_patterns = (
        r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bm=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(h\.s\)\)',
        r'\bc&&\(c=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(c\)\)',
        r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\);[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\)',
        r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
        r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
        # Obsolete patterns
        r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
        r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        # NOTE(review): identical to the previous pattern; kept as in the original
        r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(')
    funcname = self._search_regex(
        name_patterns, jscode, 'Initial JS player signature function name', group='sig')

    initial_function = JSInterpreter(jscode).extract_function(funcname)
    return lambda s: initial_function([s])
1823
def _decrypt_signature(self, s, video_id, player_url):
    """Return the decrypted form of the encrypted signature ``s``.

    The decryption function is cached in ``self._player_cache`` per
    (player URL, signature shape). Raises ExtractorError if ``player_url``
    is None or if anything goes wrong while extracting or applying it.
    """
    if player_url is None:
        raise ExtractorError('Cannot decrypt signature without player_url')

    try:
        cache_key = (player_url, self._signature_cache_id(s))
        if cache_key not in self._player_cache:
            self._player_cache[cache_key] = self._extract_signature_function(
                video_id, player_url, s)
        decrypt = self._player_cache[cache_key]
        self._print_sig_code(decrypt, s)
        return decrypt(s)
    except Exception as e:
        raise ExtractorError('Signature extraction failed: ' + traceback.format_exc(), cause=e)
1842
def _decrypt_nsig(self, s, video_id, player_url):
    """Return the transformed value of the encrypted ``n`` field ``s``.

    Both the extracted function (per player URL) and each computed result
    (per input value) are memoised in ``self._player_cache``. Raises
    ExtractorError if ``player_url`` is None or the transformation fails.
    """
    if player_url is None:
        raise ExtractorError('Cannot decrypt nsig without player_url')
    # Normalise the player URL to an absolute https URL
    if player_url.startswith('//'):
        player_url = 'https:' + player_url
    elif not re.match(r'https?://', player_url):
        player_url = compat_urlparse.urljoin('https://www.youtube.com', player_url)

    cache = self._player_cache
    value_key = ('nsig_value', s)
    if value_key in cache:
        return cache[value_key]

    try:
        func_key = ('nsig', player_url)
        if func_key not in cache:
            cache[func_key] = self._extract_n_function(video_id, player_url)
        transform = cache[func_key]
        cache[value_key] = transform(s)
        self.write_debug(f'Decrypted nsig {s} => {cache[value_key]}')
        return cache[value_key]
    except Exception as e:
        raise ExtractorError(traceback.format_exc(), cause=e)
1867
def _extract_n_function_name(self, jscode):
    """Return the name of the ``n`` parameter transformation function found in ``jscode``."""
    name_patterns = (r'\.get\("n"\)\)&&\(b=(?P<nfunc>[a-zA-Z0-9$]{3})\([a-zA-Z0-9]\)',)
    return self._search_regex(
        name_patterns, jscode, 'Initial JS player n function name', group='nfunc')
1872
def _extract_n_function(self, video_id, player_url):
    """Return a callable applying the player's ``n`` transformation to a string.

    The function code is loaded from the downloader's 'youtube-nsig'
    cache when available; otherwise it is extracted from the player JS
    and stored in that cache.
    """
    player_id = self._extract_player_info(player_url)
    cache = self._downloader.cache
    func_code = cache.load('youtube-nsig', player_id)

    if not func_code:
        jscode = self._load_player(video_id, player_url)
        funcname = self._extract_n_function_name(jscode)
        jsi = JSInterpreter(jscode)
        func_code = jsi.extract_function_code(funcname)
        cache.store('youtube-nsig', player_id, func_code)
    else:
        jsi = JSInterpreter(func_code)

    if self.get_param('youtube_print_sig_code'):
        self.to_screen(f'Extracted nsig function from {player_id}:\n{func_code[1]}\n')

    # The JS function is rebuilt from its code on every call
    return lambda s: jsi.extract_function_from_code(*func_code)([s])
e0df6211 1890
def _extract_signature_timestamp(self, video_id, player_url, ytcfg=None, fatal=False):
    """
    Extract signatureTimestamp (sts)
    Required to tell API what sig/player version is in use.

    The value is taken from ``ytcfg['STS']`` when present and non-zero,
    otherwise searched for in the player JS. With no ``player_url`` this
    raises ExtractorError if ``fatal``, else warns and returns None.
    """
    sts = int_or_none(ytcfg.get('STS')) if isinstance(ytcfg, dict) else None
    if sts:
        return sts

    # Attempt to extract from player
    if player_url is None:
        error_msg = 'Cannot extract signature timestamp without player_url.'
        if fatal:
            raise ExtractorError(error_msg)
        self.report_warning(error_msg)
        return

    code = self._load_player(video_id, player_url, fatal=fatal)
    if code:
        sts = int_or_none(self._search_regex(
            r'(?:signatureTimestamp|sts)\s*:\s*(?P<sts>[0-9]{5})', code,
            'JS player signature timestamp', group='sts', fatal=fatal))
    return sts
1914
def _mark_watched(self, video_id, player_responses):
    """Request the playback tracking URL so the video is marked as watched.

    Uses the first valid ``videostatsPlaybackUrl`` found in
    ``player_responses``; warns and returns if there is none. The request
    itself is non-fatal.
    """
    playback_url = traverse_obj(
        player_responses, (..., 'playbackTracking', 'videostatsPlaybackUrl', 'baseUrl'),
        expected_type=url_or_none, get_all=False)
    if not playback_url:
        self.report_warning('Unable to mark watched')
        return

    url_parts = compat_urlparse.urlparse(playback_url)
    query = compat_urlparse.parse_qs(url_parts.query)

    # cpn generation algorithm is reverse engineered from base.js.
    # In fact it works even with dummy cpn.
    CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
    cpn_chars = [CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(0, 16)]

    query['ver'] = ['2']
    query['cpn'] = [''.join(cpn_chars)]
    tracking_url = compat_urlparse.urlunparse(
        url_parts._replace(query=compat_urllib_parse_urlencode(query, True)))

    self._download_webpage(
        tracking_url, video_id, 'Marking watched',
        'Unable to mark watched', fatal=False)
1940
66c9fa36
S
@staticmethod
def _extract_urls(webpage):
    """Return YouTube embed URLs / video ids found in ``webpage``, in discovery order."""
    # Embedded YouTube player
    # NOTE(review): `embedSWF\(?:\s*` reads like a mistyped group; kept verbatim
    embed_re = r'''(?x)
        (?:
            <iframe[^>]+?src=|
            data-video-url=|
            <embed[^>]+?src=|
            embedSWF\(?:\s*|
            <object[^>]+data=|
            new\s+SWFObject\(
        )
        (["\'])
        (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
        (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
        \1'''
    found = [unescapeHTML(m.group('url')) for m in re.finditer(embed_re, webpage)]

    # lazyYT YouTube embed
    lazy_ids = re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage)
    found += [unescapeHTML(lazy_id) for lazy_id in lazy_ids]

    # Wordpress "YouTube Video Importer" plugin
    importer_re = r'''(?x)<div[^>]+
        class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
        data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)'''
    for groups in re.findall(importer_re, webpage):
        # The video id is the last captured group
        found.append(groups[-1])

    return found
1972
@staticmethod
def _extract_url(webpage):
    """Return the first entry of ``_extract_urls(webpage)``, or None if there is none."""
    for url in YoutubeIE._extract_urls(webpage):
        return url
    return None
1977
97665381
PH
@classmethod
def extract_id(cls, url):
    """Return the ``id`` group of ``_VALID_URL`` matched against ``url``.

    Raises ExtractorError if the URL does not match.
    """
    matched = re.match(cls._VALID_URL, url, re.VERBOSE)
    if matched is not None:
        return matched.group('id')
    raise ExtractorError('Invalid URL: %s' % url)
c5e8d7af 1984
def _extract_chapters_from_json(self, data, duration):
    """Build the chapter list from the chaptered player bar in ``data``."""
    chapters_path = (
        'playerOverlays', 'playerOverlayRenderer', 'decoratedPlayerBarRenderer',
        'decoratedPlayerBarRenderer', 'playerBar', 'chapteredPlayerBarRenderer', 'chapters'
    )
    chapter_list = traverse_obj(data, chapters_path, expected_type=list)

    def start_seconds(chapter):
        # The start is given in milliseconds; scale it to seconds
        return float_or_none(
            traverse_obj(chapter, ('chapterRenderer', 'timeRangeStartMillis')), scale=1000)

    def title_of(chapter):
        return traverse_obj(
            chapter, ('chapterRenderer', 'title', 'simpleText'), expected_type=str)

    return self._extract_chapters(
        chapter_list,
        chapter_time=start_seconds,
        chapter_title=title_of,
        duration=duration)
1999
def _extract_chapters_from_engagement_panel(self, data, duration):
    """Return chapters from the first engagement panel that yields any, else ``[]``."""
    content_list = traverse_obj(
        data,
        ('engagementPanels', ..., 'engagementPanelSectionListRenderer', 'content', 'macroMarkersListRenderer', 'contents'),
        expected_type=list, default=[])

    def chapter_time(chapter):
        return parse_duration(self._get_text(chapter, 'timeDescription'))

    def chapter_title(chapter):
        return self._get_text(chapter, 'title')

    for contents in content_list:
        chapters = self._extract_chapters(
            traverse_obj(contents, (..., 'macroMarkersListItemRenderer')),
            chapter_time, chapter_title, duration)
        if chapters:
            return chapters
    return []
2015
2016 def _extract_chapters(self, chapter_list, chapter_time, chapter_title, duration):
84213ea8 2017 chapters = []
7c365c21 2018 last_chapter = {'start_time': 0}
2019 for idx, chapter in enumerate(chapter_list or []):
2020 title = chapter_title(chapter)
84213ea8
S
2021 start_time = chapter_time(chapter)
2022 if start_time is None:
2023 continue
7c365c21 2024 last_chapter['end_time'] = start_time
2025 if start_time < last_chapter['start_time']:
2026 if idx == 1:
2027 chapters.pop()
2028 self.report_warning('Invalid start time for chapter "%s"' % last_chapter['title'])
2029 else:
2030 self.report_warning(f'Invalid start time for chapter "{title}"')
2031 continue
2032 last_chapter = {'start_time': start_time, 'title': title}
2033 chapters.append(last_chapter)
2034 last_chapter['end_time'] = duration
84213ea8
S
2035 return chapters
2036
def _extract_yt_initial_variable(self, webpage, regex, video_id, name):
    """Search ``webpage`` for a JSON blob matched by ``regex`` and parse it.

    The regex is tried first with ``_YT_INITIAL_BOUNDARY_RE`` appended,
    then on its own; with no match the default ``'{}'`` is parsed instead.
    Parsing is non-fatal.
    """
    bounded_regex = r'%s\s*%s' % (regex, self._YT_INITIAL_BOUNDARY_RE)
    raw_json = self._search_regex(
        (bounded_regex, regex), webpage, name, default='{}')
    return self._parse_json(raw_json, video_id, fatal=False)
84213ea8 2041
@staticmethod
def parse_time_text(time_text):
    """
    Parse the comment time text
    time_text is in the format 'X units ago (edited)'

    Returns the result of ``datetime_from_str`` for 'now-<X><units>', or
    None when the text has fewer than three space-separated parts or the
    conversion raises ValueError.
    """
    words = time_text.split(' ')
    if len(words) < 3:
        return None
    amount, unit = words[0], words[1]
    try:
        return datetime_from_str('now-%s%s' % (amount, unit), precision='auto')
    except ValueError:
        return None
d92f5d5a 2054
a1c5d2ca
M
def _extract_comment(self, comment_renderer, parent=None):
    """Convert a comment renderer dict into a comment info dict.

    Returns None when the renderer has no ``commentId``. ``parent`` is the
    id of the parent comment; top-level comments get the parent 'root'.
    ``timestamp`` is None when the published time text cannot be parsed.
    """
    comment_id = comment_renderer.get('commentId')
    if not comment_id:
        return

    text = self._get_text(comment_renderer, 'contentText')

    # note: timestamp is an estimate calculated from the current time and time_text
    time_text = self._get_text(comment_renderer, 'publishedTimeText') or ''
    time_text_dt = self.parse_time_text(time_text)
    # Default to None: without this, a missing or unparsable time text left
    # `timestamp` unbound and building the result dict raised UnboundLocalError
    timestamp = None
    if isinstance(time_text_dt, datetime.datetime):
        timestamp = calendar.timegm(time_text_dt.timetuple())
    author = self._get_text(comment_renderer, 'authorText')
    author_id = try_get(comment_renderer,
                        lambda x: x['authorEndpoint']['browseEndpoint']['browseId'], compat_str)

    votes = parse_count(try_get(comment_renderer, (lambda x: x['voteCount']['simpleText'],
                                                   lambda x: x['likeCount']), compat_str)) or 0
    author_thumbnail = try_get(comment_renderer,
                               lambda x: x['authorThumbnail']['thumbnails'][-1]['url'], compat_str)

    author_is_uploader = try_get(comment_renderer, lambda x: x['authorIsChannelOwner'], bool)
    # A 'creatorHeart' entry among the action buttons marks a favorited comment
    is_favorited = 'creatorHeart' in (try_get(
        comment_renderer, lambda x: x['actionButtons']['commentActionButtonsRenderer'], dict) or {})
    return {
        'id': comment_id,
        'text': text,
        'timestamp': timestamp,
        'time_text': time_text,
        'like_count': votes,
        'is_favorited': is_favorited,
        'author': author,
        'author_id': author_id,
        'author_thumbnail': author_thumbnail,
        'author_is_uploader': author_is_uploader,
        'parent': parent or 'root'
    }
2092
def _comment_entries(self, root_continuation_data, ytcfg, video_id, parent=None, comment_counts=None):
    """Generate comment dicts by following comment continuations.

    Pages are requested from the 'next' endpoint until no continuation is
    left. For each comment that has replies, this generator recurses with
    ``parent`` set to that comment's id. ``comment_counts`` is a shared
    mutable list: [comments so far, estimated total, current thread #].
    """

    def extract_header(contents):
        # Reads the comments header: records the expected comment count and
        # returns the continuation of the requested sort order, if any.
        _continuation = None
        for content in contents:
            comments_header_renderer = try_get(content, lambda x: x['commentsHeaderRenderer'])
            expected_comment_count = parse_count(self._get_text(
                comments_header_renderer, 'countText', 'commentsCount', max_runs=1))

            if expected_comment_count:
                comment_counts[1] = expected_comment_count
                self.to_screen('Downloading ~%d comments' % expected_comment_count)
            sort_mode_str = self._configuration_arg('comment_sort', [''])[0]
            comment_sort_index = int(sort_mode_str != 'top')  # 1 = new, 0 = top

            sort_menu_item = try_get(
                comments_header_renderer,
                lambda x: x['sortMenu']['sortFilterSubMenuRenderer']['subMenuItems'][comment_sort_index], dict) or {}
            sort_continuation_ep = sort_menu_item.get('serviceEndpoint') or {}

            _continuation = self._extract_continuation_ep_data(sort_continuation_ep) or self._extract_continuation(sort_menu_item)
            if not _continuation:
                continue

            sort_text = sort_menu_item.get('title')
            if isinstance(sort_text, compat_str):
                sort_text = sort_text.lower()
            else:
                sort_text = 'top comments' if comment_sort_index == 0 else 'newest first'
            self.to_screen('Sorting comments by %s' % sort_text)
            break
        return _continuation

    def extract_thread(contents):
        # Yields each comment in contents, followed by its replies.
        if not parent:
            # Reset the thread counter when processing top-level comments
            comment_counts[2] = 0
        for content in contents:
            comment_thread_renderer = try_get(content, lambda x: x['commentThreadRenderer'])
            # Either a thread wrapping a comment, or a bare comment renderer
            comment_renderer = try_get(
                comment_thread_renderer, (lambda x: x['comment']['commentRenderer'], dict)) or try_get(
                content, (lambda x: x['commentRenderer'], dict))

            if not comment_renderer:
                continue
            comment = self._extract_comment(comment_renderer, parent)
            if not comment:
                continue
            comment_counts[0] += 1
            yield comment
            # Attempt to get the replies
            comment_replies_renderer = try_get(
                comment_thread_renderer, lambda x: x['replies']['commentRepliesRenderer'], dict)

            if comment_replies_renderer:
                comment_counts[2] += 1
                comment_entries_iter = self._comment_entries(
                    comment_replies_renderer, ytcfg, video_id,
                    parent=comment.get('id'), comment_counts=comment_counts)

                for reply_comment in comment_entries_iter:
                    yield reply_comment

    # YouTube comments have a max depth of 2
    max_depth = int_or_none(self._configuration_arg('max_comment_depth', [''])[0]) or float('inf')
    if max_depth == 1 and parent:
        # Replies are not wanted
        return
    if not comment_counts:
        # comment so far, est. total comments, current comment thread #
        comment_counts = [0, 0, 0]

    continuation = self._extract_continuation(root_continuation_data)
    if continuation and len(continuation['continuation']) < 27:
        self.write_debug('Detected old API continuation token. Generating new API compatible token.')
        continuation_token = self._generate_comment_continuation(video_id)
        continuation = self._build_api_continuation_query(continuation_token, None)

    # Surface any message found in the comment section data as a warning
    message = self._get_text(root_continuation_data, ('contents', ..., 'messageRenderer', 'text'), max_runs=1)
    if message and not parent:
        self.report_warning(message, video_id=video_id)

    visitor_data = None
    # Only the top-level call has a header to process before the comments
    is_first_continuation = parent is None

    for page_num in itertools.count(0):
        if not continuation:
            break
        headers = self.generate_api_headers(ytcfg=ytcfg, visitor_data=visitor_data)
        comment_prog_str = '(%d/%d)' % (comment_counts[0], comment_counts[1])
        if page_num == 0:
            if is_first_continuation:
                note_prefix = 'Downloading comment section API JSON'
            else:
                note_prefix = ' Downloading comment API JSON reply thread %d %s' % (
                    comment_counts[2], comment_prog_str)
        else:
            note_prefix = '%sDownloading comment%s API JSON page %d %s' % (
                ' ' if parent else '', ' replies' if parent else '',
                page_num, comment_prog_str)

        response = self._extract_response(
            item_id=None, query=continuation,
            ep='next', ytcfg=ytcfg, headers=headers, note=note_prefix,
            check_get_keys=('onResponseReceivedEndpoints', 'continuationContents'))
        if not response:
            break
        # Keep the previous visitor data when the response carries none
        visitor_data = try_get(
            response,
            lambda x: x['responseContext']['webResponseContextExtensionData']['ytConfigData']['visitorData'],
            compat_str) or visitor_data

        continuation_contents = dict_get(response, ('onResponseReceivedEndpoints', 'continuationContents'))

        continuation = None
        if isinstance(continuation_contents, list):
            for continuation_section in continuation_contents:
                if not isinstance(continuation_section, dict):
                    continue
                continuation_items = try_get(
                    continuation_section,
                    (lambda x: x['reloadContinuationItemsCommand']['continuationItems'],
                     lambda x: x['appendContinuationItemsAction']['continuationItems']),
                    list) or []
                if is_first_continuation:
                    continuation = extract_header(continuation_items)
                    is_first_continuation = False
                    if continuation:
                        break
                    continue
                # NOTE(review): enumerate() leaves count at 0 for both zero and
                # one yielded entries - confirm the check below intends that
                count = 0
                for count, entry in enumerate(extract_thread(continuation_items)):
                    yield entry
                continuation = self._extract_continuation({'contents': continuation_items})
                if continuation:
                    # Sometimes YouTube provides a continuation without any comments
                    # In most cases we end up just downloading these with very little comments to come.
                    if count == 0:
                        if not parent:
                            self.report_warning('No comments received - assuming end of comments')
                        continuation = None
                    break

        # Deprecated response structure
        elif isinstance(continuation_contents, dict):
            known_continuation_renderers = ('itemSectionContinuation', 'commentRepliesContinuation')
            for key, continuation_renderer in continuation_contents.items():
                if key not in known_continuation_renderers:
                    continue
                if not isinstance(continuation_renderer, dict):
                    continue
                if is_first_continuation:
                    header_continuation_items = [continuation_renderer.get('header') or {}]
                    continuation = extract_header(header_continuation_items)
                    is_first_continuation = False
                    if continuation:
                        break

                # Sometimes YouTube provides a continuation without any comments
                # In most cases we end up just downloading these with very little comments to come.
                count = 0
                for count, entry in enumerate(extract_thread(continuation_renderer.get('contents') or {})):
                    yield entry
                continuation = self._extract_continuation(continuation_renderer)
                if count == 0:
                    if not parent:
                        self.report_warning('No comments received - assuming end of comments')
                    continuation = None
                break
a1c5d2ca 2260
2d6659b9 2261 @staticmethod
2262 def _generate_comment_continuation(video_id):
2263 """
2264 Generates initial comment section continuation token from given video id
2265 """
2266 b64_vid_id = base64.b64encode(bytes(video_id.encode('utf-8')))
2267 parts = ('Eg0SCw==', b64_vid_id, 'GAYyJyIRIgs=', b64_vid_id, 'MAB4AjAAQhBjb21tZW50cy1zZWN0aW9u')
2268 new_continuation_intlist = list(itertools.chain.from_iterable(
2269 [bytes_to_intlist(base64.b64decode(part)) for part in parts]))
2270 return base64.b64encode(intlist_to_bytes(new_continuation_intlist)).decode('utf-8')
2271
def _get_comments(self, ytcfg, video_id, contents, webpage):
    """Entry for comment extraction

    Returns an iterator over comment dicts, limited to the 'max_comments'
    extractor argument when it is set. ``webpage`` is accepted but not used.
    """
    max_comments = int_or_none(self._configuration_arg('max_comments', [''])[0])

    # Force English regardless of account setting to prevent parsing issues
    # See: https://github.com/yt-dlp/yt-dlp/issues/532
    english_ytcfg = copy.deepcopy(ytcfg)
    client_context = traverse_obj(
        english_ytcfg, ('INNERTUBE_CONTEXT', 'client'), expected_type=dict, default={})
    client_context['hl'] = 'en'

    def _real_comment_extract(contents):
        # Pick the section holding the comments; None if there is none
        comment_section = None
        for item in traverse_obj(contents, (..., 'itemSectionRenderer'), default={}):
            if item.get('sectionIdentifier') == 'comment-item-section':
                comment_section = item
                break
        yield from self._comment_entries(comment_section, english_ytcfg, video_id)

    return itertools.islice(_real_comment_extract(contents), 0, max_comments)
a1c5d2ca 2287
109dd3b2 2288 @staticmethod
99e9e001 2289 def _get_checkok_params():
2290 return {'contentCheckOk': True, 'racyCheckOk': True}
2291
@classmethod
def _generate_player_context(cls, sts=None):
    """
    Build the player-specific part of a query

    @param sts  signature timestamp; only included when it is not None
    @returns    dict holding the `playbackContext` plus the parameters
                returned by _get_checkok_params
    """
    playback = {'html5Preference': 'HTML5_PREF_WANTS'}
    if sts is not None:
        playback['signatureTimestamp'] = sts

    result = {'playbackContext': {'contentPlaybackContext': playback}}
    result.update(cls._get_checkok_params())
    return result
2305
e7e94f2a
D
@staticmethod
def _is_agegated(player_response):
    """
    Check whether a player response indicates that the video is age-gated

    Returns True if `playabilityStatus` has a truthy
    `desktopLegacyAgeGateReason`, or if its `status` or `reason` contains
    one of the known age-gate markers (compared case-insensitively).
    """
    if traverse_obj(player_response, ('playabilityStatus', 'desktopLegacyAgeGateReason')):
        return True

    reasons = traverse_obj(player_response, ('playabilityStatus', ('status', 'reason')), default=[])
    AGE_GATE_REASONS = (
        'confirm your age', 'age-restricted', 'inappropriate',  # reason
        'age_verification_required', 'age_check_required',  # status
    )
    # The markers are lowercase, whereas status codes are upper-case
    # (e.g. AGE_VERIFICATION_REQUIRED); normalise before matching so that
    # the status markers can actually match. Non-string values are ignored.
    normalized = [reason.lower() for reason in reasons if isinstance(reason, str)]
    return any(expected in reason for expected in AGE_GATE_REASONS for reason in normalized)
2317
@staticmethod
def _is_unplayable(player_response):
    """Return True if the playability status of the player response is exactly 'UNPLAYABLE'"""
    status = traverse_obj(player_response, ('playabilityStatus', 'status'))
    return status == 'UNPLAYABLE'
9275f62c 2321
def _extract_player_response(self, client, video_id, master_ytcfg, player_ytcfg, player_url, initial_pr):
    """
    Request the `player` endpoint for the given client

    The signature timestamp is only looked up when a player_url is given.
    Returns the response, or None if the response is falsy. The request
    itself is made with fatal=True.
    """
    session_index = self._extract_session_index(player_ytcfg, master_ytcfg)
    syncid = self._extract_account_syncid(player_ytcfg, master_ytcfg, initial_pr)

    sts = None
    if player_url:
        sts = self._extract_signature_timestamp(video_id, player_url, master_ytcfg, fatal=False)

    headers = self.generate_api_headers(
        ytcfg=player_ytcfg, account_syncid=syncid, session_index=session_index, default_client=client)

    yt_query = {'videoId': video_id, **self._generate_player_context(sts)}
    client_label = client.replace('_', ' ').strip()
    response = self._extract_response(
        item_id=video_id, ep='player', query=yt_query,
        ytcfg=player_ytcfg, headers=headers, fatal=True,
        default_client=client,
        note='Downloading %s player API JSON' % client_label)
    return response or None
2338
def _get_requested_clients(self, url, smuggled_data):
    """
    Determine which innertube clients to query for the given URL

    Clients come from the `player_client` configuration argument; `all`
    expands to every client whose name does not start with an underscore,
    ordered by descending priority. Unknown names are skipped with a
    warning. When nothing valid was requested, `android` and `web` are
    used. For music URLs the `_music` variant of each chosen client is
    added when such a client exists.

    @returns  the clients with duplicates removed, as returned by orderedSet
    """
    requested_clients = []
    allowed_clients = sorted(
        [client for client in INNERTUBE_CLIENTS.keys() if client[:1] != '_'],
        key=lambda client: INNERTUBE_CLIENTS[client]['priority'], reverse=True)
    for client in self._configuration_arg('player_client'):
        if client in allowed_clients:
            requested_clients.append(client)
        elif client == 'all':
            requested_clients.extend(allowed_clients)
        else:
            self.report_warning(f'Skipping unsupported client {client}')
    if not requested_clients:
        requested_clients = ['android', 'web']

    if smuggled_data.get('is_music_url') or self.is_music_url(url):
        # Build the additions first: extending the list from a generator
        # that iterates the very same list would mutate it mid-iteration
        music_clients = [
            f'{client}_music' for client in requested_clients
            if f'{client}_music' in INNERTUBE_CLIENTS]
        requested_clients.extend(music_clients)

    return orderedSet(requested_clients)
cf7e015f 2359
c0bc527b
M
def _extract_player_ytcfg(self, client, video_id):
    """
    Download the page carrying the ytcfg of a client and extract it

    Only `web_music` and `web_embedded` have such a page; for every other
    client an empty dict is returned without any download. An empty dict
    is also returned when no ytcfg could be extracted.
    """
    if client == 'web_music':
        config_url = 'https://music.youtube.com'
    elif client == 'web_embedded':
        config_url = f'https://www.youtube.com/embed/{video_id}?html5=1'
    else:
        return {}

    page = self._download_webpage(config_url, video_id, fatal=False, note=f'Downloading {client} config')
    ytcfg = self.extract_ytcfg(video_id, page)
    return ytcfg or {}
2369
def _extract_player_responses(self, clients, video_id, webpage, master_ytcfg):
    """
    Collect the player responses of all requested clients

    @param clients       client names to query, in order of preference
    @param video_id      id of the video
    @param webpage       watch page contents, or a falsy value if not available
    @param master_ytcfg  ytcfg used as fallback when a client has none of its own
    @returns             tuple (list of player responses, player_url)
    @raises              the last ExtractorError, if a request failed and no
                         player response at all could be collected
    """
    initial_pr = None
    if webpage:
        initial_pr = self._extract_yt_initial_variable(
            webpage, self._YT_INITIAL_PLAYER_RESPONSE_RE,
            video_id, 'initial player response')

    original_clients = clients
    # Work on a reversed copy so that pop() yields the clients in the
    # requested order and appended fallback clients are tried next
    clients = clients[::-1]
    prs = []

    def append_client(client_name):
        # Only queue clients that exist and that the user did not already request
        if client_name in INNERTUBE_CLIENTS and client_name not in original_clients:
            clients.append(client_name)

    # Android player_response does not have microFormats which are needed for
    # extraction of some data. So we return the initial_pr with formats
    # stripped out even if not requested by the user
    # See: https://github.com/yt-dlp/yt-dlp/issues/501
    if initial_pr:
        # Shallow copy, so that initial_pr itself keeps its streamingData
        pr = dict(initial_pr)
        pr['streamingData'] = None
        prs.append(pr)

    last_error = None
    tried_iframe_fallback = False
    player_url = None
    while clients:
        client = clients.pop()
        player_ytcfg = master_ytcfg if client == 'web' else {}
        if 'configs' not in self._configuration_arg('player_skip'):
            player_ytcfg = self._extract_player_ytcfg(client, video_id) or player_ytcfg

        # A player_url found for an earlier client is reused for later ones
        player_url = player_url or self._extract_player_url(master_ytcfg, player_ytcfg, webpage=webpage)
        require_js_player = self._get_default_ytcfg(client).get('REQUIRE_JS_PLAYER')
        if 'js' in self._configuration_arg('player_skip'):
            require_js_player = False
            player_url = None

        # Fall back to downloading the player URL, at most once per call
        if not player_url and not tried_iframe_fallback and require_js_player:
            player_url = self._download_player_url(video_id)
            tried_iframe_fallback = True

        try:
            # The web client reuses the response embedded in the webpage when there is one
            pr = initial_pr if client == 'web' and initial_pr else self._extract_player_response(
                client, video_id, player_ytcfg or master_ytcfg, player_ytcfg, player_url if require_js_player else None, initial_pr)
        except ExtractorError as e:
            # Keep only the most recent error; earlier ones are downgraded to warnings
            if last_error:
                self.report_warning(last_error)
            last_error = e
            continue

        if pr:
            prs.append(pr)

        # creator clients can bypass AGE_VERIFICATION_REQUIRED if logged in
        if client.endswith('_agegate') and self._is_unplayable(pr) and self.is_authenticated:
            append_client(client.replace('_agegate', '_creator'))
        elif self._is_agegated(pr):
            append_client(f'{client}_agegate')

    if last_error:
        # Only fail when nothing at all was collected; otherwise just warn
        if not len(prs):
            raise last_error
        self.report_warning(last_error)
    return prs, player_url
11f9be09 2436
def _extract_formats(self, streaming_data, video_id, player_url, is_live):
    """
    Generate format dicts from the streaming data of the player responses

    Formats listed directly in `formats`/`adaptiveFormats` are yielded
    first, followed by the formats of the HLS and DASH manifests unless
    these are skipped by configuration. Formats that would need signature
    decryption are dropped when no player_url is available.

    @param streaming_data  list of `streamingData` dicts
    @param video_id        id of the video
    @param player_url      URL of the player JS, or None
    @param is_live         whether the video is live; DASH manifests of live
                           videos are only used with `include_live_dash`
    """
    itags, stream_ids = {}, []
    itag_qualities, res_qualities = {}, {}
    q = qualities([
        # Normally tiny is the smallest video-only formats. But
        # audio-only formats with unknown quality may get tagged as tiny
        'tiny',
        'audio_quality_ultralow', 'audio_quality_low', 'audio_quality_medium', 'audio_quality_high',  # Audio only formats
        'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres'
    ])
    streaming_formats = traverse_obj(streaming_data, (..., ('formats', 'adaptiveFormats'), ...), default=[])

    for fmt in streaming_formats:
        if fmt.get('targetDurationSec') or fmt.get('drmFamilies'):
            continue

        itag = str_or_none(fmt.get('itag'))
        audio_track = fmt.get('audioTrack') or {}
        stream_id = '%s.%s' % (itag or '', audio_track.get('id', ''))
        if stream_id in stream_ids:
            continue

        quality = fmt.get('quality')
        height = int_or_none(fmt.get('height'))
        if quality == 'tiny' or not quality:
            # `or ''` also covers an explicit null value for audioQuality
            quality = (fmt.get('audioQuality') or '').lower() or quality
        # The 3gp format (17) in android client has a quality of "small",
        # but is actually worse than other formats
        if itag == '17':
            quality = 'tiny'
        if quality:
            if itag:
                itag_qualities[itag] = quality
            if height:
                res_qualities[height] = quality
        # FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment
        # (adding `&sq=0` to the URL) and parsing emsg box to determine the
        # number of fragment that would subsequently requested with (`&sq=N`)
        if fmt.get('type') == 'FORMAT_STREAM_TYPE_OTF':
            continue

        fmt_url = fmt.get('url')
        if not fmt_url:
            # Without a direct URL, the URL and the encrypted signature are
            # packed into the signatureCipher query string (may be missing)
            sc = compat_parse_qs(fmt.get('signatureCipher') or '')
            fmt_url = url_or_none(try_get(sc, lambda x: x['url'][0]))
            encrypted_sig = try_get(sc, lambda x: x['s'][0])
            if not (sc and fmt_url and encrypted_sig):
                continue
            if not player_url:
                continue
            signature = self._decrypt_signature(encrypted_sig, video_id, player_url)
            sp = try_get(sc, lambda x: x['sp'][0]) or 'signature'
            fmt_url += '&' + sp + '=' + signature

        query = parse_qs(fmt_url)
        throttled = False
        if query.get('ratebypass') != ['yes'] and query.get('n'):
            try:
                fmt_url = update_url_query(fmt_url, {
                    'n': self._decrypt_nsig(query['n'][0], video_id, player_url)})
            except ExtractorError as e:
                self.report_warning(f'nsig extraction failed: You may experience throttling for some formats\n{e}', only_once=True)
                throttled = True

        if itag:
            itags[itag] = 'https'
            stream_ids.append(stream_id)

        tbr = float_or_none(
            fmt.get('averageBitrate') or fmt.get('bitrate'), 1000)
        dct = {
            'asr': int_or_none(fmt.get('audioSampleRate')),
            'filesize': int_or_none(fmt.get('contentLength')),
            'format_id': itag,
            'format_note': join_nonempty(
                '%s%s' % (audio_track.get('displayName') or '',
                          ' (default)' if audio_track.get('audioIsDefault') else ''),
                # quality may still be None here if the format carries no
                # quality information at all
                fmt.get('qualityLabel') or (quality or '').replace('audio_quality_', ''),
                throttled and 'THROTTLED', delim=', '),
            'source_preference': -10 if throttled else -1,
            'fps': int_or_none(fmt.get('fps')) or None,
            'height': height,
            'quality': q(quality),
            'tbr': tbr,
            'url': fmt_url,
            'width': int_or_none(fmt.get('width')),
            'language': audio_track.get('id', '').split('.')[0],
            'language_preference': 1 if audio_track.get('audioIsDefault') else -1,
        }
        mime_mobj = re.match(
            r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', fmt.get('mimeType') or '')
        if mime_mobj:
            dct['ext'] = mimetype2ext(mime_mobj.group(1))
            dct.update(parse_codecs(mime_mobj.group(2)))
        no_audio = dct.get('acodec') == 'none'
        no_video = dct.get('vcodec') == 'none'
        if no_audio:
            dct['vbr'] = tbr
        if no_video:
            dct['abr'] = tbr
        if no_audio or no_video:
            dct['downloader_options'] = {
                # Youtube throttles chunks >~10M
                'http_chunk_size': 10485760,
            }
            if dct.get('ext'):
                dct['container'] = dct['ext'] + '_dash'
        yield dct

    skip_manifests = self._configuration_arg('skip')
    get_dash = (
        (not is_live or self._configuration_arg('include_live_dash'))
        and 'dash' not in skip_manifests and self.get_param('youtube_include_dash_manifest', True))
    get_hls = 'hls' not in skip_manifests and self.get_param('youtube_include_hls_manifest', True)

    def process_manifest_format(f, proto, itag):
        # Returns False if a format with this itag was already seen for this
        # protocol. An itag already used by another protocol gets the
        # protocol appended to keep the format ids unique.
        if itag in itags:
            if itags[itag] == proto or f'{itag}-{proto}' in itags:
                return False
            itag = f'{itag}-{proto}'
        if itag:
            f['format_id'] = itag
            itags[itag] = proto

        # Reuse the quality seen for the same format id, else for the same height
        f['quality'] = next((
            q(qdict[val])
            for val, qdict in ((f.get('format_id'), itag_qualities), (f.get('height'), res_qualities))
            if val in qdict), -1)
        return True

    for sd in streaming_data:
        hls_manifest_url = get_hls and sd.get('hlsManifestUrl')
        if hls_manifest_url:
            for f in self._extract_m3u8_formats(hls_manifest_url, video_id, 'mp4', fatal=False):
                if process_manifest_format(f, 'hls', self._search_regex(
                        r'/itag/(\d+)', f['url'], 'itag', default=None)):
                    yield f

        dash_manifest_url = get_dash and sd.get('dashManifestUrl')
        if dash_manifest_url:
            for f in self._extract_mpd_formats(dash_manifest_url, video_id, fatal=False):
                if process_manifest_format(f, 'dash', f['format_id']):
                    f['filesize'] = int_or_none(self._search_regex(
                        r'/clen/(\d+)', f.get('fragment_base_url') or f['url'], 'file size', default=None))
                    yield f
11f9be09 2582
def _real_extract(self, url):
    """
    Extract the info dict of a single video

    Downloads the watch page (unless skipped via `player_skip`), collects
    the player responses of the requested clients and builds formats,
    thumbnails, subtitles and metadata from them and from the initial data.

    Returns a url result for the trailer when the responses carry a
    trailer video id, a playlist result for multifeed videos (unless
    --no-playlist or `force_singlefeed` is set), and the info dict otherwise.
    """
    url, smuggled_data = unsmuggle_url(url, {})
    video_id = self._match_id(url)

    base_url = self.http_scheme() + '//www.youtube.com/'
    webpage_url = base_url + 'watch?v=' + video_id
    webpage = None
    if 'webpage' not in self._configuration_arg('player_skip'):
        webpage = self._download_webpage(
            webpage_url + '&bpctr=9999999999&has_verified=1', video_id, fatal=False)

    master_ytcfg = self.extract_ytcfg(video_id, webpage) or self._get_default_ytcfg()

    player_responses, player_url = self._extract_player_responses(
        self._get_requested_clients(url, smuggled_data),
        video_id, webpage, master_ytcfg)

    get_first = lambda obj, keys, **kwargs: traverse_obj(obj, (..., *variadic(keys)), **kwargs, get_all=False)

    playability_statuses = traverse_obj(
        player_responses, (..., 'playabilityStatus'), expected_type=dict, default=[])

    trailer_video_id = get_first(
        playability_statuses,
        ('errorScreen', 'playerLegacyDesktopYpcTrailerRenderer', 'trailerVideoId'),
        expected_type=str)
    if trailer_video_id:
        return self.url_result(
            trailer_video_id, self.ie_key(), trailer_video_id)

    search_meta = ((lambda x: self._html_search_meta(x, webpage, default=None))
                   if webpage else (lambda x: None))

    video_details = traverse_obj(
        player_responses, (..., 'videoDetails'), expected_type=dict, default=[])
    microformats = traverse_obj(
        player_responses, (..., 'microformat', 'playerMicroformatRenderer'),
        expected_type=dict, default=[])
    video_title = (
        get_first(video_details, 'title')
        or self._get_text(microformats, (..., 'title'))
        or search_meta(['og:title', 'twitter:title', 'title']))
    video_description = get_first(video_details, 'shortDescription')

    multifeed_metadata_list = get_first(
        player_responses,
        ('multicamera', 'playerLegacyMulticameraRenderer', 'metadataList'),
        expected_type=str)
    if multifeed_metadata_list and not smuggled_data.get('force_singlefeed'):
        if self.get_param('noplaylist'):
            self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
        else:
            entries = []
            feed_ids = []
            for feed in multifeed_metadata_list.split(','):
                # Unquote should take place before split on comma (,) since textual
                # fields may contain comma as well (see
                # https://github.com/ytdl-org/youtube-dl/issues/8536)
                feed_data = compat_parse_qs(
                    compat_urllib_parse_unquote_plus(feed))

                def feed_entry(name):
                    return try_get(
                        feed_data, lambda x: x[name][0], compat_str)

                feed_id = feed_entry('id')
                if not feed_id:
                    continue
                feed_title = feed_entry('title')
                title = video_title
                if feed_title:
                    title += ' (%s)' % feed_title
                entries.append({
                    '_type': 'url_transparent',
                    'ie_key': 'Youtube',
                    'url': smuggle_url(
                        '%swatch?v=%s' % (base_url, feed_data['id'][0]),
                        {'force_singlefeed': True}),
                    'title': title,
                })
                feed_ids.append(feed_id)
            self.to_screen(
                'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
                % (', '.join(feed_ids), video_id))
            return self.playlist_result(
                entries, video_id, video_title, video_description)

    live_broadcast_details = traverse_obj(microformats, (..., 'liveBroadcastDetails'))
    is_live = get_first(video_details, 'isLive')
    if is_live is None:
        is_live = get_first(live_broadcast_details, 'isLiveNow')

    streaming_data = traverse_obj(player_responses, (..., 'streamingData'), default=[])
    formats = list(self._extract_formats(streaming_data, video_id, player_url, is_live))

    if not formats:
        if not self.get_param('allow_unplayable_formats') and traverse_obj(streaming_data, (..., 'licenseInfos')):
            self.report_drm(video_id)
        pemr = get_first(
            playability_statuses,
            ('errorScreen', 'playerErrorMessageRenderer'), expected_type=dict) or {}
        reason = self._get_text(pemr, 'reason') or get_first(playability_statuses, 'reason')
        subreason = clean_html(self._get_text(pemr, 'subreason') or '')
        if subreason:
            if subreason == 'The uploader has not made this video available in your country.':
                countries = get_first(microformats, 'availableCountries')
                if not countries:
                    regions_allowed = search_meta('regionsAllowed')
                    countries = regions_allowed.split(',') if regions_allowed else None
                self.raise_geo_restricted(subreason, countries, metadata_available=True)
            # reason may be None when only a subreason is given
            reason = f'{reason}. {subreason}' if reason else subreason
        if reason:
            self.raise_no_formats(reason, expected=True)

    # Source is given priority since formats that throttle are given lower source_preference
    # When throttling issue is fully fixed, remove this
    self._sort_formats(formats, ('quality', 'res', 'fps', 'hdr:12', 'source', 'codec:vp9.2', 'lang', 'proto'))

    keywords = get_first(video_details, 'keywords', expected_type=list) or []
    if not keywords and webpage:
        keywords = [
            unescapeHTML(m.group('content'))
            for m in re.finditer(self._meta_regex('og:video:tag'), webpage)]
    for keyword in keywords:
        if keyword.startswith('yt:stretch='):
            mobj = re.search(r'(\d+)\s*:\s*(\d+)', keyword)
            if mobj:
                # NB: float is intentional for forcing float division
                w, h = (float(v) for v in mobj.groups())
                if w > 0 and h > 0:
                    ratio = w / h
                    for f in formats:
                        if f.get('vcodec') != 'none':
                            f['stretched_ratio'] = ratio
                    break

    thumbnails = []
    thumbnail_dicts = traverse_obj(
        (video_details, microformats), (..., ..., 'thumbnail', 'thumbnails', ...),
        expected_type=dict, default=[])
    for thumbnail in thumbnail_dicts:
        thumbnail_url = thumbnail.get('url')
        if not thumbnail_url:
            continue
        # Sometimes youtube gives a wrong thumbnail URL. See:
        # https://github.com/yt-dlp/yt-dlp/issues/233
        # https://github.com/ytdl-org/youtube-dl/issues/28023
        if 'maxresdefault' in thumbnail_url:
            thumbnail_url = thumbnail_url.split('?')[0]
        thumbnails.append({
            'url': thumbnail_url,
            'height': int_or_none(thumbnail.get('height')),
            'width': int_or_none(thumbnail.get('width')),
        })
    thumbnail_url = search_meta(['og:image', 'twitter:image'])
    if thumbnail_url:
        thumbnails.append({
            'url': thumbnail_url,
        })
    original_thumbnails = thumbnails.copy()

    # The best resolution thumbnails sometimes does not appear in the webpage
    # See: https://github.com/ytdl-org/youtube-dl/issues/29049, https://github.com/yt-dlp/yt-dlp/issues/340
    # List of possible thumbnails - Ref: <https://stackoverflow.com/a/20542029>
    thumbnail_names = [
        'maxresdefault', 'hq720', 'sddefault', 'sd1', 'sd2', 'sd3',
        'hqdefault', 'hq1', 'hq2', 'hq3', '0',
        'mqdefault', 'mq1', 'mq2', 'mq3',
        'default', '1', '2', '3'
    ]
    n_thumbnail_names = len(thumbnail_names)
    thumbnails.extend({
        'url': 'https://i.ytimg.com/vi{webp}/{video_id}/{name}{live}.{ext}'.format(
            video_id=video_id, name=name, ext=ext,
            webp='_webp' if ext == 'webp' else '', live='_live' if is_live else ''),
    } for name in thumbnail_names for ext in ('webp', 'jpg'))
    for thumb in thumbnails:
        i = next((i for i, t in enumerate(thumbnail_names) if f'/{video_id}/{t}' in thumb['url']), n_thumbnail_names)
        thumb['preference'] = (0 if '.webp' in thumb['url'] else -1) - (2 * i)
    self._remove_duplicate_formats(thumbnails)
    self._downloader._sort_thumbnails(original_thumbnails)

    category = get_first(microformats, 'category') or search_meta('genre')
    channel_id = str_or_none(
        get_first(video_details, 'channelId')
        or get_first(microformats, 'externalChannelId')
        or search_meta('channelId'))
    duration = int_or_none(
        get_first(video_details, 'lengthSeconds')
        or get_first(microformats, 'lengthSeconds')
        or parse_duration(search_meta('duration'))) or None
    owner_profile_url = get_first(microformats, 'ownerProfileUrl')

    live_content = get_first(video_details, 'isLiveContent')
    is_upcoming = get_first(video_details, 'isUpcoming')
    if is_live is None:
        if is_upcoming or live_content is False:
            is_live = False
    if is_upcoming is None and (live_content or is_live):
        is_upcoming = False
    live_starttime = parse_iso8601(get_first(live_broadcast_details, 'startTimestamp'))
    live_endtime = parse_iso8601(get_first(live_broadcast_details, 'endTimestamp'))
    if not duration and live_endtime and live_starttime:
        duration = live_endtime - live_starttime

    info = {
        'id': video_id,
        'title': self._live_title(video_title) if is_live else video_title,
        'formats': formats,
        'thumbnails': thumbnails,
        # The best thumbnail that we are sure exists. Prevents unnecessary
        # URL checking if user don't care about getting the best possible thumbnail
        'thumbnail': traverse_obj(original_thumbnails, (-1, 'url')),
        'description': video_description,
        'upload_date': unified_strdate(
            get_first(microformats, 'uploadDate')
            or search_meta('uploadDate')),
        'uploader': get_first(video_details, 'author'),
        # default=None: a profile URL of another shape must not abort the
        # whole extraction just because the uploader id cannot be found
        'uploader_id': self._search_regex(
            r'/(?:channel|user)/([^/?&#]+)', owner_profile_url, 'uploader id',
            default=None) if owner_profile_url else None,
        'uploader_url': owner_profile_url,
        'channel_id': channel_id,
        'channel_url': f'https://www.youtube.com/channel/{channel_id}' if channel_id else None,
        'duration': duration,
        'view_count': int_or_none(
            get_first((video_details, microformats), (..., 'viewCount'))
            or search_meta('interactionCount')),
        'average_rating': float_or_none(get_first(video_details, 'averageRating')),
        'age_limit': 18 if (
            get_first(microformats, 'isFamilySafe') is False
            or search_meta('isFamilyFriendly') == 'false'
            or search_meta('og:restrictions:age') == '18+') else 0,
        'webpage_url': webpage_url,
        'categories': [category] if category else None,
        'tags': keywords,
        'playable_in_embed': get_first(playability_statuses, 'playableInEmbed'),
        'is_live': is_live,
        'was_live': (False if is_live or is_upcoming or live_content is False
                     else None if is_live is None or is_upcoming is None
                     else live_content),
        'live_status': 'is_upcoming' if is_upcoming else None,  # rest will be set by YoutubeDL
        'release_timestamp': live_starttime,
    }

    pctr = traverse_obj(player_responses, (..., 'captions', 'playerCaptionsTracklistRenderer'), expected_type=dict)
    if pctr:
        def get_lang_code(track):
            return (remove_start(track.get('vssId') or '', '.').replace('.', '-')
                    or track.get('languageCode'))

        # Converted into dicts to remove duplicates
        captions = {
            get_lang_code(sub): sub
            for sub in traverse_obj(pctr, (..., 'captionTracks', ...), default=[])}
        translation_languages = {
            lang.get('languageCode'): self._get_text(lang.get('languageName'), max_runs=1)
            for lang in traverse_obj(pctr, (..., 'translationLanguages', ...), default=[])}

        def process_language(container, base_url, lang_code, sub_name, query):
            lang_subs = container.setdefault(lang_code, [])
            for fmt in self._SUBTITLE_FORMATS:
                query.update({
                    'fmt': fmt,
                })
                lang_subs.append({
                    'ext': fmt,
                    'url': update_url_query(base_url, query),
                    'name': sub_name,
                })

        subtitles, automatic_captions = {}, {}
        for lang_code, caption_track in captions.items():
            base_url = caption_track.get('baseUrl')
            if not base_url:
                continue
            lang_name = self._get_text(caption_track, 'name', max_runs=1)
            if caption_track.get('kind') != 'asr':
                if not lang_code:
                    continue
                process_language(
                    subtitles, base_url, lang_code, lang_name, {})
                if not caption_track.get('isTranslatable'):
                    continue
            for trans_code, trans_name in translation_languages.items():
                if not trans_code:
                    continue
                # The `tlang` query must name only the target language; the
                # "<target>-<source>" code built below is merely our dict key
                orig_trans_code = trans_code
                if caption_track.get('kind') != 'asr':
                    trans_code += f'-{lang_code}'
                    trans_name += format_field(lang_name, template=' from %s')
                process_language(
                    automatic_captions, base_url, trans_code, trans_name, {'tlang': orig_trans_code})
        info['automatic_captions'] = automatic_captions
        info['subtitles'] = subtitles

    parsed_url = compat_urllib_parse_urlparse(url)
    for component in [parsed_url.fragment, parsed_url.query]:
        query = compat_parse_qs(component)
        for k, v in query.items():
            for d_k, s_ks in [('start', ('start', 't')), ('end', ('end',))]:
                d_k += '_time'
                if d_k not in info and k in s_ks:
                    info[d_k] = parse_duration(query[k][0])

    # Youtube Music Auto-generated description
    if video_description:
        mobj = re.search(r'(?s)(?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?.+\nAuto-generated by YouTube\.\s*$', video_description)
        if mobj:
            release_year = mobj.group('release_year')
            release_date = mobj.group('release_date')
            if release_date:
                release_date = release_date.replace('-', '')
                if not release_year:
                    release_year = release_date[:4]
            info.update({
                # strip() belongs on the matched text, not on the group name
                'album': mobj.group('album').strip(),
                'artist': mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·')),
                'track': mobj.group('track').strip(),
                'release_date': release_date,
                'release_year': int_or_none(release_year),
            })

    initial_data = None
    if webpage:
        initial_data = self._extract_yt_initial_variable(
            webpage, self._YT_INITIAL_DATA_RE, video_id,
            'yt initial data')
    if not initial_data:
        query = {'videoId': video_id}
        query.update(self._get_checkok_params())
        initial_data = self._extract_response(
            item_id=video_id, ep='next', fatal=False,
            ytcfg=master_ytcfg, query=query,
            headers=self.generate_api_headers(ytcfg=master_ytcfg),
            note='Downloading initial data API JSON')

    try:
        # This will error if there is no livechat
        initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
        info.setdefault('subtitles', {})['live_chat'] = [{
            'url': 'https://www.youtube.com/watch?v=%s' % video_id,  # url is needed to set cookies
            'video_id': video_id,
            'ext': 'json',
            'protocol': 'youtube_live_chat' if is_live or is_upcoming else 'youtube_live_chat_replay',
        }]
    except (KeyError, IndexError, TypeError):
        pass

    # Always bound, even without initial data, since the comment extraction
    # at the end of this method needs it unconditionally
    contents = try_get(
        initial_data,
        lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'],
        list) or []

    if initial_data:
        info['chapters'] = (
            self._extract_chapters_from_json(initial_data, duration)
            or self._extract_chapters_from_engagement_panel(initial_data, duration)
            or None)

        for content in contents:
            vpir = content.get('videoPrimaryInfoRenderer')
            if vpir:
                stl = vpir.get('superTitleLink')
                if stl:
                    stl = self._get_text(stl)
                    if try_get(
                            vpir,
                            lambda x: x['superTitleIcon']['iconType']) == 'LOCATION_PIN':
                        info['location'] = stl
                    else:
                        mobj = re.search(r'(.+?)\s*S(\d+)\s*•\s*E(\d+)', stl)
                        if mobj:
                            info.update({
                                'series': mobj.group(1),
                                'season_number': int(mobj.group(2)),
                                'episode_number': int(mobj.group(3)),
                            })
                for tlb in (try_get(
                        vpir,
                        lambda x: x['videoActions']['menuRenderer']['topLevelButtons'],
                        list) or []):
                    tbr = tlb.get('toggleButtonRenderer') or {}
                    for getter, regex in [(
                            lambda x: x['defaultText']['accessibility']['accessibilityData'],
                            r'(?P<count>[\d,]+)\s*(?P<type>(?:dis)?like)'), ([
                                lambda x: x['accessibility'],
                                lambda x: x['accessibilityData']['accessibilityData'],
                            ], r'(?P<type>(?:dis)?like) this video along with (?P<count>[\d,]+) other people')]:
                        label = (try_get(tbr, getter, dict) or {}).get('label')
                        if label:
                            mobj = re.match(regex, label)
                            if mobj:
                                info[mobj.group('type') + '_count'] = str_to_int(mobj.group('count'))
                                break
                sbr_tooltip = try_get(
                    vpir, lambda x: x['sentimentBar']['sentimentBarRenderer']['tooltip'])
                if sbr_tooltip:
                    like_count, dislike_count = sbr_tooltip.split(' / ')
                    info.update({
                        'like_count': str_to_int(like_count),
                        'dislike_count': str_to_int(dislike_count),
                    })
            vsir = content.get('videoSecondaryInfoRenderer')
            if vsir:
                info['channel'] = self._get_text(vsir, ('owner', 'videoOwnerRenderer', 'title'))
                rows = try_get(
                    vsir,
                    lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows'],
                    list) or []
                multiple_songs = False
                for row in rows:
                    if try_get(row, lambda x: x['metadataRowRenderer']['hasDividerLine']) is True:
                        multiple_songs = True
                        break
                for row in rows:
                    mrr = row.get('metadataRowRenderer') or {}
                    mrr_title = mrr.get('title')
                    if not mrr_title:
                        continue
                    mrr_title = self._get_text(mrr, 'title')
                    mrr_contents_text = self._get_text(mrr, ('contents', 0))
                    if mrr_title == 'License':
                        info['license'] = mrr_contents_text
                    elif not multiple_songs:
                        if mrr_title == 'Album':
                            info['album'] = mrr_contents_text
                        elif mrr_title == 'Artist':
                            info['artist'] = mrr_contents_text
                        elif mrr_title == 'Song':
                            info['track'] = mrr_contents_text

    fallbacks = {
        'channel': 'uploader',
        'channel_id': 'uploader_id',
        'channel_url': 'uploader_url',
    }
    for to, frm in fallbacks.items():
        if not info.get(to):
            info[to] = info.get(frm)

    for s_k, d_k in [('artist', 'creator'), ('track', 'alt_title')]:
        v = info.get(s_k)
        if v:
            info[d_k] = v

    is_private = get_first(video_details, 'isPrivate', expected_type=bool)
    is_unlisted = get_first(microformats, 'isUnlisted', expected_type=bool)
    is_membersonly = None
    is_premium = None
    if initial_data and is_private is not None:
        is_membersonly = False
        is_premium = False
        badge_labels = set()
        for content in contents:
            if not isinstance(content, dict):
                continue
            badge_labels.update(self._extract_badges(content.get('videoPrimaryInfoRenderer')))
        for badge_label in badge_labels:
            if badge_label.lower() == 'members only':
                is_membersonly = True
            elif badge_label.lower() == 'premium':
                is_premium = True
            elif badge_label.lower() == 'unlisted':
                is_unlisted = True

    info['availability'] = self._availability(
        is_private=is_private,
        needs_premium=is_premium,
        needs_subscription=is_membersonly,
        needs_auth=info['age_limit'] >= 18,
        is_unlisted=None if is_private is None else is_unlisted)

    info['__post_extractor'] = self.extract_comments(master_ytcfg, video_id, contents, webpage)

    self.mark_watched(video_id, player_responses)

    return info
c5e8d7af 3058
5f6a1245 3059
8bdd16b4 3060class YoutubeTabIE(YoutubeBaseInfoExtractor):
96565c7e 3061 IE_DESC = 'YouTube Tabs'
70d5c17b 3062 _VALID_URL = r'''(?x)
3063 https?://
3064 (?:\w+\.)?
3065 (?:
3066 youtube(?:kids)?\.com|
3067 invidio\.us
3068 )/
3069 (?:
fe03a6cd 3070 (?P<channel_type>channel|c|user|browse)/|
70d5c17b 3071 (?P<not_channel>
9ba5705a 3072 feed/|hashtag/|
70d5c17b 3073 (?:playlist|watch)\?.*?\blist=
3074 )|
29f7c58a 3075 (?!(?:%s)\b) # Direct URLs
70d5c17b 3076 )
3077 (?P<id>[^/?\#&]+)
3078 ''' % YoutubeBaseInfoExtractor._RESERVED_NAMES
8bdd16b4 3079 IE_NAME = 'youtube:tab'
3080
81127aa5 3081 _TESTS = [{
da692b79 3082 'note': 'playlists, multipage',
8bdd16b4 3083 'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid',
3084 'playlist_mincount': 94,
3085 'info_dict': {
3086 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
3087 'title': 'Игорь Клейнер - Playlists',
3088 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
deaec5af 3089 'uploader': 'Игорь Клейнер',
3090 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
8bdd16b4 3091 },
3092 }, {
da692b79 3093 'note': 'playlists, multipage, different order',
8bdd16b4 3094 'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
3095 'playlist_mincount': 94,
3096 'info_dict': {
3097 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
3098 'title': 'Игорь Клейнер - Playlists',
3099 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
deaec5af 3100 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
3101 'uploader': 'Игорь Клейнер',
8bdd16b4 3102 },
201c1459 3103 }, {
da692b79 3104 'note': 'playlists, series',
201c1459 3105 'url': 'https://www.youtube.com/c/3blue1brown/playlists?view=50&sort=dd&shelf_id=3',
3106 'playlist_mincount': 5,
3107 'info_dict': {
3108 'id': 'UCYO_jab_esuFRV4b17AJtAw',
3109 'title': '3Blue1Brown - Playlists',
3110 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
da692b79 3111 'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
3112 'uploader': '3Blue1Brown',
201c1459 3113 },
8bdd16b4 3114 }, {
da692b79 3115 'note': 'playlists, singlepage',
8bdd16b4 3116 'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
3117 'playlist_mincount': 4,
3118 'info_dict': {
3119 'id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
3120 'title': 'ThirstForScience - Playlists',
3121 'description': 'md5:609399d937ea957b0f53cbffb747a14c',
deaec5af 3122 'uploader': 'ThirstForScience',
3123 'uploader_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
8bdd16b4 3124 }
3125 }, {
3126 'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',
3127 'only_matching': True,
3128 }, {
da692b79 3129 'note': 'basic, single video playlist',
0e30a7b9 3130 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
81127aa5 3131 'info_dict': {
0e30a7b9 3132 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
3133 'uploader': 'Sergey M.',
3134 'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
3867038a 3135 'title': 'youtube-dl public playlist',
81127aa5 3136 },
0e30a7b9 3137 'playlist_count': 1,
9291475f 3138 }, {
da692b79 3139 'note': 'empty playlist',
0e30a7b9 3140 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
9291475f 3141 'info_dict': {
0e30a7b9 3142 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
3143 'uploader': 'Sergey M.',
3144 'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
3867038a 3145 'title': 'youtube-dl empty playlist',
9291475f
PH
3146 },
3147 'playlist_count': 0,
3148 }, {
da692b79 3149 'note': 'Home tab',
8bdd16b4 3150 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/featured',
9291475f 3151 'info_dict': {
8bdd16b4 3152 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3153 'title': 'lex will - Home',
3154 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 3155 'uploader': 'lex will',
3156 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 3157 },
8bdd16b4 3158 'playlist_mincount': 2,
9291475f 3159 }, {
da692b79 3160 'note': 'Videos tab',
8bdd16b4 3161 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos',
9291475f 3162 'info_dict': {
8bdd16b4 3163 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3164 'title': 'lex will - Videos',
3165 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 3166 'uploader': 'lex will',
3167 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 3168 },
8bdd16b4 3169 'playlist_mincount': 975,
9291475f 3170 }, {
da692b79 3171 'note': 'Videos tab, sorted by popular',
8bdd16b4 3172 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos?view=0&sort=p&flow=grid',
9291475f 3173 'info_dict': {
8bdd16b4 3174 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3175 'title': 'lex will - Videos',
3176 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 3177 'uploader': 'lex will',
3178 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 3179 },
8bdd16b4 3180 'playlist_mincount': 199,
9291475f 3181 }, {
da692b79 3182 'note': 'Playlists tab',
8bdd16b4 3183 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/playlists',
9291475f 3184 'info_dict': {
8bdd16b4 3185 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3186 'title': 'lex will - Playlists',
3187 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 3188 'uploader': 'lex will',
3189 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 3190 },
8bdd16b4 3191 'playlist_mincount': 17,
ac7553d0 3192 }, {
da692b79 3193 'note': 'Community tab',
8bdd16b4 3194 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community',
ac7553d0 3195 'info_dict': {
8bdd16b4 3196 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3197 'title': 'lex will - Community',
3198 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 3199 'uploader': 'lex will',
3200 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
8bdd16b4 3201 },
3202 'playlist_mincount': 18,
87dadd45 3203 }, {
da692b79 3204 'note': 'Channels tab',
8bdd16b4 3205 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels',
87dadd45 3206 'info_dict': {
8bdd16b4 3207 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3208 'title': 'lex will - Channels',
3209 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 3210 'uploader': 'lex will',
3211 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
8bdd16b4 3212 },
deaec5af 3213 'playlist_mincount': 12,
cd684175 3214 }, {
3215 'note': 'Search tab',
3216 'url': 'https://www.youtube.com/c/3blue1brown/search?query=linear%20algebra',
3217 'playlist_mincount': 40,
3218 'info_dict': {
3219 'id': 'UCYO_jab_esuFRV4b17AJtAw',
3220 'title': '3Blue1Brown - Search - linear algebra',
3221 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
3222 'uploader': '3Blue1Brown',
3223 'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
3224 },
6b08cdf6 3225 }, {
a0566bbf 3226 'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',
8bdd16b4 3227 'only_matching': True,
3228 }, {
a0566bbf 3229 'url': 'https://www.youtubekids.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
8bdd16b4 3230 'only_matching': True,
3231 }, {
a0566bbf 3232 'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
8bdd16b4 3233 'only_matching': True,
3234 }, {
3235 'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
3236 'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
3237 'info_dict': {
3238 'title': '29C3: Not my department',
3239 'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
3240 'uploader': 'Christiaan008',
3241 'uploader_id': 'UCEPzS1rYsrkqzSLNp76nrcg',
deaec5af 3242 'description': 'md5:a14dc1a8ef8307a9807fe136a0660268',
8bdd16b4 3243 },
3244 'playlist_count': 96,
3245 }, {
3246 'note': 'Large playlist',
3247 'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
6b08cdf6 3248 'info_dict': {
8bdd16b4 3249 'title': 'Uploads from Cauchemar',
3250 'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
3251 'uploader': 'Cauchemar',
3252 'uploader_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',
13a75688 3253 },
8bdd16b4 3254 'playlist_mincount': 1123,
3255 }, {
da692b79 3256 'note': 'even larger playlist, 8832 videos',
8bdd16b4 3257 'url': 'http://www.youtube.com/user/NASAgovVideo/videos',
3258 'only_matching': True,
4b7df0d3
JMF
3259 }, {
3260 'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
3261 'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
3262 'info_dict': {
acf757f4
PH
3263 'title': 'Uploads from Interstellar Movie',
3264 'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
13a75688 3265 'uploader': 'Interstellar Movie',
8bdd16b4 3266 'uploader_id': 'UCXw-G3eDE9trcvY2sBMM_aA',
4b7df0d3 3267 },
481cc733 3268 'playlist_mincount': 21,
358de58c 3269 }, {
3270 'note': 'Playlist with "show unavailable videos" button',
3271 'url': 'https://www.youtube.com/playlist?list=UUTYLiWFZy8xtPwxFwX9rV7Q',
3272 'info_dict': {
3273 'title': 'Uploads from Phim Siêu Nhân Nhật Bản',
3274 'id': 'UUTYLiWFZy8xtPwxFwX9rV7Q',
3275 'uploader': 'Phim Siêu Nhân Nhật Bản',
3276 'uploader_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',
3277 },
da692b79 3278 'playlist_mincount': 200,
5d342002 3279 }, {
da692b79 3280 'note': 'Playlist with unavailable videos in page 7',
5d342002 3281 'url': 'https://www.youtube.com/playlist?list=UU8l9frL61Yl5KFOl87nIm2w',
3282 'info_dict': {
3283 'title': 'Uploads from BlankTV',
3284 'id': 'UU8l9frL61Yl5KFOl87nIm2w',
3285 'uploader': 'BlankTV',
3286 'uploader_id': 'UC8l9frL61Yl5KFOl87nIm2w',
3287 },
da692b79 3288 'playlist_mincount': 1000,
8bdd16b4 3289 }, {
da692b79 3290 'note': 'https://github.com/ytdl-org/youtube-dl/issues/21844',
8bdd16b4 3291 'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
3292 'info_dict': {
3293 'title': 'Data Analysis with Dr Mike Pound',
3294 'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
3295 'uploader_id': 'UC9-y-6csu5WGm29I7JiwpnA',
3296 'uploader': 'Computerphile',
deaec5af 3297 'description': 'md5:7f567c574d13d3f8c0954d9ffee4e487',
8bdd16b4 3298 },
3299 'playlist_mincount': 11,
3300 }, {
a0566bbf 3301 'url': 'https://invidio.us/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
8bdd16b4 3302 'only_matching': True,
dacb3a86 3303 }, {
da692b79 3304 'note': 'Playlist URL that does not actually serve a playlist',
dacb3a86
S
3305 'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
3306 'info_dict': {
3307 'id': 'FqZTN594JQw',
3308 'ext': 'webm',
3309 'title': "Smiley's People 01 detective, Adventure Series, Action",
3310 'uploader': 'STREEM',
3311 'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
ec85ded8 3312 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
dacb3a86
S
3313 'upload_date': '20150526',
3314 'license': 'Standard YouTube License',
3315 'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
3316 'categories': ['People & Blogs'],
3317 'tags': list,
dbdaaa23 3318 'view_count': int,
dacb3a86
S
3319 'like_count': int,
3320 'dislike_count': int,
3321 },
3322 'params': {
3323 'skip_download': True,
3324 },
13a75688 3325 'skip': 'This video is not available.',
dacb3a86 3326 'add_ie': [YoutubeIE.ie_key()],
481cc733 3327 }, {
8bdd16b4 3328 'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',
c0345b82 3329 'only_matching': True,
66b48727 3330 }, {
8bdd16b4 3331 'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
66b48727 3332 'only_matching': True,
a0566bbf 3333 }, {
3334 'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',
3335 'info_dict': {
57015a4a 3336 'id': '3yImotZU3tw', # This will keep changing
a0566bbf 3337 'ext': 'mp4',
deaec5af 3338 'title': compat_str,
a0566bbf 3339 'uploader': 'Sky News',
3340 'uploader_id': 'skynews',
3341 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/skynews',
da692b79 3342 'upload_date': r're:\d{8}',
3343 'description': compat_str,
a0566bbf 3344 'categories': ['News & Politics'],
3345 'tags': list,
3346 'like_count': int,
3347 'dislike_count': int,
3348 },
3349 'params': {
3350 'skip_download': True,
3351 },
da692b79 3352 'expected_warnings': ['Downloading just video ', 'Ignoring subtitle tracks found in '],
a0566bbf 3353 }, {
3354 'url': 'https://www.youtube.com/user/TheYoungTurks/live',
3355 'info_dict': {
3356 'id': 'a48o2S1cPoo',
3357 'ext': 'mp4',
3358 'title': 'The Young Turks - Live Main Show',
3359 'uploader': 'The Young Turks',
3360 'uploader_id': 'TheYoungTurks',
3361 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
3362 'upload_date': '20150715',
3363 'license': 'Standard YouTube License',
3364 'description': 'md5:438179573adcdff3c97ebb1ee632b891',
3365 'categories': ['News & Politics'],
3366 'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
3367 'like_count': int,
3368 'dislike_count': int,
3369 },
3370 'params': {
3371 'skip_download': True,
3372 },
3373 'only_matching': True,
3374 }, {
3375 'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
3376 'only_matching': True,
3377 }, {
3378 'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
3379 'only_matching': True,
09f1580e 3380 }, {
3381 'note': 'A channel that is not live. Should raise error',
3382 'url': 'https://www.youtube.com/user/numberphile/live',
3383 'only_matching': True,
3d3dddc9 3384 }, {
3385 'url': 'https://www.youtube.com/feed/trending',
3386 'only_matching': True,
3387 }, {
3d3dddc9 3388 'url': 'https://www.youtube.com/feed/library',
3389 'only_matching': True,
3390 }, {
3d3dddc9 3391 'url': 'https://www.youtube.com/feed/history',
3392 'only_matching': True,
3393 }, {
3d3dddc9 3394 'url': 'https://www.youtube.com/feed/subscriptions',
3395 'only_matching': True,
3396 }, {
3d3dddc9 3397 'url': 'https://www.youtube.com/feed/watch_later',
3398 'only_matching': True,
3399 }, {
ac56cf38 3400 'note': 'Recommended - redirects to home page.',
3d3dddc9 3401 'url': 'https://www.youtube.com/feed/recommended',
3402 'only_matching': True,
29f7c58a 3403 }, {
da692b79 3404 'note': 'inline playlist with not always working continuations',
29f7c58a 3405 'url': 'https://www.youtube.com/watch?v=UC6u0Tct-Fo&list=PL36D642111D65BE7C',
3406 'only_matching': True,
3407 }, {
3408 'url': 'https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8',
3409 'only_matching': True,
3410 }, {
3411 'url': 'https://www.youtube.com/course',
3412 'only_matching': True,
3413 }, {
3414 'url': 'https://www.youtube.com/zsecurity',
3415 'only_matching': True,
3416 }, {
3417 'url': 'http://www.youtube.com/NASAgovVideo/videos',
3418 'only_matching': True,
3419 }, {
3420 'url': 'https://www.youtube.com/TheYoungTurks/live',
3421 'only_matching': True,
39ed931e 3422 }, {
3423 'url': 'https://www.youtube.com/hashtag/cctv9',
3424 'info_dict': {
3425 'id': 'cctv9',
3426 'title': '#cctv9',
3427 },
3428 'playlist_mincount': 350,
201c1459 3429 }, {
3430 'url': 'https://www.youtube.com/watch?list=PLW4dVinRY435CBE_JD3t-0SRXKfnZHS1P&feature=youtu.be&v=M9cJMXmQ_ZU',
3431 'only_matching': True,
9297939e 3432 }, {
da692b79 3433 'note': 'Requires Premium: should request additional YTM-info webpage (and have format 141) for videos in playlist',
9297939e 3434 'url': 'https://music.youtube.com/playlist?list=PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
3435 'only_matching': True
fe03a6cd 3436 }, {
3437 'note': '/browse/ should redirect to /channel/',
3438 'url': 'https://music.youtube.com/browse/UC1a8OFewdjuLq6KlF8M_8Ng',
3439 'only_matching': True
3440 }, {
3441 'note': 'VLPL, should redirect to playlist?list=PL...',
3442 'url': 'https://music.youtube.com/browse/VLPLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
3443 'info_dict': {
3444 'id': 'PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
3445 'uploader': 'NoCopyrightSounds',
3446 'description': 'Providing you with copyright free / safe music for gaming, live streaming, studying and more!',
3447 'uploader_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',
3448 'title': 'NCS Releases',
3449 },
3450 'playlist_mincount': 166,
18db7548 3451 }, {
3452 'note': 'Topic, should redirect to playlist?list=UU...',
3453 'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',
3454 'info_dict': {
3455 'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',
3456 'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
3457 'title': 'Uploads from Royalty Free Music - Topic',
3458 'uploader': 'Royalty Free Music - Topic',
3459 },
3460 'expected_warnings': [
3461 'A channel/user page was given',
3462 'The URL does not have a videos tab',
3463 ],
3464 'playlist_mincount': 101,
3465 }, {
3466 'note': 'Topic without a UU playlist',
3467 'url': 'https://www.youtube.com/channel/UCtFRv9O2AHqOZjjynzrv-xg',
3468 'info_dict': {
3469 'id': 'UCtFRv9O2AHqOZjjynzrv-xg',
3470 'title': 'UCtFRv9O2AHqOZjjynzrv-xg',
3471 },
3472 'expected_warnings': [
3473 'A channel/user page was given',
3474 'The URL does not have a videos tab',
3475 'Falling back to channel URL',
3476 ],
3477 'playlist_mincount': 9,
abcdd12b 3478 }, {
3479 'note': 'Youtube music Album',
3480 'url': 'https://music.youtube.com/browse/MPREb_gTAcphH99wE',
3481 'info_dict': {
3482 'id': 'OLAK5uy_l1m0thk3g31NmIIz_vMIbWtyv7eZixlH0',
3483 'title': 'Album - Royalty Free Music Library V2 (50 Songs)',
3484 },
3485 'playlist_count': 50,
47193e02 3486 }, {
3487 'note': 'unlisted single video playlist',
3488 'url': 'https://www.youtube.com/playlist?list=PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
3489 'info_dict': {
3490 'uploader_id': 'UC9zHu_mHU96r19o-wV5Qs1Q',
3491 'uploader': 'colethedj',
3492 'id': 'PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
3493 'title': 'yt-dlp unlisted playlist test',
3494 'availability': 'unlisted'
3495 },
3496 'playlist_count': 1,
ac56cf38 3497 }, {
3498 'note': 'API Fallback: Recommended - redirects to home page. Requires visitorData',
3499 'url': 'https://www.youtube.com/feed/recommended',
3500 'info_dict': {
3501 'id': 'recommended',
3502 'title': 'recommended',
3503 },
3504 'playlist_mincount': 50,
3505 'params': {
3506 'skip_download': True,
3507 'extractor_args': {'youtubetab': {'skip': ['webpage']}}
3508 },
3509 }, {
3510 'note': 'API Fallback: /videos tab, sorted by oldest first',
3511 'url': 'https://www.youtube.com/user/theCodyReeder/videos?view=0&sort=da&flow=grid',
3512 'info_dict': {
3513 'id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',
3514 'title': 'Cody\'sLab - Videos',
3515 'description': 'md5:d083b7c2f0c67ee7a6c74c3e9b4243fa',
3516 'uploader': 'Cody\'sLab',
3517 'uploader_id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',
3518 },
3519 'playlist_mincount': 650,
3520 'params': {
3521 'skip_download': True,
3522 'extractor_args': {'youtubetab': {'skip': ['webpage']}}
3523 },
3524 }, {
3525 'note': 'API Fallback: Topic, should redirect to playlist?list=UU...',
3526 'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',
3527 'info_dict': {
3528 'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',
3529 'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
3530 'title': 'Uploads from Royalty Free Music - Topic',
3531 'uploader': 'Royalty Free Music - Topic',
3532 },
3533 'expected_warnings': [
3534 'A channel/user page was given',
3535 'The URL does not have a videos tab',
3536 ],
3537 'playlist_mincount': 101,
3538 'params': {
3539 'skip_download': True,
3540 'extractor_args': {'youtubetab': {'skip': ['webpage']}}
3541 },
29f7c58a 3542 }]
3543
@classmethod
def suitable(cls, url):
    """Tell whether this extractor should handle *url*.

    Any URL accepted by YoutubeIE is rejected here; every other URL is
    judged by the inherited ``suitable`` implementation.
    """
    if YoutubeIE.suitable(url):
        return False
    return super(YoutubeTabIE, cls).suitable(url)
8bdd16b4 3548
3549 def _extract_channel_id(self, webpage):
3550 channel_id = self._html_search_meta(
3551 'channelId', webpage, 'channel id', default=None)
3552 if channel_id:
3553 return channel_id
3554 channel_url = self._html_search_meta(
3555 ('og:url', 'al:ios:url', 'al:android:url', 'al:web:url',
3556 'twitter:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad',
3557 'twitter:app:url:googleplay'), webpage, 'channel url')
3558 return self._search_regex(
3559 r'https?://(?:www\.)?youtube\.com/channel/([^/?#&])+',
3560 channel_url, 'channel id')
15f6397c 3561
8bdd16b4 3562 @staticmethod
cd7c66cf 3563 def _extract_basic_item_renderer(item):
3564 # Modified from _extract_grid_item_renderer
201c1459 3565 known_basic_renderers = (
3566 'playlistRenderer', 'videoRenderer', 'channelRenderer', 'showRenderer'
cd7c66cf 3567 )
3568 for key, renderer in item.items():
201c1459 3569 if not isinstance(renderer, dict):
cd7c66cf 3570 continue
201c1459 3571 elif key in known_basic_renderers:
3572 return renderer
3573 elif key.startswith('grid') and key.endswith('Renderer'):
3574 return renderer
8bdd16b4 3575
def _grid_entries(self, grid_renderer):
    """Yield one entry per recognised item of *grid_renderer*.

    Each item is resolved to its renderer dict and turned into, in order
    of precedence: a playlist URL result, an extracted video, a channel
    URL result, or a URL result for the item's navigation endpoint when
    one of the YouTube extractors accepts that URL. Items matching none
    of these are skipped.
    """
    # A renderer without a usable ``items`` list yields nothing rather
    # than aborting the whole extraction with a KeyError/TypeError.
    items = try_get(grid_renderer, lambda x: x['items'], list) or []
    for item in items:
        if not isinstance(item, dict):
            continue
        renderer = self._extract_basic_item_renderer(item)
        if not isinstance(renderer, dict):
            continue
        title = self._get_text(renderer, 'title')

        # playlist
        playlist_id = renderer.get('playlistId')
        if playlist_id:
            yield self.url_result(
                'https://www.youtube.com/playlist?list=%s' % playlist_id,
                ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
                video_title=title)
            continue
        # video
        video_id = renderer.get('videoId')
        if video_id:
            yield self._extract_video(renderer)
            continue
        # channel
        channel_id = renderer.get('channelId')
        if channel_id:
            yield self.url_result(
                'https://www.youtube.com/channel/%s' % channel_id,
                ie=YoutubeTabIE.ie_key(), video_title=title)
            continue
        # generic endpoint URL support
        ep_url = urljoin('https://www.youtube.com/', try_get(
            renderer, lambda x: x['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'],
            compat_str))
        if ep_url:
            # The first extractor that accepts the URL wins
            for ie in (YoutubeTabIE, YoutubePlaylistIE, YoutubeIE):
                if ie.suitable(ep_url):
                    yield self.url_result(
                        ep_url, ie=ie.ie_key(), video_id=ie._match_id(ep_url), video_title=title)
                    break
8bdd16b4 3615
3d3dddc9 3616 def _shelf_entries_from_content(self, shelf_renderer):
3617 content = shelf_renderer.get('content')
3618 if not isinstance(content, dict):
8bdd16b4 3619 return
cd7c66cf 3620 renderer = content.get('gridRenderer') or content.get('expandedShelfContentsRenderer')
3d3dddc9 3621 if renderer:
3622 # TODO: add support for nested playlists so each shelf is processed
3623 # as separate playlist
3624 # TODO: this includes only first N items
3625 for entry in self._grid_entries(renderer):
3626 yield entry
3627 renderer = content.get('horizontalListRenderer')
3628 if renderer:
3629 # TODO
3630 pass
8bdd16b4 3631
def _shelf_entries(self, shelf_renderer, skip_channels=False):
    """Yield entries for a shelf.

    When the shelf has an endpoint URL, a URL result for it (titled with
    the shelf title) is yielded first. With *skip_channels* set, a shelf
    whose URL contains ``/channels?`` produces nothing at all. In every
    other case the entries embedded in the shelf content follow.
    """
    endpoint_path = try_get(
        shelf_renderer,
        lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        compat_str)
    shelf_url = urljoin('https://www.youtube.com', endpoint_path)
    if shelf_url:
        # Skip links to other channels. Note that checking for
        # endpoint.commandMetadata.webCommandMetadata.webPageType == WEB_PAGE_TYPE_CHANNEL
        # will not work
        if skip_channels and '/channels?' in shelf_url:
            return
        shelf_title = self._get_text(shelf_renderer, 'title')
        yield self.url_result(shelf_url, video_title=shelf_title)
    # Shelf may not contain shelf URL, fallback to extraction from content
    for embedded in self._shelf_entries_from_content(shelf_renderer):
        yield embedded
c5e8d7af 3648
8bdd16b4 3649 def _playlist_entries(self, video_list_renderer):
3650 for content in video_list_renderer['contents']:
3651 if not isinstance(content, dict):
3652 continue
3653 renderer = content.get('playlistVideoRenderer') or content.get('playlistPanelVideoRenderer')
3654 if not isinstance(renderer, dict):
3655 continue
3656 video_id = renderer.get('videoId')
3657 if not video_id:
3658 continue
3659 yield self._extract_video(renderer)
07aeced6 3660
def _rich_entries(self, rich_grid_renderer):
    """Yield the video held at ``content.videoRenderer``, if it has a ``videoId``."""
    video_renderer = try_get(
        rich_grid_renderer, lambda x: x['content']['videoRenderer'], dict) or {}
    if video_renderer.get('videoId'):
        yield self._extract_video(video_renderer)
3668
8bdd16b4 3669 def _video_entry(self, video_renderer):
3670 video_id = video_renderer.get('videoId')
3671 if video_id:
3672 return self._extract_video(video_renderer)
dacb3a86 3673
def _post_thread_entries(self, post_thread_renderer):
    """Yield the entries referenced by a community post.

    In order: the attached video (if any), the attached playlist (if any),
    then every link in the post text that YoutubeIE accepts, except a link
    pointing at the attached video itself.
    """
    post = try_get(
        post_thread_renderer, lambda x: x['post']['backstagePostRenderer'], dict)
    if not post:
        return

    # video attachment
    attached_video = try_get(
        post, lambda x: x['backstageAttachment']['videoRenderer'], dict) or {}
    video_id = attached_video.get('videoId')
    if video_id:
        video_entry = self._extract_video(attached_video)
        if video_entry:
            yield video_entry

    # playlist attachment
    playlist_id = try_get(
        post, lambda x: x['backstageAttachment']['playlistRenderer']['playlistId'], compat_str)
    if playlist_id:
        playlist_url = 'https://www.youtube.com/playlist?list=%s' % playlist_id
        yield self.url_result(
            playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)

    # inline video links
    text_runs = try_get(post, lambda x: x['contentText']['runs'], list) or []
    for text_run in text_runs:
        if not isinstance(text_run, dict):
            continue
        link = try_get(
            text_run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'], compat_str)
        if not link or not YoutubeIE.suitable(link):
            continue
        linked_id = YoutubeIE._match_id(link)
        # The attached video was already yielded above
        if linked_id == video_id:
            continue
        yield self.url_result(link, ie=YoutubeIE.ie_key(), video_id=linked_id)
dacb3a86 3709
8bdd16b4 3710 def _post_thread_continuation_entries(self, post_thread_continuation):
3711 contents = post_thread_continuation.get('contents')
3712 if not isinstance(contents, list):
3713 return
3714 for content in contents:
3715 renderer = content.get('backstagePostThreadRenderer')
3716 if not isinstance(renderer, dict):
3717 continue
3718 for entry in self._post_thread_entries(renderer):
3719 yield entry
07aeced6 3720
39ed931e 3721 r''' # unused
3722 def _rich_grid_entries(self, contents):
3723 for content in contents:
3724 video_renderer = try_get(content, lambda x: x['richItemRenderer']['content']['videoRenderer'], dict)
3725 if video_renderer:
3726 entry = self._video_entry(video_renderer)
3727 if entry:
3728 yield entry
3729 '''
def _entries(self, tab, item_id, ytcfg, account_syncid, visitor_data):
    """Yield every entry of *tab*, following continuations page by page.

    The entries embedded in the tab content are yielded first. After
    that, continuation pages are requested through ``_extract_response``
    until no continuation is left, a request returns nothing, or a
    response contains no renderer this method knows how to handle.

    *item_id* is only used to label the page requests; *ytcfg*,
    *account_syncid* and *visitor_data* are passed on to
    ``generate_api_headers``.
    """

    def extract_entries(parent_renderer):  # this needs to be called again for continuation to work with feeds
        # Yields the entries below parent_renderer['contents'] and stores
        # the continuation it finds in continuation_list[0] as a side effect.
        contents = try_get(parent_renderer, lambda x: x['contents'], list) or []
        for content in contents:
            if not isinstance(content, dict):
                continue
            is_renderer = try_get(content, lambda x: x['itemSectionRenderer'], dict)
            if not is_renderer:
                # Not an item section: only rich items are handled here
                renderer = content.get('richItemRenderer')
                if renderer:
                    for entry in self._rich_entries(renderer):
                        yield entry
                    continuation_list[0] = self._extract_continuation(parent_renderer)
                continue
            isr_contents = try_get(is_renderer, lambda x: x['contents'], list) or []
            for isr_content in isr_contents:
                if not isinstance(isr_content, dict):
                    continue

                # Renderer key -> callable producing the entries for it
                known_renderers = {
                    'playlistVideoListRenderer': self._playlist_entries,
                    'gridRenderer': self._grid_entries,
                    'shelfRenderer': lambda x: self._shelf_entries(x, tab.get('title') != 'Channels'),
                    'backstagePostThreadRenderer': self._post_thread_entries,
                    'videoRenderer': lambda x: [self._video_entry(x)],
                }
                for key, renderer in isr_content.items():
                    if key not in known_renderers:
                        continue
                    for entry in known_renderers[key](renderer):
                        if entry:
                            yield entry
                    continuation_list[0] = self._extract_continuation(renderer)
                    # Only the first known renderer of each item is used
                    break

            # NOTE(review): nesting of the two fallbacks below is
            # reconstructed from the flattened source - confirm
            if not continuation_list[0]:
                continuation_list[0] = self._extract_continuation(is_renderer)

        if not continuation_list[0]:
            continuation_list[0] = self._extract_continuation(parent_renderer)

    # One-element list shared with extract_entries so that the nested
    # generator can hand back the continuation it found
    continuation_list = [None]  # Python 2 does not support nonlocal
    tab_content = try_get(tab, lambda x: x['content'], dict)
    if not tab_content:
        return
    parent_renderer = (
        try_get(tab_content, lambda x: x['sectionListRenderer'], dict)
        or try_get(tab_content, lambda x: x['richGridRenderer'], dict) or {})
    for entry in extract_entries(parent_renderer):
        yield entry
    continuation = continuation_list[0]

    for page_num in itertools.count(1):
        if not continuation:
            break
        headers = self.generate_api_headers(
            ytcfg=ytcfg, account_syncid=account_syncid, visitor_data=visitor_data)
        response = self._extract_response(
            item_id='%s page %s' % (item_id, page_num),
            query=continuation, headers=headers, ytcfg=ytcfg,
            check_get_keys=('continuationContents', 'onResponseReceivedActions', 'onResponseReceivedEndpoints'))

        if not response:
            break
        # Extracting updated visitor data is required to prevent an infinite extraction loop in some cases
        # See: https://github.com/ytdl-org/youtube-dl/issues/28702
        visitor_data = self._extract_visitor_data(response) or visitor_data

        # Response shape 1: response['continuationContents'][<key>]
        known_continuation_renderers = {
            'playlistVideoListContinuation': self._playlist_entries,
            'gridContinuation': self._grid_entries,
            'itemSectionContinuation': self._post_thread_continuation_entries,
            'sectionListContinuation': extract_entries,  # for feeds
        }
        continuation_contents = try_get(
            response, lambda x: x['continuationContents'], dict) or {}
        continuation_renderer = None
        for key, value in continuation_contents.items():
            if key not in known_continuation_renderers:
                continue
            continuation_renderer = value
            # Reset so a continuation stored by extract_entries on an
            # earlier page is not reused
            continuation_list = [None]
            for entry in known_continuation_renderers[key](continuation_renderer):
                yield entry
            continuation = continuation_list[0] or self._extract_continuation(continuation_renderer)
            break
        if continuation_renderer:
            continue

        # Response shape 2: a list of continuation items appended by the
        # first onResponseReceived* action. Maps the key of the first
        # item to (entry generator, key the generator reads the items from).
        known_renderers = {
            'gridPlaylistRenderer': (self._grid_entries, 'items'),
            'gridVideoRenderer': (self._grid_entries, 'items'),
            'gridChannelRenderer': (self._grid_entries, 'items'),
            'playlistVideoRenderer': (self._playlist_entries, 'contents'),
            'itemSectionRenderer': (extract_entries, 'contents'),  # for feeds
            'richItemRenderer': (extract_entries, 'contents'),  # for hashtag
            'backstagePostThreadRenderer': (self._post_thread_continuation_entries, 'contents')
        }
        on_response_received = dict_get(response, ('onResponseReceivedActions', 'onResponseReceivedEndpoints'))
        continuation_items = try_get(
            on_response_received, lambda x: x[0]['appendContinuationItemsAction']['continuationItems'], list)
        continuation_item = try_get(continuation_items, lambda x: x[0], dict) or {}
        video_items_renderer = None
        for key, value in continuation_item.items():
            if key not in known_renderers:
                continue
            # Wrap the raw item list in the dict shape the generator expects
            video_items_renderer = {known_renderers[key][1]: continuation_items}
            continuation_list = [None]
            for entry in known_renderers[key][0](video_items_renderer):
                yield entry
            continuation = continuation_list[0] or self._extract_continuation(video_items_renderer)
            break
        if video_items_renderer:
            continue
        # Nothing recognisable in this response: stop paging
        break
9558dcec 3846
@staticmethod
def _extract_selected_tab(tabs):
    """
    Return the renderer of the first tab that is flagged as selected.

    For every tab, the ``tabRenderer`` / ``expandableTabRenderer`` value is
    looked up via dict_get; the first one whose ``selected`` value is
    literally ``True`` wins.

    @param tabs: iterable of tab dicts
    @raises ExtractorError: if no tab is marked as selected
    """
    candidates = (
        dict_get(entry, ('tabRenderer', 'expandableTabRenderer')) or {}
        for entry in tabs)
    # Candidates are never None (falsy lookups are replaced by {}), so None
    # is a safe "nothing found" sentinel here
    chosen = next((r for r in candidates if r.get('selected') is True), None)
    if chosen is None:
        raise ExtractorError('Unable to find selected tab')
    return chosen
b82f815f 3855
@classmethod
def _extract_uploader(cls, data):
    """
    Collect uploader details from the playlist sidebar's secondary info renderer.

    @param data: response
    @returns dict with any of 'uploader', 'uploader_id' and 'uploader_url';
             entries whose value is None are left out, so the result
             may be empty
    """
    secondary_info = cls._extract_sidebar_info_renderer(
        data, 'playlistSidebarSecondaryInfoRenderer') or {}
    owner_run = try_get(
        secondary_info,
        lambda x: x['videoOwner']['videoOwnerRenderer']['title']['runs'][0],
        dict)
    if not owner_run:
        return {}

    channel_id = try_get(
        owner_run, lambda x: x['navigationEndpoint']['browseEndpoint']['browseId'], compat_str)
    canonical_path = try_get(
        owner_run, lambda x: x['navigationEndpoint']['browseEndpoint']['canonicalBaseUrl'], compat_str)
    found = {
        'uploader': owner_run.get('text'),
        'uploader_id': channel_id,
        'uploader_url': urljoin('https://www.youtube.com/', canonical_path),
    }
    return {field: value for field, value in found.items() if value is not None}
8bdd16b4 3870
def _extract_from_tabs(self, item_id, ytcfg, data, tabs):
    """
    Build the playlist result for a tabbed (channel/playlist/hashtag) page.

    Metadata is read from the channel metadata renderer when present and
    from the playlist metadata renderer otherwise; the entries come from
    the currently selected tab.

    @param item_id: fallback playlist id when the page does not provide one
    @param ytcfg:   ytcfg passed on to the entries extraction
    @param data:    response
    @param tabs:    list of tab dicts of the page
    """
    selected_tab = self._extract_selected_tab(tabs)

    channel_name = channel_url = channel_id = None
    meta_renderer = try_get(
        data, lambda x: x['metadata']['channelMetadataRenderer'], dict)
    if meta_renderer:
        channel_name = meta_renderer.get('title')
        channel_url = meta_renderer.get('channelUrl')
        channel_id = meta_renderer.get('externalId')
    else:
        meta_renderer = try_get(
            data, lambda x: x['metadata']['playlistMetadataRenderer'], dict)

    playlist_id = title = description = None
    tags, raw_thumbnails = [], []
    if meta_renderer:
        title = meta_renderer.get('title')
        description = meta_renderer.get('description', '')
        # Only set (non-None) for channel pages
        playlist_id = channel_id
        tags = meta_renderer.get('keywords', '').split()
        raw_thumbnails = try_get(meta_renderer, lambda x: x['avatar']['thumbnails'], list)
        if not raw_thumbnails:
            # No avatar: fall back to the thumbnail shown in the playlist sidebar
            raw_thumbnails = try_get(
                self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer'),
                lambda x: x['thumbnailRenderer']['playlistVideoThumbnailRenderer']['thumbnail']['thumbnails'],
                list) or []

    thumbnails = []
    for thumb in raw_thumbnails:
        thumb_url = url_or_none(thumb.get('url')) if isinstance(thumb, dict) else None
        if thumb_url:
            thumbnails.append({
                'url': thumb_url,
                'width': int_or_none(thumb.get('width')),
                'height': int_or_none(thumb.get('height')),
            })

    if playlist_id is None:
        playlist_id = item_id
    if title is None:
        hashtag = try_get(data, lambda x: x['header']['hashtagHeaderRenderer']['hashtag']['simpleText'])
        title = hashtag or playlist_id
    # Append the name of the selected tab (format_field is given both possible keys)
    for tab_field in ('title', 'expandedText'):
        title += format_field(selected_tab, tab_field, ' - %s')

    metadata = {
        'playlist_id': playlist_id,
        'playlist_title': title,
        'playlist_description': description,
        'uploader': channel_name,
        'uploader_id': channel_id,
        'uploader_url': channel_url,
        'thumbnails': thumbnails,
        'tags': tags,
    }
    availability = self._extract_availability(data)
    if availability:
        metadata['availability'] = availability
    if not channel_id:
        metadata.update(self._extract_uploader(data))
    # The channel_* fields mirror the (possibly just updated) uploader_* fields
    for target, source in (
            ('channel', 'uploader'),
            ('channel_id', 'uploader_id'),
            ('channel_url', 'uploader_url')):
        metadata[target] = metadata[source]

    account_syncid = self._extract_account_syncid(ytcfg, data)
    visitor_data = self._extract_visitor_data(data, ytcfg)
    entries = self._entries(selected_tab, playlist_id, ytcfg, account_syncid, visitor_data)
    return self.playlist_result(entries, **metadata)
73c4ac2c 3945
def _extract_mix_playlist(self, playlist, playlist_id, data, ytcfg):
    """
    Yield the videos of a mix playlist, requesting further pages from the
    'next' endpoint until the mix ends.

    Each page is expected to repeat the previously seen videos, so on
    every page the yielding resumes right after the last id already
    yielded. Extraction stops when a page has no videos, when a page has
    nothing after the last seen video, when the first video shows up
    again, or when no further playlist page could be obtained.

    @param playlist:    playlist renderer dict of the first page
    @param playlist_id: id of the mix, sent with every continuation request
    @param data:        response of the original page (used for the headers)
    @param ytcfg:       ytcfg used for the API requests
    """
    first_id = last_id = response = None
    for page_num in itertools.count(1):
        videos = list(self._playlist_entries(playlist))
        if not videos:
            return
        # Index right after the last video yielded so far (0 if it is not on this page)
        start = next((i for i, v in enumerate(videos) if v['id'] == last_id), -1) + 1
        if start >= len(videos):
            return
        for video in videos[start:]:
            if video['id'] == first_id:
                self.to_screen('First video %s found again; Assuming end of Mix' % first_id)
                return
            yield video
        first_id = first_id or videos[0]['id']
        last_id = videos[-1]['id']
        # The last panel entry may lack a watch endpoint (try_get then gives None);
        # fall back to an empty dict so that the defaults below are used
        # instead of failing with AttributeError on `.get`
        watch_endpoint = try_get(
            playlist,
            lambda x: x['contents'][-1]['playlistPanelVideoRenderer']['navigationEndpoint']['watchEndpoint'],
            dict) or {}
        headers = self.generate_api_headers(
            ytcfg=ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
            visitor_data=self._extract_visitor_data(response, data, ytcfg))
        query = {
            'playlistId': playlist_id,
            'videoId': watch_endpoint.get('videoId') or last_id,
            'index': watch_endpoint.get('index') or len(videos),
            'params': watch_endpoint.get('params') or 'OAE%3D'
        }
        response = self._extract_response(
            item_id='%s page %d' % (playlist_id, page_num),
            query=query, ep='next', headers=headers, ytcfg=ytcfg,
            check_get_keys='contents'
        )
        playlist = try_get(
            response, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
        if not playlist:
            # No further playlist data in the response; do not hand a
            # missing renderer to _playlist_entries on the next iteration
            return
cd7c66cf 3980
def _extract_from_playlist(self, item_id, url, data, playlist, ytcfg):
    """
    Handle a playlist found on a watch page.

    If the playlist carries its own URL that differs from the requested
    one, extraction is delegated to that URL; otherwise the playlist is
    extracted as a mix.

    @param item_id:  fallback id when the playlist has no 'playlistId'
    @param url:      URL that is being extracted
    @param data:     response
    @param playlist: playlist renderer dict
    @param ytcfg:    ytcfg passed on to the mix extraction
    """
    playlist_id = playlist.get('playlistId') or item_id
    title = playlist.get('title')
    if not title:
        title = try_get(data, lambda x: x['titleText']['simpleText'], compat_str)

    # Delegating everything except mix playlists to regular tab-based playlist URL
    relative_url = try_get(
        playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        compat_str)
    playlist_url = urljoin(url, relative_url)
    if not playlist_url or playlist_url == url:
        return self.playlist_result(
            self._extract_mix_playlist(playlist, playlist_id, data, ytcfg),
            playlist_id=playlist_id, playlist_title=title)

    return self.url_result(
        playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
        video_title=title)
c5e8d7af 3998
def _extract_availability(self, data):
    """
    Gets the availability of a given playlist/tab.
    Note: Unless YouTube tells us explicitly, we do not assume it is public

    The labels come from the badges of the primary sidebar info renderer
    plus, if present, the label of the selected privacy dropdown entry.
    @param data: response
    """
    primary_info = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer') or {}
    labels = self._extract_badges(primary_info)

    # Personal playlists, when authenticated, have a dropdown visibility selector instead of a badge
    dropdown_entries = try_get(
        primary_info,
        lambda x: x['privacyForm']['dropdownFormFieldRenderer']['dropdown']['dropdownRenderer']['entries'],
        list) or []
    for entry in dropdown_entries:
        if not try_get(entry, lambda x: x['privacyDropdownItemRenderer']['isSelected'], bool):
            continue
        selected_label = self._get_text(entry, ('privacyDropdownItemRenderer', 'label'))
        if selected_label:
            labels.add(selected_label.lower())
            break

    is_private = is_unlisted = None
    for label in labels:
        if label == 'public':
            is_unlisted = is_private = False
        elif label == 'private':
            is_private = True
        elif label == 'unlisted':
            is_unlisted = True
    return self._availability(is_private, False, False, False, is_unlisted)
4030
@staticmethod
def _extract_sidebar_info_renderer(data, info_renderer, expected_type=dict):
    """
    Return the first truthy `info_renderer` value among the playlist sidebar items.

    @param data:          response
    @param info_renderer: key to look up in each sidebar item
    @param expected_type: type the value must have to be accepted
    @returns the renderer, or None when no sidebar item provides one
    """
    sidebar_items = try_get(
        data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list) or []
    matches = (
        try_get(sidebar_item, lambda x: x[info_renderer], expected_type)
        for sidebar_item in sidebar_items)
    return next(filter(None, matches), None)
4039
def _reload_with_unavailable_videos(self, item_id, data, ytcfg):
    """
    Request the playlist again with unavailable videos included.

    The browse id and params are taken from the 'show unavailable videos'
    menu button when it exists; otherwise default values derived from
    item_id are sent. Nothing is requested (None is returned) when the
    page has no primary playlist sidebar renderer. The request itself is
    non-fatal.
    """
    primary_info = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer')
    if not primary_info:
        return

    browse_endpoint = {}
    menu_items = try_get(
        primary_info, lambda x: x['menu']['menuRenderer']['items'], list) or []
    for menu_item in menu_items:
        if not isinstance(menu_item, dict):
            continue
        nav_item = menu_item.get('menuNavigationItemRenderer')
        button_text = try_get(nav_item, lambda x: x['text']['simpleText'], compat_str)
        if button_text and button_text.lower() == 'show unavailable videos':
            browse_endpoint = try_get(
                nav_item, lambda x: x['navigationEndpoint']['browseEndpoint'], dict) or {}
            break

    headers = self.generate_api_headers(
        ytcfg=ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
        visitor_data=self._extract_visitor_data(data, ytcfg))
    query = {
        'params': browse_endpoint.get('params') or 'wgYCCAA=',
        'browseId': browse_endpoint.get('browseId') or 'VL%s' % item_id
    }
    return self._extract_response(
        item_id=item_id, headers=headers, query=query,
        check_get_keys='contents', fatal=False, ytcfg=ytcfg,
        note='Downloading API JSON with unavailable videos')
358de58c 4075
def _extract_webpage(self, url, item_id, fatal=True):
    """
    Download the webpage and extract its initial data, retrying on failure.

    Up to 'extractor_retries' (default 3) retries are made after the first
    attempt. An attempt is retried on a network error (HTTP 403/429
    responses excepted) and when the initial data has neither 'contents'
    nor 'currentVideoEndpoint'.

    @param fatal: if True, errors are raised; otherwise they are reported
                  as warnings and whatever was obtained so far is returned
    @returns (webpage, data); either may be None if nothing was obtained
    """
    retries = self.get_param('extractor_retries', 3)
    # Starts at -1 so that the first attempt is number 0 (not a retry)
    count = -1
    webpage = data = last_error = None
    while count < retries:
        count += 1
        # Sometimes youtube returns a webpage with incomplete ytInitialData
        # See: https://github.com/yt-dlp/yt-dlp/issues/116
        if last_error:
            self.report_warning('%s. Retrying ...' % last_error)
        try:
            webpage = self._download_webpage(
                url, item_id,
                note='Downloading webpage%s' % (' (retry #%d)' % count if count else '',))
            data = self.extract_yt_initial_data(item_id, webpage or '', fatal=fatal) or {}
        except ExtractorError as e:
            if isinstance(e.cause, network_exceptions):
                # HTTP 403 and 429 are not retried; any other network error is,
                # as long as there are attempts left
                if not isinstance(e.cause, compat_HTTPError) or e.cause.code not in (403, 429):
                    last_error = error_to_compat_str(e.cause or e.msg)
                    if count < retries:
                        continue
            if fatal:
                raise
            self.report_warning(error_to_compat_str(e))
            break
        else:
            # Alerts reported by the page end the loop: raised if fatal,
            # otherwise only warned about
            try:
                self._extract_and_report_alerts(data)
            except ExtractorError as e:
                if fatal:
                    raise
                self.report_warning(error_to_compat_str(e))
                break

            # The data is considered complete once one of these keys is present
            if dict_get(data, ('contents', 'currentVideoEndpoint')):
                break

            last_error = 'Incomplete yt initial data received'
            if count >= retries:
                if fatal:
                    raise ExtractorError(last_error)
                self.report_warning(last_error)
                break

    return webpage, data
4121
def _extract_data(self, url, item_id, ytcfg=None, fatal=True, webpage_fatal=False, default_client='web'):
    """
    Obtain the page data, preferring the webpage and falling back to the API.

    The webpage is downloaded unless 'webpage' is listed in the 'skip'
    configuration argument. When it yields no data, the data is fetched
    through _extract_tab_endpoint instead. If there is also no ytcfg
    while being authenticated, this raises (when fatal, unless 'authcheck'
    is skipped) or warns once.

    @param fatal:         whether failures of the API fallback are fatal
    @param webpage_fatal: whether failures of the webpage download are fatal
    @returns (data, ytcfg)
    """
    data = None
    if 'webpage' not in self._configuration_arg('skip'):
        webpage, data = self._extract_webpage(url, item_id, fatal=webpage_fatal)
        if not ytcfg:
            ytcfg = self.extract_ytcfg(item_id, webpage)
    if data:
        return data, ytcfg

    if not ytcfg and self.is_authenticated:
        msg = 'Playlists that require authentication may not extract correctly without a successful webpage download.'
        authcheck_enabled = 'authcheck' not in self._configuration_arg('skip')
        if authcheck_enabled and fatal:
            raise ExtractorError(
                msg + ' If you are not downloading private content, or your cookies are only for the first account and channel,'
                      ' pass "--extractor-args youtubetab:skip=authcheck" to skip this check',
                expected=True)
        self.report_warning(msg, only_once=True)
    data = self._extract_tab_endpoint(url, item_id, ytcfg, fatal=fatal, default_client=default_client)
    return data, ytcfg
4138
def _extract_tab_endpoint(self, url, item_id, ytcfg=None, fatal=True, default_client='web'):
    """
    Resolve `url` through the 'navigation/resolve_url' API endpoint and
    request the data from the endpoint it resolves to.

    A resolved 'browseEndpoint' is queried via 'browse' and a
    'watchEndpoint' via 'next' (checked in that order).

    @returns the API response of the resolved endpoint; None if the URL
             could not be resolved and fatal is False
    @raises ExtractorError: if the URL could not be resolved and fatal is True
    """
    headers = self.generate_api_headers(ytcfg=ytcfg, default_client=default_client)
    resolved = self._extract_response(
        item_id=item_id, query={'url': url}, check_get_keys='endpoint', headers=headers, ytcfg=ytcfg, fatal=fatal,
        ep='navigation/resolve_url', note='Downloading API parameters API JSON', default_client=default_client)

    for endpoint_key, api_ep in (('browseEndpoint', 'browse'), ('watchEndpoint', 'next')):
        endpoint_params = try_get(resolved, lambda x: x['endpoint'][endpoint_key], dict)
        if not endpoint_params:
            continue
        return self._extract_response(
            item_id=item_id, query=endpoint_params, ep=api_ep, headers=headers,
            ytcfg=ytcfg, fatal=fatal, default_client=default_client,
            check_get_keys=('contents', 'currentVideoEndpoint'))

    err_note = 'Failed to resolve url (does the playlist exist?)'
    if not fatal:
        self.report_warning(err_note, item_id)
        return
    raise ExtractorError(err_note, expected=True)
4156
@staticmethod
def _smuggle_data(entries, data):
    """
    Lazily yield every entry, smuggling `data` into its 'url' first when
    `data` is truthy. The entries are modified in place.
    """
    for item in entries:
        if not data:
            yield item
            continue
        item['url'] = smuggle_url(item['url'], data)
        yield item
4163
def _real_extract(self, url):
    """
    Entry point: unsmuggle the URL, note whether it is a music URL, run
    the actual extraction and pass the smuggled data on to all entries.
    """
    url, smuggled = unsmuggle_url(url, {})
    if self.is_music_url(url):
        smuggled['is_music_url'] = True

    result = self.__real_extract(url, smuggled)
    entries = result.get('entries')
    if entries:
        result['entries'] = self._smuggle_data(entries, smuggled)
    return result
4172
# Splits a URL into three named parts: ``pre`` (the portion matched by _VALID_URL),
# an optional ``tab`` path segment (``/word``; the conditional group only attempts it
# when the ``channel_type`` group took part in the match) and ``post`` (whatever
# remains up to the end of the string).
_url_re = re.compile(r'(?P<pre>%s)(?(channel_type)(?P<tab>/\w+))?(?P<post>.*)$' % _VALID_URL)
4174
def __real_extract(self, url, smuggled_data):
    """
    Normalise the URL, download the page data and dispatch to the tab,
    playlist or single video handling.

    @param url:           URL to extract (already unsmuggled)
    @param smuggled_data: dict of smuggled data; only 'is_music_url' is read here
    @raises ExtractorError: e.g. if the page cannot be recognized
    """
    item_id = self._match_id(url)
    # Requests are always made against www.youtube.com, whatever host was given
    url = compat_urlparse.urlunparse(
        compat_urlparse.urlparse(url)._replace(netloc='www.youtube.com'))
    compat_opts = self.get_param('compat_opts', [])

    def get_mobj(url):
        # Named groups of _url_re as a dict, with unmatched groups as '' instead of None
        mobj = self._url_re.match(url).groupdict()
        mobj.update((k, '') for k, v in mobj.items() if v is None)
        return mobj

    mobj = get_mobj(url)
    # Youtube returns incomplete data if tabname is not lower case
    pre, tab, post, is_channel = mobj['pre'], mobj['tab'].lower(), mobj['post'], not mobj['not_channel']
    if is_channel:
        if smuggled_data.get('is_music_url'):
            if item_id[:2] == 'VL':
                # Youtube music VL channels have an equivalent playlist
                item_id = item_id[2:]
                pre, tab, post, is_channel = 'https://www.youtube.com/playlist?list=%s' % item_id, '', '', False
            elif item_id[:2] == 'MP':
                # Resolve albums (/[channel/browse]/MP...) to their equivalent playlist
                mdata = self._extract_tab_endpoint(
                    'https://music.youtube.com/channel/%s' % item_id, item_id, default_client='web_music')
                murl = traverse_obj(
                    mdata, ('microformat', 'microformatDataRenderer', 'urlCanonical'), get_all=False, expected_type=compat_str)
                if not murl:
                    raise ExtractorError('Failed to resolve album to playlist.')
                return self.url_result(murl, ie=YoutubeTabIE.ie_key())
            elif mobj['channel_type'] == 'browse':
                # Youtube music /browse/ should be changed to /channel/
                pre = 'https://www.youtube.com/channel/%s' % item_id
    if is_channel and not tab and 'no-youtube-channel-redirect' not in compat_opts:
        # Home URLs should redirect to /videos/
        self.report_warning(
            'A channel/user page was given. All the channel\'s videos will be downloaded. '
            'To download only the videos in the home page, add a "/featured" to the URL')
        tab = '/videos'

    # Rebuild the URL from the (possibly rewritten) parts and parse it again
    url = ''.join((pre, tab, post))
    mobj = get_mobj(url)

    # Handle both video/playlist URLs
    qs = parse_qs(url)
    video_id = qs.get('v', [None])[0]
    playlist_id = qs.get('list', [None])[0]

    if not video_id and mobj['not_channel'].startswith('watch'):
        if not playlist_id:
            # If there is neither video or playlist ids, youtube redirects to home page, which is undesirable
            raise ExtractorError('Unable to recognize tab page')
        # Common mistake: https://www.youtube.com/watch?list=playlist_id
        self.report_warning('A video URL was given without video ID. Trying to download playlist %s' % playlist_id)
        url = 'https://www.youtube.com/playlist?list=%s' % playlist_id
        mobj = get_mobj(url)

    if video_id and playlist_id:
        if self.get_param('noplaylist'):
            self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
            return self.url_result(f'https://www.youtube.com/watch?v={video_id}', ie=YoutubeIE.ie_key(), video_id=video_id)
        self.to_screen('Downloading playlist %s; add --no-playlist to just download video %s' % (playlist_id, video_id))

    data, ytcfg = self._extract_data(url, item_id)

    tabs = try_get(
        data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
    if tabs:
        selected_tab = self._extract_selected_tab(tabs)
        tab_name = selected_tab.get('title', '')
        if 'no-youtube-channel-redirect' not in compat_opts:
            if mobj['tab'] == '/live':
                # Live tab should have redirected to the video
                raise ExtractorError('The channel is not currently live', expected=True)
            # The /videos tab was requested but a differently named tab got selected
            if mobj['tab'] == '/videos' and tab_name.lower() != mobj['tab'][1:]:
                if not mobj['not_channel'] and item_id[:2] == 'UC':
                    # Topic channels don't have /videos. Use the equivalent playlist instead
                    self.report_warning('The URL does not have a %s tab. Trying to redirect to playlist UU%s instead' % (mobj['tab'][1:], item_id[2:]))
                    pl_id = 'UU%s' % item_id[2:]
                    pl_url = 'https://www.youtube.com/playlist?list=%s%s' % (pl_id, mobj['post'])
                    try:
                        # All four names are only rebound if the playlist extraction succeeds
                        data, ytcfg, item_id, url = *self._extract_data(pl_url, pl_id, ytcfg=ytcfg, fatal=True), pl_id, pl_url
                    except ExtractorError:
                        self.report_warning('The playlist gave error. Falling back to channel URL')
                else:
                    self.report_warning('The URL does not have a %s tab. %s is being downloaded instead' % (mobj['tab'][1:], tab_name))

    self.write_debug('Final URL: %s' % url)

    # YouTube sometimes provides a button to reload playlist with unavailable videos.
    if 'no-youtube-unavailable-videos' not in compat_opts:
        data = self._reload_with_unavailable_videos(item_id, data, ytcfg) or data
    self._extract_and_report_alerts(data, only_once=True)
    # `data` may have been replaced above, so the tabs are looked up again
    tabs = try_get(
        data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
    if tabs:
        return self._extract_from_tabs(item_id, ytcfg, data, tabs)

    playlist = try_get(
        data, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
    if playlist:
        return self._extract_from_playlist(item_id, url, data, playlist, ytcfg)

    # Neither tabs nor a playlist: fall back to a single video, if one is known
    video_id = try_get(
        data, lambda x: x['currentVideoEndpoint']['watchEndpoint']['videoId'],
        compat_str) or video_id
    if video_id:
        if mobj['tab'] != '/live':  # live tab is expected to redirect to video
            self.report_warning('Unable to recognize playlist. Downloading just video %s' % video_id)
        return self.url_result(f'https://www.youtube.com/watch?v={video_id}', ie=YoutubeIE.ie_key(), video_id=video_id)

    raise ExtractorError('Unable to recognize tab page')
c5e8d7af 4286
c5e8d7af 4287
class YoutubePlaylistIE(InfoExtractor):
    # Matches bare playlist ids as well as URLs carrying a `list=` parameter;
    # the actual extraction is delegated to YoutubeTabIE
    IE_DESC = 'YouTube playlists'
    _VALID_URL = r'''(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        (?:
                            (?:
                                youtube(?:kids)?\.com|
                                invidio\.us
                            )
                            /.*?\?.*?\blist=
                        )?
                        (?P<id>%(playlist_id)s)
                     )''' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    IE_NAME = 'youtube:playlist'
    _TESTS = [{
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
            'uploader': 'Wickydoo',
            'uploader_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
            'description': 'md5:8fa6f52abb47a9552002fa3ddfc57fc2',
        },
        'playlist_mincount': 29,
    }, {
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        },
        'playlist_count': 2,
        'skip': 'This playlist is private',
    }, {
        'note': 'embedded',
        'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA15',
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
            'uploader': 'milan',
            'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
        }
    }, {
        'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
        'playlist_mincount': 654,
        'info_dict': {
            'title': '2018 Chinese New Singles (11/6 updated)',
            'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
            'uploader': 'LBK',
            'uploader_id': 'UC21nz3_MesPLqtDqwdvnoxA',
            'description': 'md5:da521864744d60a198e3a88af4db0d9d',
        }
    }, {
        'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
        'only_matching': True,
    }, {
        # music album playlist
        'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """
        Reject URLs that YoutubeTabIE accepts and URLs that carry a video
        id (`v` parameter); otherwise defer to the regular _VALID_URL check.
        """
        if YoutubeTabIE.suitable(url):
            return False
        # NOTE(review): parse_qs is imported locally although the module
        # imports it as well - presumably deliberate, so it is kept; confirm
        from ..utils import parse_qs
        has_video_id = bool(parse_qs(url).get('v', [None])[0])
        if has_video_id:
            return False
        return super(YoutubePlaylistIE, cls).suitable(url)

    def _real_extract(self, url):
        """
        Redirect to the equivalent /playlist URL, keeping the original
        query (or only the playlist id when there is no query) and
        smuggling the music flag for music URLs.
        """
        playlist_id = self._match_id(url)
        from_music = YoutubeBaseInfoExtractor.is_music_url(url)
        query = parse_qs(url) or {'list': playlist_id}
        target_url = update_url_query('https://www.youtube.com/playlist', query)
        if from_music:
            target_url = smuggle_url(target_url, {'is_music_url': True})
        return self.url_result(target_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
29f7c58a 4370
4371
class YoutubeYtBeIE(InfoExtractor):
    # youtu.be short links that carry a playlist id next to the video id
    IE_DESC = 'youtu.be'
    _VALID_URL = r'https?://youtu\.be/(?P<id>[0-9A-Za-z_-]{11})/*?.*?\blist=(?P<playlist_id>%(playlist_id)s)' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TESTS = [{
        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
        'info_dict': {
            'id': 'yeWKywCrFtk',
            'ext': 'mp4',
            'title': 'Small Scale Baler and Braiding Rugs',
            'uploader': 'Backus-Page House Museum',
            'uploader_id': 'backuspagemuseum',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
            'upload_date': '20161008',
            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
            'categories': ['Nonprofits & Activism'],
            'tags': list,
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
    }, {
        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Redirect to the equivalent watch URL holding both the video and the playlist id."""
        match = self._match_valid_url(url)
        video_id, playlist_id = match.group('id'), match.group('playlist_id')
        watch_url = update_url_query('https://www.youtube.com/watch', {
            'v': video_id,
            'list': playlist_id,
            'feature': 'youtu.be',
        })
        return self.url_result(watch_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
8bdd16b4 4410
4411
class YoutubeYtUserIE(InfoExtractor):
    # Shorthand "ytuser:<name>" for the /user/<name> page
    IE_DESC = 'YouTube user videos; "ytuser:" prefix'
    _VALID_URL = r'ytuser:(?P<id>.+)'
    _TESTS = [{
        'url': 'ytuser:phihag',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Redirect to the user's page, which is handled by YoutubeTabIE."""
        user_id = self._match_id(url)
        user_page = 'https://www.youtube.com/user/%s' % user_id
        return self.url_result(user_page, ie=YoutubeTabIE.ie_key(), video_id=user_id)
9558dcec 4425
b05654f0 4426
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    # Keyword shortcut for the "LL" playlist
    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube liked videos; ":ytfav" keyword (requires cookies)'
    _VALID_URL = r':ytfav(?:ou?rite)?s?'
    _LOGIN_REQUIRED = True
    _TESTS = [{
        'url': ':ytfav',
        'only_matching': True,
    }, {
        'url': ':ytfavorites',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Redirect to the LL playlist, which is handled by YoutubeTabIE."""
        liked_playlist_url = 'https://www.youtube.com/playlist?list=LL'
        return self.url_result(liked_playlist_url, ie=YoutubeTabIE.ie_key())
4444
4445
class YoutubeSearchIE(SearchInfoExtractor, YoutubeTabIE):
    IE_DESC = 'YouTube searches'
    IE_NAME = 'youtube:search'
    _SEARCH_KEY = 'ytsearch'
    # Value sent as 'params' with the search request; nothing is sent when falsy
    _SEARCH_PARAMS = None
    _TESTS = []

    def _search_results(self, query):
        """
        Yield the videos found for `query`, page by page.

        The first page is requested with the query (and _SEARCH_PARAMS, if
        set); following pages additionally send the continuation extracted
        from the previous page. The search ends with the first page that
        gives no response, no section list or no continuation.
        """
        request_data = {'query': query}
        if self._SEARCH_PARAMS:
            request_data['params'] = self._SEARCH_PARAMS

        continuation = {}
        for page_num in itertools.count(1):
            request_data.update(continuation)
            response = self._extract_response(
                item_id='query "%s" page %s' % (query, page_num), ep='search', query=request_data,
                check_get_keys=('contents', 'onResponseReceivedCommands')
            )
            if not response:
                return
            # First getter: initial page; second getter: continuation pages
            sections = try_get(
                response,
                (lambda x: x['contents']['twoColumnSearchResultsRenderer']['primaryContents']['sectionListRenderer']['contents'],
                 lambda x: x['onResponseReceivedCommands'][0]['appendContinuationItemsAction']['continuationItems']),
                list)
            if not sections:
                return

            # Youtube sometimes adds promoted content to searches,
            # changing the index location of videos and token.
            # So we search through all entries till we find them.
            continuation = None
            for section in sections:
                continuation = continuation or self._extract_continuation({'contents': [section]})

                section_items = try_get(
                    section, lambda x: x['itemSectionRenderer']['contents'], list) or ()
                for item in section_items:
                    video = item.get('videoRenderer') if isinstance(item, dict) else None
                    # Only renderers that actually carry a video id are extracted
                    if isinstance(video, dict) and video.get('videoId'):
                        yield self._extract_video(video)

            if not continuation:
                return
b05654f0 4502
c9ae7b95 4503
class YoutubeSearchDateIE(YoutubeSearchIE):
    # Same as YoutubeSearchIE, but with its own search key and a fixed
    # 'params' value sent with every search request
    IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
    _SEARCH_KEY = 'ytsearchdate'
    IE_DESC = 'YouTube searches, newest videos first'
    _SEARCH_PARAMS = 'CAI%3D'
75dff0ee 4509
c9ae7b95 4510
class YoutubeSearchURLIE(YoutubeSearchIE):
    # Handles regular /results URLs; query and 'sp' value are read from the URL
    IE_DESC = 'YouTube search URLs with sorting and filter support'
    IE_NAME = YoutubeSearchIE.IE_NAME + '_url'
    _SEARCH_KEY = None
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?:[^&]+)(?:[&]|$)'
    # _MAX_RESULTS = 100
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
        }
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    @classmethod
    def _make_valid_url(cls):
        """Return the _VALID_URL defined on the class, unchanged."""
        valid_url = cls._VALID_URL
        return valid_url

    def _real_extract(self, url):
        """
        Search for the URL's `search_query` (or `q`) value, using its `sp`
        value (empty if absent) as search params. Note that the params
        are stored on the instance.
        """
        url_query = parse_qs(url)
        search_terms = url_query.get('search_query') or url_query.get('q')
        sp_values = url_query.get('sp', ('',))
        self._SEARCH_PARAMS = sp_values[0]
        return self._get_n_results(search_terms[0], self._MAX_RESULTS)
3462ffa8 4538
4539
class YoutubeFeedsInfoExtractor(YoutubeTabIE):
    """
    Common base of the feed extractors.
    A subclass has to provide _FEED_NAME; it is used for both the
    extractor name and the /feed/ URL that is redirected to.
    """
    _LOGIN_REQUIRED = True
    _TESTS = []

    @property
    def IE_NAME(self):
        feed_name = self._FEED_NAME
        return 'youtube:%s' % feed_name

    def _real_extract(self, url):
        """Redirect to the feed page, which is handled by YoutubeTabIE."""
        feed_url = 'https://www.youtube.com/feed/%s' % self._FEED_NAME
        return self.url_result(feed_url, ie=YoutubeTabIE.ie_key())
25f14e9f
S
4556
4557
class YoutubeWatchLaterIE(InfoExtractor):
    # Keyword shortcut for the "WL" playlist
    IE_NAME = 'youtube:watchlater'
    IE_DESC = 'Youtube watch later list; ":ytwatchlater" keyword (requires cookies)'
    _VALID_URL = r':ytwatchlater'
    _TESTS = [{
        'url': ':ytwatchlater',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Redirect to the WL playlist, which is handled by YoutubeTabIE."""
        watch_later_url = 'https://www.youtube.com/playlist?list=WL'
        return self.url_result(watch_later_url, ie=YoutubeTabIE.ie_key())
3462ffa8 4570
4571
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    # Feed extractor for /feed/recommended; the URL is built from _FEED_NAME
    # by YoutubeFeedsInfoExtractor._real_extract
    IE_DESC = 'YouTube recommended videos; ":ytrec" keyword'
    # Matches the bare youtube.com home page URL as well as the keywords
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/?(?:[?#]|$)|:ytrec(?:ommended)?'
    _FEED_NAME = 'recommended'
    # Overrides the True set by YoutubeFeedsInfoExtractor
    _LOGIN_REQUIRED = False
    _TESTS = [{
        'url': ':ytrec',
        'only_matching': True,
    }, {
        'url': ':ytrecommended',
        'only_matching': True,
    }, {
        'url': 'https://youtube.com',
        'only_matching': True,
    }]
1ed5b5c9 4587
1ed5b5c9 4588
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the ``subscriptions`` feed (``:ytsubs`` keyword family)."""
    _FEED_NAME = 'subscriptions'
    IE_DESC = 'YouTube subscriptions feed; ":ytsubs" keyword (requires cookies)'
    _VALID_URL = r':ytsub(?:scription)?s?'
    # Every case only checks that the keyword is matched by this extractor.
    _TESTS = [
        {'url': keyword, 'only_matching': True}
        for keyword in (':ytsubs', ':ytsubscriptions')
    ]
1ed5b5c9 4600
1ed5b5c9 4601
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the ``history`` feed (``:ythis`` / ``:ythistory`` keywords)."""
    _FEED_NAME = 'history'
    IE_DESC = 'Youtube watch history; ":ythis" keyword (requires cookies)'
    _VALID_URL = r':ythis(?:tory)?'
    _TESTS = [{'url': ':ythistory', 'only_matching': True}]
1ed5b5c9
JMF
4610
4611
15870e90
PH
class YoutubeTruncatedURLIE(InfoExtractor):
    """Catch watch/attribution URLs that end before any video ID.

    The pattern only matches URLs that stop (``$``) right after ``watch?``,
    after a single one of the listed non-ID parameters, or after an
    ``attribution_link?a=...`` value. Extraction always fails with an
    expected ExtractorError that tells the user to quote the URL.
    """
    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise: a URL matched here carries no video ID.

        Raises:
            ExtractorError: with ``expected=True``, explaining that the URL
                most likely lost everything after an unquoted ``&``.
        """
        # The example commands name this project's executable (yt-dlp) so
        # the suggestion can be pasted into a shell as-is.
        raise ExtractorError(
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like yt-dlp '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply yt-dlp BaW_jenozKc .',
            expected=True)
772fd5cc
PH
4659
4660
class YoutubeClipIE(InfoExtractor):
    """Placeholder for ``/clip/`` URLs: warns, then defers to the Generic extractor."""
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/clip/'
    IE_NAME = 'youtube:clip'
    IE_DESC = False  # Do not list

    def _real_extract(self, url):
        """Warn that clips are unsupported and pass *url* on unchanged."""
        notice = 'YouTube clips are not currently supported. The entire video will be downloaded instead'
        self.report_warning(notice)
        return self.url_result(url, ie='Generic')
4669
4670
772fd5cc
PH
class YoutubeTruncatedIDIE(InfoExtractor):
    """Catch watch URLs whose ``v=`` value is only 1 to 10 characters long.

    Extraction always fails with an expected ExtractorError naming the
    partial ID and the URL.
    """
    IE_DESC = False  # Do not list
    IE_NAME = 'youtube:truncated_id'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [{'url': 'https://www.youtube.com/watch?v=N_708QY7Ob', 'only_matching': True}]

    def _real_extract(self, url):
        """Always raise, reporting the partial ID matched in *url*."""
        partial_id = self._match_id(url)
        message = 'Incomplete YouTube ID %s. URL %s looks truncated.' % (partial_id, url)
        raise ExtractorError(message, expected=True)