]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/youtube.py
[LnkIE] Add extractor (#2408)
[yt-dlp.git] / yt_dlp / extractor / youtube.py
CommitLineData
c5e8d7af 1# coding: utf-8
c5e8d7af 2
78caa52a
PH
3from __future__ import unicode_literals
4
d92f5d5a 5import calendar
109dd3b2 6import copy
fe93e2c4 7import datetime
adbc4ec4 8import functools
a5c56234 9import hashlib
0ca96d48 10import itertools
c5e8d7af 11import json
720c3099 12import math
c4417ddb 13import os.path
d77ab8e2 14import random
c5e8d7af 15import re
46383212 16import sys
8a784c74 17import time
e0df6211 18import traceback
adbc4ec4 19import threading
c5e8d7af 20
b05654f0 21from .common import InfoExtractor, SearchInfoExtractor
4bb4a188 22from ..compat import (
edf3e38e 23 compat_chr,
29f7c58a 24 compat_HTTPError,
c5e8d7af 25 compat_parse_qs,
545cc85d 26 compat_str,
7fd002c0 27 compat_urllib_parse_unquote_plus,
15707c7e 28 compat_urllib_parse_urlencode,
7c80519c 29 compat_urllib_parse_urlparse,
7c61bd36 30 compat_urlparse,
4bb4a188 31)
545cc85d 32from ..jsinterp import JSInterpreter
4bb4a188 33from ..utils import (
720c3099 34 bug_reports_message,
c5e8d7af 35 clean_html,
d92f5d5a 36 datetime_from_str,
11f9be09 37 dict_get,
358de58c 38 error_to_compat_str,
c5e8d7af 39 ExtractorError,
2d30521a 40 float_or_none,
11f9be09 41 format_field,
dd27fd17 42 int_or_none,
641ad5d8 43 is_html,
34921b43 44 join_nonempty,
94278f72 45 mimetype2ext,
9c0d7f49 46 network_exceptions,
a6213a49 47 NO_DEFAULT,
11f9be09 48 orderedSet,
6310acf5 49 parse_codecs,
49bd8c66 50 parse_count,
7c80519c 51 parse_duration,
7ea65411 52 parse_iso8601,
4dfbf869 53 parse_qs,
dca3ff4a 54 qualities,
c0ac49bc 55 remove_end,
3995d37d 56 remove_start,
cf7e015f 57 smuggle_url,
dbdaaa23 58 str_or_none,
c93d53f5 59 str_to_int,
f3aa3c3f 60 strftime_or_none,
7c365c21 61 traverse_obj,
556dbe7f 62 try_get,
c5e8d7af
PH
63 unescapeHTML,
64 unified_strdate,
f0d785d3 65 unified_timestamp,
cf7e015f 66 unsmuggle_url,
8bdd16b4 67 update_url_query,
21c340b8 68 url_or_none,
fe93e2c4 69 urljoin,
7c365c21 70 variadic,
c5e8d7af
PH
71)
72
5f6a1245 73
def get_first(obj, keys, **kwargs):
    """Look up `keys` beneath a leading wildcard step and return a single result.

    The path handed to `traverse_obj` is `...` followed by the elements of
    `variadic(keys)`; `get_all=False` is always passed. Any other keyword
    arguments are forwarded to `traverse_obj` unchanged.
    """
    path = (..., *variadic(keys))
    return traverse_obj(obj, path, **kwargs, get_all=False)
76
77
# Per-client InnerTube configuration, keyed by client name.
# build_innertube_clients() post-processes this table at import time: it fills
# in missing INNERTUBE_API_KEY / INNERTUBE_HOST / REQUIRE_JS_PLAYER entries,
# assigns a 'priority' to every client and adds '<client>_agegate' variants.
# any clients starting with _ cannot be explicitly requested by the user
INNERTUBE_CLIENTS = {
    'web': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB',
                'clientVersion': '2.20211221.00.00',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 1
    },
    'web_embedded': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB_EMBEDDED_PLAYER',
                'clientVersion': '1.20211215.00.01',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 56
    },
    'web_music': {
        'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
        'INNERTUBE_HOST': 'music.youtube.com',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB_REMIX',
                'clientVersion': '1.20211213.00.00',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 67,
    },
    'web_creator': {
        'INNERTUBE_API_KEY': 'AIzaSyBUPetSUmoZL-OhlxA7wSac5XinrygCqMo',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB_CREATOR',
                'clientVersion': '1.20211220.02.00',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 62,
    },
    'android': {
        'INNERTUBE_API_KEY': 'AIzaSyA8eiZmM1FaDVjRy-df2KTyQ_vz_yYM39w',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID',
                'clientVersion': '16.49',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 3,
        'REQUIRE_JS_PLAYER': False
    },
    'android_embedded': {
        'INNERTUBE_API_KEY': 'AIzaSyCjc_pVEDi4qsv5MtC2dMXzpIaDoRFLsxw',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID_EMBEDDED_PLAYER',
                'clientVersion': '16.49',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 55,
        'REQUIRE_JS_PLAYER': False
    },
    'android_music': {
        'INNERTUBE_API_KEY': 'AIzaSyAOghZGza2MQSZkY_zfZ370N-PUdXEo8AI',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID_MUSIC',
                'clientVersion': '4.57',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 21,
        'REQUIRE_JS_PLAYER': False
    },
    'android_creator': {
        'INNERTUBE_API_KEY': 'AIzaSyD_qjV8zaaUMehtLkrKFgVeSX_Iqbtyws8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID_CREATOR',
                'clientVersion': '21.47',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 14,
        'REQUIRE_JS_PLAYER': False
    },
    # iOS clients have HLS live streams. Setting device model to get 60fps formats.
    # See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/680#issuecomment-1002724558
    'ios': {
        'INNERTUBE_API_KEY': 'AIzaSyB-63vPrdThhKuerbB2N_l7Kwwcxj6yUAc',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS',
                'clientVersion': '16.46',
                'deviceModel': 'iPhone14,3',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 5,
        'REQUIRE_JS_PLAYER': False
    },
    # No INNERTUBE_API_KEY here: build_innertube_clients() supplies a default one
    'ios_embedded': {
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS_MESSAGES_EXTENSION',
                'clientVersion': '16.46',
                'deviceModel': 'iPhone14,3',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 66,
        'REQUIRE_JS_PLAYER': False
    },
    'ios_music': {
        'INNERTUBE_API_KEY': 'AIzaSyBAETezhkwP0ZWA02RsqT1zu78Fpt0bC_s',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS_MUSIC',
                'clientVersion': '4.57',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 26,
        'REQUIRE_JS_PLAYER': False
    },
    # No INNERTUBE_API_KEY here: build_innertube_clients() supplies a default one
    'ios_creator': {
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS_CREATOR',
                'clientVersion': '21.47',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 15,
        'REQUIRE_JS_PLAYER': False
    },
    # mweb has 'ultralow' formats
    # See: https://github.com/yt-dlp/yt-dlp/pull/557
    'mweb': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'MWEB',
                'clientVersion': '2.20211221.01.00',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 2
    }
}
224
225
def build_innertube_clients():
    """Normalise INNERTUBE_CLIENTS in place and derive the age-gate variants.

    Every client gets default values for its API key, host and
    REQUIRE_JS_PLAYER flag, an 'hl' of 'en' in its client context, and a
    'priority' computed from the family named before the first underscore.
    Base clients additionally get a deep-copied '<name>_agegate' entry.
    """
    base_names = ('android', 'web', 'ios', 'mweb')
    family_rank = qualities(base_names[::-1])
    embed_info = {
        'embedUrl': 'https://google.com',  # Can be any valid URL
    }
    fallback_values = (
        ('INNERTUBE_API_KEY', 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8'),
        ('INNERTUBE_HOST', 'www.youtube.com'),
        ('REQUIRE_JS_PLAYER', True),
    )

    # Snapshot the items first: new '*_agegate' keys are inserted while looping
    for name, config in tuple(INNERTUBE_CLIENTS.items()):
        for option, value in fallback_values:
            config.setdefault(option, value)
        config['INNERTUBE_CONTEXT']['client'].setdefault('hl', 'en')
        config['priority'] = 10 * family_rank(name.split('_', 1)[0])

        if name in base_names:
            agegate_config = copy.deepcopy(config)
            INNERTUBE_CLIENTS[f'{name}_agegate'] = agegate_config
            agegate_config['INNERTUBE_CONTEXT']['client']['clientScreen'] = 'EMBED'
            agegate_config['INNERTUBE_CONTEXT']['thirdParty'] = embed_info
            agegate_config['priority'] -= 1
            continue

        if name.endswith('_embedded'):
            config['INNERTUBE_CONTEXT']['thirdParty'] = embed_info
            config['priority'] -= 2
        else:
            config['priority'] -= 3
250
251
252build_innertube_clients()
253
254
de7f3446 255class YoutubeBaseInfoExtractor(InfoExtractor):
b2e8bc1b 256 """Provide base functions for Youtube extractors"""
e00eb564 257
3462ffa8 258 _RESERVED_NAMES = (
3cd786db 259 r'channel|c|user|playlist|watch|w|v|embed|e|watch_popup|clip|'
3619f78d 260 r'shorts|movies|results|shared|hashtag|trending|feed|feeds|'
261 r'browse|oembed|get_video_info|iframe_api|s/player|'
cd7c66cf 262 r'storefront|oops|index|account|reporthistory|t/terms|about|upload|signin|logout')
3462ffa8 263
3619f78d 264 _PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)'
265
b2e8bc1b 266 _NETRC_MACHINE = 'youtube'
3619f78d 267
b2e8bc1b
JMF
268 # If True it will raise an error if no login info is provided
269 _LOGIN_REQUIRED = False
270
d9190e44
RH
271 _INVIDIOUS_SITES = (
272 # invidious-redirect websites
273 r'(?:www\.)?redirect\.invidious\.io',
274 r'(?:(?:www|dev)\.)?invidio\.us',
275 # Invidious instances taken from https://github.com/iv-org/documentation/blob/master/Invidious-Instances.md
276 r'(?:www\.)?invidious\.pussthecat\.org',
277 r'(?:www\.)?invidious\.zee\.li',
278 r'(?:www\.)?invidious\.ethibox\.fr',
279 r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',
4c968755
U
280 r'(?:www\.)?osbivz6guyeahrwp2lnwyjk2xos342h4ocsxyqrlaopqjuhwn2djiiyd\.onion',
281 r'(?:www\.)?u2cvlit75owumwpy4dj2hsmvkq7nvrclkpht7xgyye2pyoxhpmclkrad\.onion',
d9190e44
RH
282 # youtube-dl invidious instances list
283 r'(?:(?:www|no)\.)?invidiou\.sh',
284 r'(?:(?:www|fi)\.)?invidious\.snopyta\.org',
285 r'(?:www\.)?invidious\.kabi\.tk',
286 r'(?:www\.)?invidious\.mastodon\.host',
287 r'(?:www\.)?invidious\.zapashcanon\.fr',
288 r'(?:www\.)?(?:invidious(?:-us)?|piped)\.kavin\.rocks',
289 r'(?:www\.)?invidious\.tinfoil-hat\.net',
290 r'(?:www\.)?invidious\.himiko\.cloud',
291 r'(?:www\.)?invidious\.reallyancient\.tech',
292 r'(?:www\.)?invidious\.tube',
293 r'(?:www\.)?invidiou\.site',
294 r'(?:www\.)?invidious\.site',
295 r'(?:www\.)?invidious\.xyz',
296 r'(?:www\.)?invidious\.nixnet\.xyz',
297 r'(?:www\.)?invidious\.048596\.xyz',
298 r'(?:www\.)?invidious\.drycat\.fr',
299 r'(?:www\.)?inv\.skyn3t\.in',
300 r'(?:www\.)?tube\.poal\.co',
301 r'(?:www\.)?tube\.connect\.cafe',
302 r'(?:www\.)?vid\.wxzm\.sx',
303 r'(?:www\.)?vid\.mint\.lgbt',
304 r'(?:www\.)?vid\.puffyan\.us',
305 r'(?:www\.)?yewtu\.be',
306 r'(?:www\.)?yt\.elukerio\.org',
307 r'(?:www\.)?yt\.lelux\.fi',
308 r'(?:www\.)?invidious\.ggc-project\.de',
309 r'(?:www\.)?yt\.maisputain\.ovh',
310 r'(?:www\.)?ytprivate\.com',
311 r'(?:www\.)?invidious\.13ad\.de',
312 r'(?:www\.)?invidious\.toot\.koeln',
313 r'(?:www\.)?invidious\.fdn\.fr',
314 r'(?:www\.)?watch\.nettohikari\.com',
315 r'(?:www\.)?invidious\.namazso\.eu',
316 r'(?:www\.)?invidious\.silkky\.cloud',
317 r'(?:www\.)?invidious\.exonip\.de',
318 r'(?:www\.)?invidious\.riverside\.rocks',
319 r'(?:www\.)?invidious\.blamefran\.net',
320 r'(?:www\.)?invidious\.moomoo\.de',
321 r'(?:www\.)?ytb\.trom\.tf',
322 r'(?:www\.)?yt\.cyberhost\.uk',
323 r'(?:www\.)?kgg2m7yk5aybusll\.onion',
324 r'(?:www\.)?qklhadlycap4cnod\.onion',
325 r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',
326 r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',
327 r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',
328 r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',
329 r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',
330 r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',
331 r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',
332 r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',
333 r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',
334 r'(?:www\.)?hpniueoejy4opn7bc4ftgazyqjoeqwlvh2uiku2xqku6zpoa4bf5ruid\.onion',
335 )
336
def _login(self):
    """
    Check the available authentication for YouTube.

    Raises a login-required error when _LOGIN_REQUIRED is set and neither the
    'cookiefile' nor the 'cookiesfrombrowser' option is given. A configured
    username only triggers a warning, since password login is not performed.
    """
    cookie_options = ('cookiefile', 'cookiesfrombrowser')
    if self._LOGIN_REQUIRED and not any(
            self.get_param(option) is not None for option in cookie_options):
        self.raise_login_required(
            'Login details are needed to download this content', method='cookies')

    username, _ = self._get_login_info()
    if not username:
        return
    self.report_warning(f'Cannot login to YouTube using username and password. {self._LOGIN_HINTS["cookies"]}')
b2e8bc1b 351
def _initialize_consent(self):
    """
    Set an accepted CONSENT cookie for .youtube.com unless one is not needed.

    Nothing is done when a '__Secure-3PSID' cookie exists or the current
    CONSENT cookie already contains 'YES'. The id of a PENDING consent cookie
    is reused when present; otherwise a random three-digit id is generated.
    """
    jar = self._get_cookies('https://www.youtube.com/')
    if jar.get('__Secure-3PSID'):
        return

    pending_id = None
    consent_cookie = jar.get('CONSENT')
    if consent_cookie:
        if 'YES' in consent_cookie.value:
            return
        pending_id = self._search_regex(
            r'PENDING\+(\d+)', consent_cookie.value, 'consent', default=None)

    consent_id = pending_id or random.randint(100, 999)
    self._set_cookie('.youtube.com', 'CONSENT', 'YES+cb.20210328-17-p0.en+FX+%s' % consent_id)
8d81f3e3 366
def _initialize_pref(self):
    """
    Rewrite the PREF cookie so that 'hl' is 'en' and 'tz' is 'UTC'.

    Values from an existing PREF cookie are kept when it can be parsed; a
    parse failure only produces a warning and the cookie starts from empty.
    """
    existing = self._get_cookies('https://www.youtube.com/').get('PREF')
    settings = {}
    if existing:
        try:
            settings = dict(compat_urlparse.parse_qsl(existing.value))
        except ValueError:
            self.report_warning('Failed to parse user PREF cookie' + bug_reports_message())
    settings['hl'] = 'en'
    settings['tz'] = 'UTC'
    self._set_cookie('.youtube.com', name='PREF', value=compat_urllib_parse_urlencode(settings))
378
def _real_initialize(self):
    """Run the initialisation steps in order: PREF cookie, consent cookie, login check."""
    for step in (self._initialize_pref, self._initialize_consent, self._login):
        step()
c5e8d7af 383
a0566bbf 384 _YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+?})\s*;'
29f7c58a 385 _YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*=\s*({.+?})\s*;'
386 _YT_INITIAL_BOUNDARY_RE = r'(?:var\s+meta|</script|\n)'
a0566bbf 387
def _get_default_ytcfg(self, client='web'):
    """Return an independent deep copy of the INNERTUBE_CLIENTS entry for `client`."""
    template = INNERTUBE_CLIENTS[client]
    return copy.deepcopy(template)
109dd3b2 390
def _get_innertube_host(self, client='web'):
    """Return the 'INNERTUBE_HOST' value configured for `client` in INNERTUBE_CLIENTS."""
    client_config = INNERTUBE_CLIENTS[client]
    return client_config['INNERTUBE_HOST']
109dd3b2 393
def _ytcfg_get_safe(self, ytcfg, getter, expected_type=None, default_client='web'):
    """
    try_get on `ytcfg`, falling back to the default config of `default_client`.

    The fallback is consulted whenever the value from `ytcfg` is falsy.
    """
    value = try_get(ytcfg, getter, expected_type)
    if value:
        return value
    return try_get(self._get_default_ytcfg(default_client), getter, expected_type)
398
def _extract_client_name(self, ytcfg, default_client='web'):
    """Return the client name from `ytcfg`, or from the default config of `default_client`."""
    name_getters = (
        lambda x: x['INNERTUBE_CLIENT_NAME'],
        lambda x: x['INNERTUBE_CONTEXT']['client']['clientName'],
    )
    return self._ytcfg_get_safe(ytcfg, name_getters, compat_str, default_client)
109dd3b2 403
def _extract_client_version(self, ytcfg, default_client='web'):
    """Return the client version from `ytcfg`, or from the default config of `default_client`."""
    version_getters = (
        lambda x: x['INNERTUBE_CLIENT_VERSION'],
        lambda x: x['INNERTUBE_CONTEXT']['client']['clientVersion'],
    )
    return self._ytcfg_get_safe(ytcfg, version_getters, compat_str, default_client)
109dd3b2 408
def _extract_api_key(self, ytcfg=None, default_client='web'):
    """Return 'INNERTUBE_API_KEY' from `ytcfg`, or from the default config of `default_client`."""
    def key_getter(cfg):
        return cfg['INNERTUBE_API_KEY']

    return self._ytcfg_get_safe(ytcfg, key_getter, compat_str, default_client)
411
def _extract_context(self, ytcfg=None, default_client='web'):
    """
    Return the 'INNERTUBE_CONTEXT' dict from `ytcfg` or the default client config.

    The 'client' sub-dict, when present, is updated in place so that language
    and timezone are fixed to English / UTC.
    """
    sources = (ytcfg, self._get_default_ytcfg(default_client))
    context = get_first(sources, 'INNERTUBE_CONTEXT', expected_type=dict)
    # Enforce language and tz for extraction
    client_section = traverse_obj(context, 'client', expected_type=dict, default={})
    client_section.update(hl='en', timeZone='UTC', utcOffsetMinutes=0)
    return context
419
cf87314d 420 _SAPISID = None
421
109dd3b2 422 def _generate_sapisidhash_header(self, origin='https://www.youtube.com'):
a5c56234 423 time_now = round(time.time())
cf87314d 424 if self._SAPISID is None:
425 yt_cookies = self._get_cookies('https://www.youtube.com')
426 # Sometimes SAPISID cookie isn't present but __Secure-3PAPISID is.
427 # See: https://github.com/yt-dlp/yt-dlp/issues/393
428 sapisid_cookie = dict_get(
429 yt_cookies, ('__Secure-3PAPISID', 'SAPISID'))
430 if sapisid_cookie and sapisid_cookie.value:
431 self._SAPISID = sapisid_cookie.value
432 self.write_debug('Extracted SAPISID cookie')
433 # SAPISID cookie is required if not already present
434 if not yt_cookies.get('SAPISID'):
435 self.write_debug('Copying __Secure-3PAPISID cookie to SAPISID cookie')
436 self._set_cookie(
437 '.youtube.com', 'SAPISID', self._SAPISID, secure=True, expire_time=time_now + 3600)
438 else:
439 self._SAPISID = False
440 if not self._SAPISID:
441 return None
1974e99f 442 # SAPISIDHASH algorithm from https://stackoverflow.com/a/32065323
443 sapisidhash = hashlib.sha1(
cf87314d 444 f'{time_now} {self._SAPISID} {origin}'.encode('utf-8')).hexdigest()
1974e99f 445 return f'SAPISIDHASH {time_now}_{sapisidhash}'
a5c56234
M
446
def _call_api(self, ep, query, video_id, fatal=True, headers=None,
              note='Downloading API JSON', errnote='Unable to download API page',
              context=None, api_key=None, api_hostname=None, default_client='web'):
    """
    POST `query` to the InnerTube endpoint `ep` and return the parsed JSON.

    @param ep              endpoint name appended to /youtubei/v1/
    @param query           dict merged into the request body next to 'context'
    @param video_id        id passed to _download_json
    @param headers         extra headers; they override the generated ones
    @param context         InnerTube context; defaults to that of default_client
    @param api_key         API key; defaults to that of default_client
    @param api_hostname    host to contact; defaults to that of default_client
    @param default_client  client whose defaults fill in anything not given

    fatal, note and errnote are forwarded to _download_json.
    """
    data = {'context': context or self._extract_context(default_client=default_client)}
    data.update(query)

    real_headers = self.generate_api_headers(default_client=default_client)
    real_headers.update({'content-type': 'application/json'})
    if headers:
        real_headers.update(headers)

    # The fallback key must belong to the same client as the context, headers
    # and host above, so default_client is passed on here as well.
    key = api_key or self._extract_api_key(default_client=default_client)
    return self._download_json(
        'https://%s/youtubei/v1/%s' % (api_hostname or self._get_innertube_host(default_client), ep),
        video_id=video_id, fatal=fatal, note=note, errnote=errnote,
        data=json.dumps(data).encode('utf8'), headers=real_headers,
        query={'key': key})
462
def extract_yt_initial_data(self, item_id, webpage, fatal=True):
    """
    Find the ytInitialData object in `webpage` and return it parsed as JSON.

    The pattern bounded by _YT_INITIAL_BOUNDARY_RE is tried before the plain
    one. Returns None when nothing was found.
    """
    patterns = (
        r'%s\s*%s' % (self._YT_INITIAL_DATA_RE, self._YT_INITIAL_BOUNDARY_RE),
        self._YT_INITIAL_DATA_RE,
    )
    raw_json = self._search_regex(patterns, webpage, 'yt initial data', fatal=fatal)
    if not raw_json:
        return None
    return self._parse_json(raw_json, item_id, fatal=fatal)
0c148415 469
@staticmethod
def _extract_session_index(*data):
    """
    Index of current account in account list.
    Returns the first 'SESSION_INDEX' among the given ytcfgs that converts to
    an int, or None when there is none.
    See: https://github.com/yt-dlp/yt-dlp/pull/519
    """
    indices = (
        int_or_none(try_get(ytcfg, lambda x: x['SESSION_INDEX']))
        for ytcfg in data)
    return next((index for index in indices if index is not None), None)
480
481 # Deprecated?
def _extract_identity_token(self, ytcfg=None, webpage=None):
    """
    Return the identity token from `ytcfg` ('ID_TOKEN'), else from `webpage`.

    Returns None when neither source yields a token.
    """
    token = try_get(ytcfg, lambda x: x['ID_TOKEN'], compat_str) if ytcfg else None
    if token:
        return token
    if not webpage:
        return None
    return self._search_regex(
        r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
        'identity token', default=None, fatal=False)
a1c5d2ca
M
491
@staticmethod
def _extract_account_syncid(*args):
    """
    Extract syncId required to download private playlists of secondary channels
    @params response and/or ytcfg
    @returns the first sync id found among the arguments, else None
    """
    datasync_getters = (
        lambda x: x['responseContext']['mainAppWebResponseContext']['datasyncId'],
        lambda x: x['DATASYNC_ID'],
    )
    for source in args:
        # ytcfg includes channel_syncid if on secondary channel
        delegated_id = try_get(source, lambda x: x['DELEGATED_SESSION_ID'], compat_str)
        if delegated_id:
            return delegated_id

        datasync_id = try_get(source, datasync_getters, compat_str) or ''
        parts = datasync_id.split('||')
        # datasyncid is of the form "channel_syncid||user_syncid" for secondary channel
        # and just "user_syncid||" for primary channel. We only want the channel_syncid
        if len(parts) >= 2 and parts[1]:
            return parts[0]
    return None
a1c5d2ca 510
@staticmethod
def _extract_visitor_data(*args):
    """
    Extracts visitorData from an API response or ytcfg
    Appears to be used to track session state
    @params response and/or ytcfg
    @returns the first visitor data string found, else None
    """
    # The alternatives must form ONE branching key. A bare parenthesised tuple
    # would be unpacked by get_first into three consecutive path steps, so the
    # tuple is wrapped in a list to keep it intact as a single step.
    alternatives = (
        'VISITOR_DATA',
        ('INNERTUBE_CONTEXT', 'client', 'visitorData'),
        ('responseContext', 'visitorData'),
    )
    return get_first(args, [alternatives], expected_type=str)
ac56cf38 520
@property
def is_authenticated(self):
    """True when a SAPISIDHASH authorization header can be generated."""
    auth_header = self._generate_sapisidhash_header()
    return bool(auth_header)
524
def extract_ytcfg(self, video_id, webpage):
    """
    Parse the argument of the first `ytcfg.set({...});` call in `webpage`.

    Returns an empty dict when there is no webpage, no match, or the JSON
    cannot be parsed.
    """
    if not webpage:
        return {}
    raw_ytcfg = self._search_regex(
        r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg',
        default='{}')
    parsed = self._parse_json(raw_ytcfg, video_id, fatal=False)
    return parsed or {}
532
def generate_api_headers(
        self, *, ytcfg=None, account_syncid=None, session_index=None,
        visitor_data=None, identity_token=None, api_hostname=None, default_client='web'):
    """
    Build the HTTP headers for an InnerTube API request.

    Explicit arguments take precedence; anything missing is extracted from
    `ytcfg` (or the default config of `default_client`). Headers whose value
    is None are left out of the returned dict.
    """
    host = api_hostname if api_hostname else self._get_innertube_host(default_client)
    origin = 'https://' + host
    client_name = self._ytcfg_get_safe(
        ytcfg, lambda x: x['INNERTUBE_CONTEXT_CLIENT_NAME'], default_client=default_client)

    headers = {
        'X-YouTube-Client-Name': compat_str(client_name),
        'X-YouTube-Client-Version': self._extract_client_version(ytcfg, default_client),
        'Origin': origin,
        'X-Youtube-Identity-Token': identity_token or self._extract_identity_token(ytcfg),
        'X-Goog-PageId': account_syncid or self._extract_account_syncid(ytcfg),
        'X-Goog-Visitor-Id': visitor_data or self._extract_visitor_data(ytcfg)
    }

    if session_index is None:
        session_index = self._extract_session_index(ytcfg)
    if account_syncid or session_index is not None:
        # With a sync id but no known index, the first account (0) is used
        headers['X-Goog-AuthUser'] = 0 if session_index is None else session_index

    auth = self._generate_sapisidhash_header(origin)
    if auth is not None:
        headers['Authorization'] = auth
        headers['X-Origin'] = origin

    present = {}
    for header_name, header_value in headers.items():
        if header_value is not None:
            present[header_name] = header_value
    return present
29f7c58a 557
2d6659b9 558 @staticmethod
559 def _build_api_continuation_query(continuation, ctp=None):
560 query = {
561 'continuation': continuation
562 }
563 # TODO: Inconsistency with clickTrackingParams.
564 # Currently we have a fixed ctp contained within context (from ytcfg)
565 # and a ctp in root query for continuation.
566 if ctp:
567 query['clickTracking'] = {'clickTrackingParams': ctp}
568 return query
569
@classmethod
def _extract_next_continuation_data(cls, renderer):
    """
    Build a continuation query from the renderer's nextContinuationData or
    reloadContinuationData. Returns None when no continuation token is found.
    """
    data_getters = (
        lambda x: x['continuations'][0]['nextContinuationData'],
        lambda x: x['continuation']['reloadContinuationData'],
    )
    continuation_data = try_get(renderer, data_getters, dict)
    token = continuation_data.get('continuation') if continuation_data else None
    if not token:
        return None
    return cls._build_api_continuation_query(
        token, continuation_data.get('clickTrackingParams'))
2d6659b9 582
@classmethod
def _extract_continuation_ep_data(cls, continuation_ep: dict):
    """
    Build a continuation query from a continuation endpoint dict.

    Returns None when `continuation_ep` is not a dict or carries no
    continuationCommand token.
    """
    if not isinstance(continuation_ep, dict):
        return None
    token = try_get(
        continuation_ep, lambda x: x['continuationCommand']['token'], compat_str)
    if not token:
        return None
    return cls._build_api_continuation_query(
        token, continuation_ep.get('clickTrackingParams'))
2d6659b9 592
@classmethod
def _extract_continuation(cls, renderer):
    """
    Return the continuation query for `renderer`, or None if there is none.

    The renderer's own continuation data is preferred; otherwise the entries
    under 'contents' and 'items' are scanned for a continuationItemRenderer.
    """
    query = cls._extract_next_continuation_data(renderer)
    if query:
        return query

    entries = [
        entry
        for key in ('contents', 'items')
        for entry in try_get(renderer, lambda x: x[key], list) or []]

    endpoint_getters = (
        lambda x: x['continuationItemRenderer']['continuationEndpoint'],
        lambda x: x['continuationItemRenderer']['button']['buttonRenderer']['command'],
    )
    for entry in entries:
        if isinstance(entry, dict):
            endpoint = try_get(entry, endpoint_getters, dict)
            query = cls._extract_continuation_ep_data(endpoint)
            if query:
                return query
    return None
613
@classmethod
def _extract_alerts(cls, data):
    """
    Yield (alert_type, message) pairs for the alerts listed in data['alerts'].

    Alerts without a type or without extractable text are skipped, as is
    anything that is not a dict.
    """
    for alert_dict in try_get(data, lambda x: x['alerts'], list) or []:
        if not isinstance(alert_dict, dict):
            continue
        for alert in alert_dict.values():
            # A malformed (non-dict) renderer must not abort the whole scan
            if not isinstance(alert, dict):
                continue
            alert_type = alert.get('type')
            if not alert_type:
                continue
            message = cls._get_text(alert, 'text')
            if message:
                yield alert_type, message
626
def _report_alerts(self, alerts, expected=True, fatal=True, only_once=False):
    """
    Report (alert_type, message) pairs as warnings and/or an error.

    When `fatal` is set, the last alert of type 'error' (case-insensitive) is
    raised as ExtractorError; every other alert is reported as a warning.
    """
    raised, reported = [], []
    for kind, message in alerts:
        is_error = kind.lower() == 'error' and fatal
        (raised if is_error else reported).append([kind, message])

    # All errors except the last are demoted to warnings
    for kind, message in reported + raised[:-1]:
        self.report_warning('YouTube said: %s - %s' % (kind, message), only_once=only_once)

    if not raised:
        return
    raise ExtractorError('YouTube said: %s' % raised[-1][1], expected=expected)
640
def _extract_and_report_alerts(self, data, *args, **kwargs):
    """Extract the alerts from `data` and hand them to _report_alerts with the given options."""
    alerts = self._extract_alerts(data)
    return self._report_alerts(alerts, *args, **kwargs)
643
def _extract_badges(self, renderer: dict):
    """Return the set of lower-cased metadataBadgeRenderer labels found in renderer['badges']."""
    badge_list = try_get(renderer, lambda x: x['badges'], list) or []
    labels = (
        try_get(badge, lambda x: x['metadataBadgeRenderer']['label'], compat_str)
        for badge in badge_list)
    return {label.lower() for label in labels if label}
651
@staticmethod
def _get_text(data, *path_list, max_runs=None):
    """
    Return the first non-empty text found at any of the given paths in `data`.

    For each candidate the 'simpleText' value is preferred; otherwise the
    'text' of its 'runs' (or of the candidate itself when it is a list) is
    joined, limited to the first `max_runs` runs when that is set.
    With no paths, `data` itself is the only candidate.
    """
    for path in path_list or [None]:
        if path is None:
            candidates = [data]
        else:
            found = traverse_obj(data, path, default=[])
            # Only a path with a `...` or list/tuple key is treated as
            # already yielding a list of candidates
            is_branching = any(
                key is ... or isinstance(key, (list, tuple)) for key in variadic(path))
            candidates = found if is_branching else [found]

        for item in candidates:
            simple_text = try_get(item, lambda x: x['simpleText'], compat_str)
            if simple_text:
                return simple_text

            runs = try_get(item, lambda x: x['runs'], list) or []
            if isinstance(item, list) and not runs:
                runs = item

            limit = min(len(runs), max_runs or len(runs))
            joined = ''.join(
                traverse_obj(runs[:limit], (..., 'text'), expected_type=str, default=[]))
            if joined:
                return joined
47193e02 673
def _get_count(self, data, *path_list):
    """
    Extract a count from the text found at the given paths.

    parse_count is tried first; if it yields None, a leading run of digits
    and commas (whitespace removed) is converted with str_to_int.
    """
    text = self._get_text(data, *path_list) or ''
    parsed = parse_count(text)
    if parsed is not None:
        return parsed
    compact_text = re.sub(r'\s', '', text)
    return str_to_int(
        self._search_regex(r'^([\d,]+)', compact_text, 'count', default=None))
681
@staticmethod
def _extract_thumbnails(data, *path_list):
    """
    Extract thumbnails from thumbnails dict
    @param path_list: path list to level that contains 'thumbnails' key
    @returns list of dicts with 'url', 'height' and 'width'; entries that are
             not dicts or have no valid URL are skipped
    """
    thumbnails = []
    for path in path_list or [()]:
        for thumbnail in traverse_obj(data, (*variadic(path), 'thumbnails', ...), default=[]):
            # Guard against malformed entries instead of failing on .get()
            if not isinstance(thumbnail, dict):
                continue
            thumbnail_url = url_or_none(thumbnail.get('url'))
            if not thumbnail_url:
                continue
            # Sometimes youtube gives a wrong thumbnail URL. See:
            # https://github.com/yt-dlp/yt-dlp/issues/233
            # https://github.com/ytdl-org/youtube-dl/issues/28023
            if 'maxresdefault' in thumbnail_url:
                thumbnail_url = thumbnail_url.split('?')[0]
            thumbnails.append({
                'url': thumbnail_url,
                'height': int_or_none(thumbnail.get('height')),
                'width': int_or_none(thumbnail.get('width')),
            })
    return thumbnails
705
@staticmethod
def extract_relative_time(relative_time_text):
    """
    Extracts a relative time from string and converts to dt object
    e.g. 'streamed 6 days ago', '5 seconds ago (edited)', 'updated today'
    Returns None when the text has no relative time or it cannot be converted.
    """
    match = re.search(
        r'(?P<start>today|yesterday|now)|(?P<time>\d+)\s*(?P<unit>microsecond|second|minute|hour|day|week|month|year)s?\s*ago',
        relative_time_text)
    if not match:
        return None

    keyword = match.group('start')
    if keyword:
        return datetime_from_str(keyword)

    amount, unit = match.group('time'), match.group('unit')
    try:
        return datetime_from_str('now-%s%s' % (amount, unit))
    except ValueError:
        return None
721
def _extract_time_text(self, renderer, *path_list):
    """
    Return (timestamp, text) for the time text found at the given paths.

    A relative time ("... ago", "today") is preferred; otherwise the text,
    and then a date-looking part of it, is parsed with unified_timestamp.
    A warning is issued when non-empty text could not be parsed.
    """
    text = self._get_text(renderer, *path_list) or ''
    relative = self.extract_relative_time(text)

    if isinstance(relative, datetime.datetime):
        timestamp = calendar.timegm(relative.timetuple())
    else:
        timestamp = unified_timestamp(text)
        if not timestamp:
            date_part = self._search_regex(
                (r'(?:.+|^)(?:live|premieres|ed|ing)(?:\s*on)?\s*(.+\d)', r'\w+[\s,\.-]*\w+[\s,\.-]+20\d{2}'),
                text.lower(), 'time text', default=None)
            timestamp = unified_timestamp(date_part)

    if text and timestamp is None:
        self.report_warning('Cannot parse localized time text' + bug_reports_message(), only_once=True)
    return timestamp, text
739
def _extract_response(self, item_id, query, note='Downloading API JSON', headers=None,
                      ytcfg=None, check_get_keys=None, ep='browse', fatal=True, api_hostname=None,
                      default_client='web'):
    """
    Call the InnerTube API with retries and return the response.

    Up to 'extractor_retries' (default 3) additional attempts are made on
    network errors (except HTTP 403/429), on alerts containing
    'unknown error', and on responses lacking all of `check_get_keys`.

    @param check_get_keys  keys of which at least one must be present in the
                           response for it to count as complete
    @param fatal           raise on failure if True; otherwise warn and
                           return None
    The remaining parameters are passed on to _call_api (context and API key
    are taken from `ytcfg` / `default_client`).
    """
    response = None
    last_error = None
    # Starts at -1 so that the first attempt is number 0
    count = -1
    retries = self.get_param('extractor_retries', 3)
    if check_get_keys is None:
        check_get_keys = []
    while count < retries:
        count += 1
        if last_error:
            self.report_warning('%s. Retrying ...' % remove_end(last_error, '.'))
        try:
            # Always fatal here: failures are handled by the except clause below
            response = self._call_api(
                ep=ep, fatal=True, headers=headers,
                video_id=item_id, query=query,
                context=self._extract_context(ytcfg, default_client),
                api_key=self._extract_api_key(ytcfg, default_client),
                api_hostname=api_hostname, default_client=default_client,
                note='%s%s' % (note, ' (retry #%d)' % count if count else ''))
        except ExtractorError as e:
            if isinstance(e.cause, network_exceptions):
                # A non-HTML error body may hold a JSON error message; report it as a warning
                if isinstance(e.cause, compat_HTTPError) and not is_html(e.cause.read(512)):
                    e.cause.seek(0)
                    yt_error = try_get(
                        self._parse_json(e.cause.read().decode(), item_id, fatal=False),
                        lambda x: x['error']['message'], compat_str)
                    if yt_error:
                        self._report_alerts([('ERROR', yt_error)], fatal=False)
                # Downloading page may result in intermittent 5xx HTTP error
                # Sometimes a 404 is also received. See: https://github.com/ytdl-org/youtube-dl/issues/28289
                # We also want to catch all other network exceptions since errors in later pages can be troublesome
                # See https://github.com/yt-dlp/yt-dlp/issues/507#issuecomment-880188210
                if not isinstance(e.cause, compat_HTTPError) or e.cause.code not in (403, 429):
                    last_error = error_to_compat_str(e.cause or e.msg)
                    if count < retries:
                        continue
            # Not retryable, or retries exhausted
            if fatal:
                raise
            else:
                self.report_warning(error_to_compat_str(e))
                return

        else:
            try:
                self._extract_and_report_alerts(response, only_once=True)
            except ExtractorError as e:
                # YouTube servers may return errors we want to retry on in a 200 OK response
                # See: https://github.com/yt-dlp/yt-dlp/issues/839
                if 'unknown error' in e.msg.lower():
                    last_error = e.msg
                    continue
                if fatal:
                    raise
                self.report_warning(error_to_compat_str(e))
                return
            if not check_get_keys or dict_get(response, check_get_keys):
                break
            # Youtube sometimes sends incomplete data
            # See: https://github.com/ytdl-org/youtube-dl/issues/28194
            last_error = 'Incomplete data received'
            if count >= retries:
                if fatal:
                    raise ExtractorError(last_error)
                else:
                    self.report_warning(last_error)
                    return
    return response
809
9297939e 810 @staticmethod
811 def is_music_url(url):
812 return re.match(r'https?://music\.youtube\.com/', url) is not None
813
def _extract_video(self, renderer):
    """Turn a single video renderer dict into a 'url'-type result for YoutubeIE.

    All metadata is read from ``renderer`` via the class helpers
    (``_get_text``, ``_get_count``, ``_extract_time_text``, ``_extract_badges``,
    ``_extract_thumbnails``) and ``traverse_obj``. The returned dict points at
    the watch page of the renderer's ``videoId``.

    ``live_status`` is decided in this order:
      * 'is_upcoming' when a scheduled start time is present,
      * 'was_live' when the published-time text contains 'streamed',
      * 'is_live' when the time-status overlay style is 'LIVE' or the
        badges contain 'live now',
      * otherwise None.
    """
    # Common prefix of the thumbnail time-status overlay lookups below
    time_status_path = ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer')

    vid = renderer.get('videoId')
    video_title = self._get_text(renderer, 'title')
    snippet = self._get_text(renderer, 'descriptionSnippet')
    # Candidate locations for the length text handed to _get_text
    length_text = self._get_text(renderer, 'lengthText', time_status_path + ('text',))
    length = parse_duration(length_text)
    views = self._get_count(renderer, 'viewCountText')

    owner_name = self._get_text(renderer, 'ownerText', 'shortBylineText')
    owner_channel_id = traverse_obj(
        renderer,
        ('shortBylineText', 'runs', ..., 'navigationEndpoint', 'browseEndpoint', 'browseId'),
        expected_type=str, get_all=False)
    # `timestamp` is currently unused; it only fed the disabled upload_date entry below
    timestamp, time_text = self._extract_time_text(renderer, 'publishedTimeText')
    start_time_raw = traverse_obj(renderer, ('upcomingEventData', 'startTime'), get_all=False)
    scheduled_timestamp = str_to_int(start_time_raw)
    overlay_style = traverse_obj(
        renderer, time_status_path + ('style',), get_all=False, expected_type=str)
    badges = self._extract_badges(renderer)
    thumbs = self._extract_thumbnails(renderer, 'thumbnail')

    # First matching rule wins; later checks are skipped just like in a chained conditional
    if scheduled_timestamp is not None:
        live_status = 'is_upcoming'
    elif 'streamed' in time_text.lower():
        live_status = 'was_live'
    elif overlay_style == 'LIVE' or 'live now' in badges:
        live_status = 'is_live'
    else:
        live_status = None

    return {
        '_type': 'url',
        'ie_key': YoutubeIE.ie_key(),
        'id': vid,
        'url': f'https://www.youtube.com/watch?v={vid}',
        'title': video_title,
        'description': snippet,
        'duration': length,
        'view_count': views,
        'uploader': owner_name,
        'channel_id': owner_channel_id,
        'thumbnails': thumbs,
        # 'upload_date': strftime_or_none(timestamp, '%Y%m%d'),
        'live_status': live_status,
        'release_timestamp': scheduled_timestamp,
        'availability': self._availability(
            needs_premium='premium' in badges,
            needs_subscription='members only' in badges)
    }
852
0c148415 853
360e1ca5 854class YoutubeIE(YoutubeBaseInfoExtractor):
96565c7e 855 IE_DESC = 'YouTube'
cb7dfeea 856 _VALID_URL = r"""(?x)^
c5e8d7af 857 (
edb53e2d 858 (?:https?://|//) # http(s):// or protocol-independent URL
bc2ca1bb 859 (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com|
860 (?:www\.)?deturl\.com/www\.youtube\.com|
861 (?:www\.)?pwnyoutube\.com|
862 (?:www\.)?hooktube\.com|
863 (?:www\.)?yourepeat\.com|
864 tube\.majestyc\.net|
865 %(invidious)s|
866 youtube\.googleapis\.com)/ # the various hostnames, with wildcard subdomains
c5e8d7af
PH
867 (?:.*?\#/)? # handle anchor (#/) redirect urls
868 (?: # the various things that can precede the ID:
b6ce9bb0 869 (?:(?:v|embed|e|shorts)/(?!videoseries|live_stream)) # v/ or embed/ or e/ or shorts/
c5e8d7af 870 |(?: # or the v= param in all its forms
f7000f3a 871 (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
c5e8d7af 872 (?:\?|\#!?) # the params delimiter ? or # or #!
040ac686 873 (?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
c5e8d7af
PH
874 v=
875 )
f4b05232 876 ))
cbaed4bb
S
877 |(?:
878 youtu\.be| # just youtu.be/xxxx
6d4fc66b
S
879 vid\.plus| # or vid.plus/xxxx
880 zwearz\.com/watch| # or zwearz.com/watch/xxxx
bc2ca1bb 881 %(invidious)s
cbaed4bb 882 )/
edb53e2d 883 |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
f4b05232 884 )
c5e8d7af 885 )? # all until now is optional -> you can pass the naked ID
201c1459 886 (?P<id>[0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
c5e8d7af 887 (?(1).+)? # if we found the ID, everything can follow
9297939e 888 (?:\#|$)""" % {
d9190e44 889 'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
bc2ca1bb 890 }
e40c758c 891 _PLAYER_INFO_RE = (
cc2db878 892 r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player',
893 r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',
545cc85d 894 r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$',
e40c758c 895 )
2c62dc26 896 _formats = {
c2d3cb4c 897 '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
898 '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
899 '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
900 '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
901 '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
902 '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
903 '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
904 '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
3834d3e3 905 # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
c2d3cb4c 906 '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
907 '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
908 '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
909 '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
910 '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
911 '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
e1a0bfdf 912 '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
c2d3cb4c 913 '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
914 '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
e1a0bfdf 915
916
917 # 3D videos
c2d3cb4c 918 '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
919 '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
920 '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
921 '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
e1a0bfdf 922 '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
923 '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
924 '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
836a086c 925
96fb5605 926 # Apple HTTP Live Streaming
11f12195 927 '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
c2d3cb4c 928 '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
929 '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
930 '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
931 '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
932 '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
e1a0bfdf 933 '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
934 '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
2c62dc26
PH
935
936 # DASH mp4 video
d23028a8
S
937 '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
938 '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
939 '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
940 '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
941 '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
067aa17e 942 '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
d23028a8
S
943 '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
944 '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
945 '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
946 '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
947 '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
948 '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},
836a086c 949
f6f1fc92 950 # Dash mp4 audio
d23028a8
S
951 '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
952 '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
953 '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
954 '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
955 '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
956 '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
957 '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},
836a086c
AZ
958
959 # Dash webm
d23028a8
S
960 '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
961 '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
962 '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
963 '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
964 '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
965 '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
966 '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
967 '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
968 '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
969 '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
970 '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
971 '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
972 '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
973 '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
974 '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
4c6b4764 975 # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
d23028a8
S
976 '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
977 '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
978 '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
979 '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
980 '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
981 '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
2c62dc26
PH
982
983 # Dash webm audio
d23028a8
S
984 '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
985 '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},
ce6b9a2d 986
0857baad 987 # Dash webm audio with opus inside
d23028a8
S
988 '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
989 '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
990 '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},
0857baad 991
ce6b9a2d
PH
992 # RTMP (unnamed)
993 '_rtmp': {'protocol': 'rtmp'},
b85eae0f
S
994
995 # av01 video only formats sometimes served with "unknown" codecs
9b5fa9ee
TOH
996 '394': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},
997 '395': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},
998 '396': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'av01.0.01M.08'},
999 '397': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'av01.0.04M.08'},
1000 '398': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'av01.0.05M.08'},
1001 '399': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'av01.0.08M.08'},
1002 '400': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},
1003 '401': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},
c5e8d7af 1004 }
29f7c58a 1005 _SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')
836a086c 1006
fd5c4aab
S
1007 _GEO_BYPASS = False
1008
78caa52a 1009 IE_NAME = 'youtube'
2eb88d95
PH
1010 _TESTS = [
1011 {
2d3d2997 1012 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
4bc3a23e
PH
1013 'info_dict': {
1014 'id': 'BaW_jenozKc',
1015 'ext': 'mp4',
3867038a 1016 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
4bc3a23e
PH
1017 'uploader': 'Philipp Hagemeister',
1018 'uploader_id': 'phihag',
ec85ded8 1019 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
ff9f925b 1020 'channel': 'Philipp Hagemeister',
dd4c4492
S
1021 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
1022 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
4bc3a23e 1023 'upload_date': '20121002',
ff9f925b 1024 'description': 'md5:8fb536f4877b8a7455c2ec23794dbc22',
4bc3a23e 1025 'categories': ['Science & Technology'],
3867038a 1026 'tags': ['youtube-dl'],
556dbe7f 1027 'duration': 10,
dbdaaa23 1028 'view_count': int,
3e7c1224 1029 'like_count': int,
ff9f925b 1030 'availability': 'public',
1031 'playable_in_embed': True,
1032 'thumbnail': 'https://i.ytimg.com/vi/BaW_jenozKc/maxresdefault.jpg',
1033 'live_status': 'not_live',
1034 'age_limit': 0,
7c80519c 1035 'start_time': 1,
297a564b 1036 'end_time': 9,
2eb88d95 1037 }
0e853ca4 1038 },
fccd3771 1039 {
4bc3a23e
PH
1040 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
1041 'note': 'Embed-only video (#1746)',
1042 'info_dict': {
1043 'id': 'yZIXLfi8CZQ',
1044 'ext': 'mp4',
1045 'upload_date': '20120608',
1046 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
1047 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
1048 'uploader': 'SET India',
94bfcd23 1049 'uploader_id': 'setindia',
ec85ded8 1050 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
94bfcd23 1051 'age_limit': 18,
545cc85d 1052 },
1053 'skip': 'Private video',
fccd3771 1054 },
11b56058 1055 {
8bdd16b4 1056 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=yZIXLfi8CZQ',
11b56058
PM
1057 'note': 'Use the first video ID in the URL',
1058 'info_dict': {
1059 'id': 'BaW_jenozKc',
1060 'ext': 'mp4',
3867038a 1061 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
11b56058
PM
1062 'uploader': 'Philipp Hagemeister',
1063 'uploader_id': 'phihag',
ec85ded8 1064 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
976ae3ea 1065 'channel': 'Philipp Hagemeister',
1066 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
1067 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
11b56058 1068 'upload_date': '20121002',
976ae3ea 1069 'description': 'md5:8fb536f4877b8a7455c2ec23794dbc22',
11b56058 1070 'categories': ['Science & Technology'],
3867038a 1071 'tags': ['youtube-dl'],
556dbe7f 1072 'duration': 10,
dbdaaa23 1073 'view_count': int,
11b56058 1074 'like_count': int,
976ae3ea 1075 'availability': 'public',
1076 'playable_in_embed': True,
1077 'thumbnail': 'https://i.ytimg.com/vi/BaW_jenozKc/maxresdefault.jpg',
1078 'live_status': 'not_live',
1079 'age_limit': 0,
34a7de29
S
1080 },
1081 'params': {
1082 'skip_download': True,
1083 },
11b56058 1084 },
dd27fd17 1085 {
2d3d2997 1086 'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
4bc3a23e
PH
1087 'note': '256k DASH audio (format 141) via DASH manifest',
1088 'info_dict': {
1089 'id': 'a9LDPn-MO4I',
1090 'ext': 'm4a',
1091 'upload_date': '20121002',
1092 'uploader_id': '8KVIDEO',
ec85ded8 1093 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
4bc3a23e
PH
1094 'description': '',
1095 'uploader': '8KVIDEO',
1096 'title': 'UHDTV TEST 8K VIDEO.mp4'
4919603f 1097 },
4bc3a23e
PH
1098 'params': {
1099 'youtube_include_dash_manifest': True,
1100 'format': '141',
4919603f 1101 },
de3c7fe0 1102 'skip': 'format 141 not served anymore',
dd27fd17 1103 },
8bdd16b4 1104 # DASH manifest with encrypted signature
1105 {
1106 'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
1107 'info_dict': {
1108 'id': 'IB3lcPjvWLA',
1109 'ext': 'm4a',
1110 'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
1111 'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
1112 'duration': 244,
1113 'uploader': 'AfrojackVEVO',
1114 'uploader_id': 'AfrojackVEVO',
1115 'upload_date': '20131011',
cc2db878 1116 'abr': 129.495,
976ae3ea 1117 'like_count': int,
1118 'channel_id': 'UChuZAo1RKL85gev3Eal9_zg',
1119 'playable_in_embed': True,
1120 'channel_url': 'https://www.youtube.com/channel/UChuZAo1RKL85gev3Eal9_zg',
1121 'view_count': int,
1122 'track': 'The Spark',
1123 'live_status': 'not_live',
1124 'thumbnail': 'https://i.ytimg.com/vi_webp/IB3lcPjvWLA/maxresdefault.webp',
1125 'channel': 'Afrojack',
1126 'uploader_url': 'http://www.youtube.com/user/AfrojackVEVO',
1127 'tags': 'count:19',
1128 'availability': 'public',
1129 'categories': ['Music'],
1130 'age_limit': 0,
1131 'alt_title': 'The Spark',
8bdd16b4 1132 },
1133 'params': {
1134 'youtube_include_dash_manifest': True,
1135 'format': '141/bestaudio[ext=m4a]',
1136 },
1137 },
65c2fde2 1138 # Age-gate videos. See https://github.com/yt-dlp/yt-dlp/pull/575#issuecomment-888837000
c522adb1 1139 {
65c2fde2 1140 'note': 'Embed allowed age-gate video',
2d3d2997 1141 'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
c522adb1
JMF
1142 'info_dict': {
1143 'id': 'HtVdAasjOgU',
1144 'ext': 'mp4',
1145 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
ec85ded8 1146 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
556dbe7f 1147 'duration': 142,
c522adb1
JMF
1148 'uploader': 'The Witcher',
1149 'uploader_id': 'WitcherGame',
ec85ded8 1150 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
c522adb1 1151 'upload_date': '20140605',
34952f09 1152 'age_limit': 18,
976ae3ea 1153 'categories': ['Gaming'],
1154 'thumbnail': 'https://i.ytimg.com/vi_webp/HtVdAasjOgU/maxresdefault.webp',
1155 'availability': 'needs_auth',
1156 'channel_url': 'https://www.youtube.com/channel/UCzybXLxv08IApdjdN0mJhEg',
1157 'like_count': int,
1158 'channel': 'The Witcher',
1159 'live_status': 'not_live',
1160 'tags': 'count:17',
1161 'channel_id': 'UCzybXLxv08IApdjdN0mJhEg',
1162 'playable_in_embed': True,
1163 'view_count': int,
c522adb1
JMF
1164 },
1165 },
65c2fde2 1166 {
1167 'note': 'Age-gate video with embed allowed in public site',
1168 'url': 'https://youtube.com/watch?v=HsUATh_Nc2U',
1169 'info_dict': {
1170 'id': 'HsUATh_Nc2U',
1171 'ext': 'mp4',
1172 'title': 'Godzilla 2 (Official Video)',
1173 'description': 'md5:bf77e03fcae5529475e500129b05668a',
1174 'upload_date': '20200408',
1175 'uploader_id': 'FlyingKitty900',
1176 'uploader': 'FlyingKitty',
1177 'age_limit': 18,
976ae3ea 1178 'availability': 'needs_auth',
1179 'channel_id': 'UCYQT13AtrJC0gsM1far_zJg',
1180 'uploader_url': 'http://www.youtube.com/user/FlyingKitty900',
1181 'channel': 'FlyingKitty',
1182 'channel_url': 'https://www.youtube.com/channel/UCYQT13AtrJC0gsM1far_zJg',
1183 'view_count': int,
1184 'categories': ['Entertainment'],
1185 'live_status': 'not_live',
1186 'tags': ['Flyingkitty', 'godzilla 2'],
1187 'thumbnail': 'https://i.ytimg.com/vi/HsUATh_Nc2U/maxresdefault.jpg',
1188 'like_count': int,
1189 'duration': 177,
1190 'playable_in_embed': True,
65c2fde2 1191 },
1192 },
1193 {
1194 'note': 'Age-gate video embedable only with clientScreen=EMBED',
1195 'url': 'https://youtube.com/watch?v=Tq92D6wQ1mg',
1196 'info_dict': {
1197 'id': 'Tq92D6wQ1mg',
1198 'title': '[MMD] Adios - EVERGLOW [+Motion DL]',
3619f78d 1199 'ext': 'mp4',
1200 'upload_date': '20191227',
65c2fde2 1201 'uploader_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',
1202 'uploader': 'Projekt Melody',
1203 'description': 'md5:17eccca93a786d51bc67646756894066',
1204 'age_limit': 18,
976ae3ea 1205 'like_count': int,
1206 'availability': 'needs_auth',
1207 'uploader_url': 'http://www.youtube.com/channel/UC1yoRdFoFJaCY-AGfD9W0wQ',
1208 'channel_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',
1209 'view_count': int,
1210 'thumbnail': 'https://i.ytimg.com/vi_webp/Tq92D6wQ1mg/sddefault.webp',
1211 'channel': 'Projekt Melody',
1212 'live_status': 'not_live',
1213 'tags': ['mmd', 'dance', 'mikumikudance', 'kpop', 'vtuber'],
1214 'playable_in_embed': True,
1215 'categories': ['Entertainment'],
1216 'duration': 106,
1217 'channel_url': 'https://www.youtube.com/channel/UC1yoRdFoFJaCY-AGfD9W0wQ',
65c2fde2 1218 },
1219 },
1220 {
1221 'note': 'Non-Agegated non-embeddable video',
1222 'url': 'https://youtube.com/watch?v=MeJVWBSsPAY',
1223 'info_dict': {
1224 'id': 'MeJVWBSsPAY',
1225 'ext': 'mp4',
1226 'title': 'OOMPH! - Such Mich Find Mich (Lyrics)',
1227 'uploader': 'Herr Lurik',
1228 'uploader_id': 'st3in234',
1229 'description': 'Fan Video. Music & Lyrics by OOMPH!.',
1230 'upload_date': '20130730',
976ae3ea 1231 'track': 'Such mich find mich',
1232 'age_limit': 0,
1233 'tags': ['oomph', 'such mich find mich', 'lyrics', 'german industrial', 'musica industrial'],
1234 'like_count': int,
1235 'playable_in_embed': False,
1236 'creator': 'OOMPH!',
1237 'thumbnail': 'https://i.ytimg.com/vi/MeJVWBSsPAY/sddefault.jpg',
1238 'view_count': int,
1239 'alt_title': 'Such mich find mich',
1240 'duration': 210,
1241 'channel': 'Herr Lurik',
1242 'channel_id': 'UCdR3RSDPqub28LjZx0v9-aA',
1243 'categories': ['Music'],
1244 'availability': 'public',
1245 'uploader_url': 'http://www.youtube.com/user/st3in234',
1246 'channel_url': 'https://www.youtube.com/channel/UCdR3RSDPqub28LjZx0v9-aA',
1247 'live_status': 'not_live',
1248 'artist': 'OOMPH!',
65c2fde2 1249 },
1250 },
1251 {
1252 'note': 'Non-bypassable age-gated video',
1253 'url': 'https://youtube.com/watch?v=Cr381pDsSsA',
1254 'only_matching': True,
1255 },
8bdd16b4 1256 # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
1257 # YouTube Red ad is not captured for creator
1258 {
1259 'url': '__2ABJjxzNo',
1260 'info_dict': {
1261 'id': '__2ABJjxzNo',
1262 'ext': 'mp4',
1263 'duration': 266,
1264 'upload_date': '20100430',
1265 'uploader_id': 'deadmau5',
1266 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
545cc85d 1267 'creator': 'deadmau5',
1268 'description': 'md5:6cbcd3a92ce1bc676fc4d6ab4ace2336',
8bdd16b4 1269 'uploader': 'deadmau5',
1270 'title': 'Deadmau5 - Some Chords (HD)',
545cc85d 1271 'alt_title': 'Some Chords',
976ae3ea 1272 'availability': 'public',
1273 'tags': 'count:14',
1274 'channel_id': 'UCYEK6xds6eo-3tr4xRdflmQ',
1275 'view_count': int,
1276 'live_status': 'not_live',
1277 'channel': 'deadmau5',
1278 'thumbnail': 'https://i.ytimg.com/vi_webp/__2ABJjxzNo/maxresdefault.webp',
1279 'like_count': int,
1280 'track': 'Some Chords',
1281 'artist': 'deadmau5',
1282 'playable_in_embed': True,
1283 'age_limit': 0,
1284 'channel_url': 'https://www.youtube.com/channel/UCYEK6xds6eo-3tr4xRdflmQ',
1285 'categories': ['Music'],
1286 'album': 'Some Chords',
8bdd16b4 1287 },
1288 'expected_warnings': [
1289 'DASH manifest missing',
1290 ]
1291 },
067aa17e 1292 # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
e52a40ab
PH
1293 {
1294 'url': 'lqQg6PlCWgI',
1295 'info_dict': {
1296 'id': 'lqQg6PlCWgI',
1297 'ext': 'mp4',
556dbe7f 1298 'duration': 6085,
90227264 1299 'upload_date': '20150827',
cbe2bd91 1300 'uploader_id': 'olympic',
ec85ded8 1301 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
cbe2bd91 1302 'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
11f9be09 1303 'uploader': 'Olympics',
cbe2bd91 1304 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
976ae3ea 1305 'like_count': int,
1306 'release_timestamp': 1343767800,
1307 'playable_in_embed': True,
1308 'categories': ['Sports'],
1309 'release_date': '20120731',
1310 'channel': 'Olympics',
1311 'tags': ['Hockey', '2012-07-31', '31 July 2012', 'Riverbank Arena', 'Session', 'Olympics', 'Olympic Games', 'London 2012', '2012 Summer Olympics', 'Summer Games'],
1312 'channel_id': 'UCTl3QQTvqHFjurroKxexy2Q',
1313 'thumbnail': 'https://i.ytimg.com/vi/lqQg6PlCWgI/maxresdefault.jpg',
1314 'age_limit': 0,
1315 'availability': 'public',
1316 'live_status': 'was_live',
1317 'view_count': int,
1318 'channel_url': 'https://www.youtube.com/channel/UCTl3QQTvqHFjurroKxexy2Q',
cbe2bd91
PH
1319 },
1320 'params': {
1321 'skip_download': 'requires avconv',
e52a40ab 1322 }
cbe2bd91 1323 },
6271f1ca
PH
1324 # Non-square pixels
1325 {
1326 'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
1327 'info_dict': {
1328 'id': '_b-2C3KPAM0',
1329 'ext': 'mp4',
1330 'stretched_ratio': 16 / 9.,
556dbe7f 1331 'duration': 85,
6271f1ca
PH
1332 'upload_date': '20110310',
1333 'uploader_id': 'AllenMeow',
ec85ded8 1334 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
6271f1ca 1335 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
eb6793ba 1336 'uploader': '孫ᄋᄅ',
6271f1ca 1337 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
976ae3ea 1338 'playable_in_embed': True,
1339 'channel': '孫ᄋᄅ',
1340 'age_limit': 0,
1341 'tags': 'count:11',
1342 'channel_url': 'https://www.youtube.com/channel/UCS-xxCmRaA6BFdmgDPA_BIw',
1343 'channel_id': 'UCS-xxCmRaA6BFdmgDPA_BIw',
1344 'thumbnail': 'https://i.ytimg.com/vi/_b-2C3KPAM0/maxresdefault.jpg',
1345 'view_count': int,
1346 'categories': ['People & Blogs'],
1347 'like_count': int,
1348 'live_status': 'not_live',
1349 'availability': 'unlisted',
6271f1ca 1350 },
06b491eb
S
1351 },
1352 # url_encoded_fmt_stream_map is empty string
1353 {
1354 'url': 'qEJwOuvDf7I',
1355 'info_dict': {
1356 'id': 'qEJwOuvDf7I',
f57b7835 1357 'ext': 'webm',
06b491eb
S
1358 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
1359 'description': '',
1360 'upload_date': '20150404',
1361 'uploader_id': 'spbelect',
1362 'uploader': 'Наблюдатели Петербурга',
1363 },
1364 'params': {
1365 'skip_download': 'requires avconv',
e323cf3f
S
1366 },
1367 'skip': 'This live event has ended.',
06b491eb 1368 },
067aa17e 1369 # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
da77d856
S
1370 {
1371 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
1372 'info_dict': {
1373 'id': 'FIl7x6_3R5Y',
eb6793ba 1374 'ext': 'webm',
da77d856
S
1375 'title': 'md5:7b81415841e02ecd4313668cde88737a',
1376 'description': 'md5:116377fd2963b81ec4ce64b542173306',
556dbe7f 1377 'duration': 220,
da77d856
S
1378 'upload_date': '20150625',
1379 'uploader_id': 'dorappi2000',
ec85ded8 1380 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
da77d856 1381 'uploader': 'dorappi2000',
eb6793ba 1382 'formats': 'mincount:31',
da77d856 1383 },
eb6793ba 1384 'skip': 'not actual anymore',
2ee8f5d8 1385 },
8a1a26ce
YCH
1386 # DASH manifest with segment_list
1387 {
1388 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
1389 'md5': '8ce563a1d667b599d21064e982ab9e31',
1390 'info_dict': {
1391 'id': 'CsmdDsKjzN8',
1392 'ext': 'mp4',
17ee98e1 1393 'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510
8a1a26ce
YCH
1394 'uploader': 'Airtek',
1395 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
1396 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
1397 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
1398 },
1399 'params': {
1400 'youtube_include_dash_manifest': True,
1401 'format': '135', # bestvideo
be49068d
S
1402 },
1403 'skip': 'This live event has ended.',
2ee8f5d8 1404 },
cf7e015f
S
1405 {
1406 # Multifeed videos (multiple cameras), URL is for Main Camera
545cc85d 1407 'url': 'https://www.youtube.com/watch?v=jvGDaLqkpTg',
cf7e015f 1408 'info_dict': {
545cc85d 1409 'id': 'jvGDaLqkpTg',
1410 'title': 'Tom Clancy Free Weekend Rainbow Whatever',
1411 'description': 'md5:e03b909557865076822aa169218d6a5d',
cf7e015f
S
1412 },
1413 'playlist': [{
1414 'info_dict': {
545cc85d 1415 'id': 'jvGDaLqkpTg',
cf7e015f 1416 'ext': 'mp4',
545cc85d 1417 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Main Camera)',
1418 'description': 'md5:e03b909557865076822aa169218d6a5d',
1419 'duration': 10643,
1420 'upload_date': '20161111',
1421 'uploader': 'Team PGP',
1422 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1423 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
1424 },
1425 }, {
1426 'info_dict': {
545cc85d 1427 'id': '3AKt1R1aDnw',
cf7e015f 1428 'ext': 'mp4',
545cc85d 1429 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 2)',
1430 'description': 'md5:e03b909557865076822aa169218d6a5d',
1431 'duration': 10991,
1432 'upload_date': '20161111',
1433 'uploader': 'Team PGP',
1434 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1435 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
1436 },
1437 }, {
1438 'info_dict': {
545cc85d 1439 'id': 'RtAMM00gpVc',
cf7e015f 1440 'ext': 'mp4',
545cc85d 1441 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 3)',
1442 'description': 'md5:e03b909557865076822aa169218d6a5d',
1443 'duration': 10995,
1444 'upload_date': '20161111',
1445 'uploader': 'Team PGP',
1446 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1447 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
1448 },
1449 }, {
1450 'info_dict': {
545cc85d 1451 'id': '6N2fdlP3C5U',
cf7e015f 1452 'ext': 'mp4',
545cc85d 1453 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 4)',
1454 'description': 'md5:e03b909557865076822aa169218d6a5d',
1455 'duration': 10990,
1456 'upload_date': '20161111',
1457 'uploader': 'Team PGP',
1458 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1459 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
1460 },
1461 }],
1462 'params': {
1463 'skip_download': True,
1464 },
65c2fde2 1465 'skip': 'Not multifeed anymore',
cbaed4bb 1466 },
f9f49d87 1467 {
067aa17e 1468 # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
f9f49d87
S
1469 'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
1470 'info_dict': {
1471 'id': 'gVfLd0zydlo',
1472 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
1473 },
1474 'playlist_count': 2,
be49068d 1475 'skip': 'Not multifeed anymore',
f9f49d87 1476 },
cbaed4bb 1477 {
2d3d2997 1478 'url': 'https://vid.plus/FlRa-iH7PGw',
cbaed4bb 1479 'only_matching': True,
0e49d9a6 1480 },
6d4fc66b 1481 {
2d3d2997 1482 'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
6d4fc66b
S
1483 'only_matching': True,
1484 },
0e49d9a6 1485 {
067aa17e 1486 # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
a8776b10 1487 # Also tests cut-off URL expansion in video description (see
067aa17e
S
1488 # https://github.com/ytdl-org/youtube-dl/issues/1892,
1489 # https://github.com/ytdl-org/youtube-dl/issues/8164)
0e49d9a6
LL
1490 'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
1491 'info_dict': {
1492 'id': 'lsguqyKfVQg',
1493 'ext': 'mp4',
1494 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
11f9be09 1495 'alt_title': 'Dark Walk',
0e49d9a6 1496 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
556dbe7f 1497 'duration': 133,
0e49d9a6
LL
1498 'upload_date': '20151119',
1499 'uploader_id': 'IronSoulElf',
ec85ded8 1500 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
0e49d9a6 1501 'uploader': 'IronSoulElf',
11f9be09 1502 'creator': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',
1503 'track': 'Dark Walk',
1504 'artist': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',
92bc97d3 1505 'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
976ae3ea 1506 'thumbnail': 'https://i.ytimg.com/vi_webp/lsguqyKfVQg/maxresdefault.webp',
1507 'categories': ['Film & Animation'],
1508 'view_count': int,
1509 'live_status': 'not_live',
1510 'channel_url': 'https://www.youtube.com/channel/UCTSRgz5jylBvFt_S7wnsqLQ',
1511 'channel_id': 'UCTSRgz5jylBvFt_S7wnsqLQ',
1512 'tags': 'count:13',
1513 'availability': 'public',
1514 'channel': 'IronSoulElf',
1515 'playable_in_embed': True,
1516 'like_count': int,
1517 'age_limit': 0,
0e49d9a6
LL
1518 },
1519 'params': {
1520 'skip_download': True,
1521 },
1522 },
61f92af1 1523 {
067aa17e 1524 # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
61f92af1
S
1525 'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
1526 'only_matching': True,
1527 },
313dfc45
LL
1528 {
1529 # Video with yt:stretch=17:0
1530 'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
1531 'info_dict': {
1532 'id': 'Q39EVAstoRM',
1533 'ext': 'mp4',
1534 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
1535 'description': 'md5:ee18a25c350637c8faff806845bddee9',
1536 'upload_date': '20151107',
1537 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
1538 'uploader': 'CH GAMER DROID',
1539 },
1540 'params': {
1541 'skip_download': True,
1542 },
be49068d 1543 'skip': 'This video does not exist.',
313dfc45 1544 },
201c1459 1545 {
1546 # Video with incomplete 'yt:stretch=16:'
1547 'url': 'https://www.youtube.com/watch?v=FRhJzUSJbGI',
1548 'only_matching': True,
1549 },
7caf9830
S
1550 {
1551 # Video licensed under Creative Commons
1552 'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
1553 'info_dict': {
1554 'id': 'M4gD1WSo5mA',
1555 'ext': 'mp4',
1556 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
1557 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
556dbe7f 1558 'duration': 721,
7caf9830
S
1559 'upload_date': '20150127',
1560 'uploader_id': 'BerkmanCenter',
ec85ded8 1561 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
556dbe7f 1562 'uploader': 'The Berkman Klein Center for Internet & Society',
7caf9830 1563 'license': 'Creative Commons Attribution license (reuse allowed)',
976ae3ea 1564 'channel_id': 'UCuLGmD72gJDBwmLw06X58SA',
1565 'channel_url': 'https://www.youtube.com/channel/UCuLGmD72gJDBwmLw06X58SA',
1566 'like_count': int,
1567 'age_limit': 0,
1568 'tags': ['Copyright (Legal Subject)', 'Law (Industry)', 'William W. Fisher (Author)'],
1569 'channel': 'The Berkman Klein Center for Internet & Society',
1570 'availability': 'public',
1571 'view_count': int,
1572 'categories': ['Education'],
1573 'thumbnail': 'https://i.ytimg.com/vi_webp/M4gD1WSo5mA/maxresdefault.webp',
1574 'live_status': 'not_live',
1575 'playable_in_embed': True,
7caf9830
S
1576 },
1577 'params': {
1578 'skip_download': True,
1579 },
1580 },
fd050249
S
1581 {
1582 # Channel-like uploader_url
1583 'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
1584 'info_dict': {
1585 'id': 'eQcmzGIKrzg',
1586 'ext': 'mp4',
1587 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
545cc85d 1588 'description': 'md5:13a2503d7b5904ef4b223aa101628f39',
556dbe7f 1589 'duration': 4060,
fd050249 1590 'upload_date': '20151119',
eb6793ba 1591 'uploader': 'Bernie Sanders',
fd050249 1592 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
ec85ded8 1593 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
fd050249 1594 'license': 'Creative Commons Attribution license (reuse allowed)',
976ae3ea 1595 'playable_in_embed': True,
1596 'tags': 'count:12',
1597 'like_count': int,
1598 'channel_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
1599 'age_limit': 0,
1600 'availability': 'public',
1601 'categories': ['News & Politics'],
1602 'channel': 'Bernie Sanders',
1603 'thumbnail': 'https://i.ytimg.com/vi_webp/eQcmzGIKrzg/maxresdefault.webp',
1604 'view_count': int,
1605 'live_status': 'not_live',
1606 'channel_url': 'https://www.youtube.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
fd050249
S
1607 },
1608 'params': {
1609 'skip_download': True,
1610 },
1611 },
040ac686
S
1612 {
1613 'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
1614 'only_matching': True,
7f29cf54
S
1615 },
1616 {
067aa17e 1617 # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
7f29cf54
S
1618 'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
1619 'only_matching': True,
6496ccb4
S
1620 },
1621 {
1622 # Rental video preview
1623 'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
1624 'info_dict': {
1625 'id': 'uGpuVWrhIzE',
1626 'ext': 'mp4',
1627 'title': 'Piku - Trailer',
1628 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
1629 'upload_date': '20150811',
1630 'uploader': 'FlixMatrix',
1631 'uploader_id': 'FlixMatrixKaravan',
ec85ded8 1632 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
6496ccb4
S
1633 'license': 'Standard YouTube License',
1634 },
1635 'params': {
1636 'skip_download': True,
1637 },
eb6793ba 1638 'skip': 'This video is not available.',
022a5d66 1639 },
12afdc2a
S
1640 {
1641 # YouTube Red video with episode data
1642 'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
1643 'info_dict': {
1644 'id': 'iqKdEhx-dD4',
1645 'ext': 'mp4',
1646 'title': 'Isolation - Mind Field (Ep 1)',
545cc85d 1647 'description': 'md5:f540112edec5d09fc8cc752d3d4ba3cd',
556dbe7f 1648 'duration': 2085,
12afdc2a
S
1649 'upload_date': '20170118',
1650 'uploader': 'Vsauce',
1651 'uploader_id': 'Vsauce',
1652 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
12afdc2a
S
1653 'series': 'Mind Field',
1654 'season_number': 1,
1655 'episode_number': 1,
976ae3ea 1656 'thumbnail': 'https://i.ytimg.com/vi_webp/iqKdEhx-dD4/maxresdefault.webp',
1657 'tags': 'count:12',
1658 'view_count': int,
1659 'availability': 'public',
1660 'age_limit': 0,
1661 'channel': 'Vsauce',
1662 'episode': 'Episode 1',
1663 'categories': ['Entertainment'],
1664 'season': 'Season 1',
1665 'channel_id': 'UC6nSFpj9HTCZ5t-N3Rm3-HA',
1666 'channel_url': 'https://www.youtube.com/channel/UC6nSFpj9HTCZ5t-N3Rm3-HA',
1667 'like_count': int,
1668 'playable_in_embed': True,
1669 'live_status': 'not_live',
12afdc2a
S
1670 },
1671 'params': {
1672 'skip_download': True,
1673 },
1674 'expected_warnings': [
1675 'Skipping DASH manifest',
1676 ],
1677 },
c7121fa7
S
1678 {
1679 # The following content has been identified by the YouTube community
1680 # as inappropriate or offensive to some audiences.
1681 'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
1682 'info_dict': {
1683 'id': '6SJNVb0GnPI',
1684 'ext': 'mp4',
1685 'title': 'Race Differences in Intelligence',
1686 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
1687 'duration': 965,
1688 'upload_date': '20140124',
1689 'uploader': 'New Century Foundation',
1690 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
1691 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
c7121fa7
S
1692 },
1693 'params': {
1694 'skip_download': True,
1695 },
545cc85d 1696 'skip': 'This video has been removed for violating YouTube\'s policy on hate speech.',
c7121fa7 1697 },
022a5d66
S
1698 {
1699 # itag 212
1700 'url': '1t24XAntNCY',
1701 'only_matching': True,
fd5c4aab
S
1702 },
1703 {
1704 # geo restricted to JP
1705 'url': 'sJL6WA-aGkQ',
1706 'only_matching': True,
1707 },
cd5a74a2
S
1708 {
1709 'url': 'https://invidio.us/watch?v=BaW_jenozKc',
1710 'only_matching': True,
1711 },
bc2ca1bb 1712 {
1713 'url': 'https://redirect.invidious.io/watch?v=BaW_jenozKc',
1714 'only_matching': True,
1715 },
1716 {
1717 # from https://nitter.pussthecat.org/YouTube/status/1360363141947944964#m
1718 'url': 'https://redirect.invidious.io/Yh0AhrY9GjA',
1719 'only_matching': True,
1720 },
825cd268
RA
1721 {
1722 # DRM protected
1723 'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
1724 'only_matching': True,
4fe54c12
S
1725 },
1726 {
1727 # Video with unsupported adaptive stream type formats
1728 'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
1729 'info_dict': {
1730 'id': 'Z4Vy8R84T1U',
1731 'ext': 'mp4',
1732 'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
1733 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
1734 'duration': 433,
1735 'upload_date': '20130923',
1736 'uploader': 'Amelia Putri Harwita',
1737 'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',
1738 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',
1739 'formats': 'maxcount:10',
1740 },
1741 'params': {
1742 'skip_download': True,
1743 'youtube_include_dash_manifest': False,
1744 },
5429d6a9 1745 'skip': 'not actual anymore',
5caabd3c 1746 },
1747 {
822b9d9c 1748 # Youtube Music Auto-generated description
5caabd3c 1749 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1750 'info_dict': {
1751 'id': 'MgNrAu2pzNs',
1752 'ext': 'mp4',
1753 'title': 'Voyeur Girl',
1754 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
1755 'upload_date': '20190312',
5429d6a9
S
1756 'uploader': 'Stephen - Topic',
1757 'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',
5caabd3c 1758 'artist': 'Stephen',
1759 'track': 'Voyeur Girl',
1760 'album': 'it\'s too much love to know my dear',
1761 'release_date': '20190313',
1762 'release_year': 2019,
976ae3ea 1763 'alt_title': 'Voyeur Girl',
1764 'view_count': int,
1765 'uploader_url': 'http://www.youtube.com/channel/UC-pWHpBjdGG69N9mM2auIAA',
1766 'playable_in_embed': True,
1767 'like_count': int,
1768 'categories': ['Music'],
1769 'channel_url': 'https://www.youtube.com/channel/UC-pWHpBjdGG69N9mM2auIAA',
1770 'channel': 'Stephen',
1771 'availability': 'public',
1772 'creator': 'Stephen',
1773 'duration': 169,
1774 'thumbnail': 'https://i.ytimg.com/vi_webp/MgNrAu2pzNs/maxresdefault.webp',
1775 'age_limit': 0,
1776 'channel_id': 'UC-pWHpBjdGG69N9mM2auIAA',
1777 'tags': 'count:11',
1778 'live_status': 'not_live',
5caabd3c 1779 },
1780 'params': {
1781 'skip_download': True,
1782 },
1783 },
66b48727
RA
1784 {
1785 'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',
1786 'only_matching': True,
1787 },
011e75e6
S
1788 {
1789 # invalid -> valid video id redirection
1790 'url': 'DJztXj2GPfl',
1791 'info_dict': {
1792 'id': 'DJztXj2GPfk',
1793 'ext': 'mp4',
1794 'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',
1795 'description': 'md5:bf577a41da97918e94fa9798d9228825',
1796 'upload_date': '20090125',
1797 'uploader': 'Prochorowka',
1798 'uploader_id': 'Prochorowka',
1799 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',
1800 'artist': 'Panjabi MC',
1801 'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',
1802 'album': 'Beware of the Boys (Mundian To Bach Ke)',
1803 },
1804 'params': {
1805 'skip_download': True,
1806 },
545cc85d 1807 'skip': 'Video unavailable',
ea74e00b
DP
1808 },
1809 {
1810 # empty description results in an empty string
1811 'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',
1812 'info_dict': {
1813 'id': 'x41yOUIvK2k',
1814 'ext': 'mp4',
1815 'title': 'IMG 3456',
1816 'description': '',
1817 'upload_date': '20170613',
1818 'uploader_id': 'ElevageOrVert',
1819 'uploader': 'ElevageOrVert',
976ae3ea 1820 'view_count': int,
1821 'thumbnail': 'https://i.ytimg.com/vi_webp/x41yOUIvK2k/maxresdefault.webp',
1822 'uploader_url': 'http://www.youtube.com/user/ElevageOrVert',
1823 'like_count': int,
1824 'channel_id': 'UCo03ZQPBW5U4UC3regpt1nw',
1825 'tags': [],
1826 'channel_url': 'https://www.youtube.com/channel/UCo03ZQPBW5U4UC3regpt1nw',
1827 'availability': 'public',
1828 'age_limit': 0,
1829 'categories': ['Pets & Animals'],
1830 'duration': 7,
1831 'playable_in_embed': True,
1832 'live_status': 'not_live',
1833 'channel': 'ElevageOrVert',
ea74e00b
DP
1834 },
1835 'params': {
1836 'skip_download': True,
1837 },
1838 },
a0566bbf 1839 {
29f7c58a 1840 # with '};' inside yt initial data (see [1])
1841 # see [2] for an example with '};' inside ytInitialPlayerResponse
1842 # 1. https://github.com/ytdl-org/youtube-dl/issues/27093
1843 # 2. https://github.com/ytdl-org/youtube-dl/issues/27216
a0566bbf 1844 'url': 'https://www.youtube.com/watch?v=CHqg6qOn4no',
1845 'info_dict': {
1846 'id': 'CHqg6qOn4no',
1847 'ext': 'mp4',
1848 'title': 'Part 77 Sort a list of simple types in c#',
1849 'description': 'md5:b8746fa52e10cdbf47997903f13b20dc',
1850 'upload_date': '20130831',
1851 'uploader_id': 'kudvenkat',
1852 'uploader': 'kudvenkat',
976ae3ea 1853 'channel_id': 'UCCTVrRB5KpIiK6V2GGVsR1Q',
1854 'like_count': int,
1855 'uploader_url': 'http://www.youtube.com/user/kudvenkat',
1856 'channel_url': 'https://www.youtube.com/channel/UCCTVrRB5KpIiK6V2GGVsR1Q',
1857 'live_status': 'not_live',
1858 'categories': ['Education'],
1859 'availability': 'public',
1860 'thumbnail': 'https://i.ytimg.com/vi/CHqg6qOn4no/sddefault.jpg',
1861 'tags': 'count:12',
1862 'playable_in_embed': True,
1863 'age_limit': 0,
1864 'view_count': int,
1865 'duration': 522,
1866 'channel': 'kudvenkat',
a0566bbf 1867 },
1868 'params': {
1869 'skip_download': True,
1870 },
1871 },
29f7c58a 1872 {
1873 # another example of '};' in ytInitialData
1874 'url': 'https://www.youtube.com/watch?v=gVfgbahppCY',
1875 'only_matching': True,
1876 },
1877 {
1878 'url': 'https://www.youtube.com/watch_popup?v=63RmMXCd_bQ',
1879 'only_matching': True,
1880 },
545cc85d 1881 {
cc2db878 1882 # https://github.com/ytdl-org/youtube-dl/pull/28094
1883 'url': 'OtqTfy26tG0',
1884 'info_dict': {
1885 'id': 'OtqTfy26tG0',
1886 'ext': 'mp4',
1887 'title': 'Burn Out',
1888 'description': 'md5:8d07b84dcbcbfb34bc12a56d968b6131',
1889 'upload_date': '20141120',
1890 'uploader': 'The Cinematic Orchestra - Topic',
1891 'uploader_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',
1892 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',
1893 'artist': 'The Cinematic Orchestra',
1894 'track': 'Burn Out',
1895 'album': 'Every Day',
976ae3ea 1896 'like_count': int,
1897 'live_status': 'not_live',
1898 'alt_title': 'Burn Out',
1899 'duration': 614,
1900 'age_limit': 0,
1901 'view_count': int,
1902 'channel_url': 'https://www.youtube.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',
1903 'creator': 'The Cinematic Orchestra',
1904 'channel': 'The Cinematic Orchestra',
1905 'tags': ['The Cinematic Orchestra', 'Every Day', 'Burn Out'],
1906 'channel_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',
1907 'availability': 'public',
1908 'thumbnail': 'https://i.ytimg.com/vi/OtqTfy26tG0/maxresdefault.jpg',
1909 'categories': ['Music'],
1910 'playable_in_embed': True,
cc2db878 1911 },
1912 'params': {
1913 'skip_download': True,
1914 },
545cc85d 1915 },
bc2ca1bb 1916 {
1917 # controversial video, only works with bpctr when authenticated with cookies
1918 'url': 'https://www.youtube.com/watch?v=nGC3D_FkCmg',
1919 'only_matching': True,
1920 },
a1a7907b 1921 {
1922 # controversial video, requires bpctr/contentCheckOk
1923 'url': 'https://www.youtube.com/watch?v=SZJvDhaSDnc',
1924 'info_dict': {
1925 'id': 'SZJvDhaSDnc',
1926 'ext': 'mp4',
1927 'title': 'San Diego teen commits suicide after bullying over embarrassing video',
1928 'channel_id': 'UC-SJ6nODDmufqBzPBwCvYvQ',
976ae3ea 1929 'uploader': 'CBS Mornings',
11f9be09 1930 'uploader_id': 'CBSThisMorning',
a1a7907b 1931 'upload_date': '20140716',
976ae3ea 1932 'description': 'md5:acde3a73d3f133fc97e837a9f76b53b7',
1933 'duration': 170,
1934 'categories': ['News & Politics'],
1935 'uploader_url': 'http://www.youtube.com/user/CBSThisMorning',
1936 'view_count': int,
1937 'channel': 'CBS Mornings',
1938 'tags': ['suicide', 'bullying', 'video', 'cbs', 'news'],
1939 'thumbnail': 'https://i.ytimg.com/vi/SZJvDhaSDnc/hqdefault.jpg',
1940 'age_limit': 18,
1941 'availability': 'needs_auth',
1942 'channel_url': 'https://www.youtube.com/channel/UC-SJ6nODDmufqBzPBwCvYvQ',
1943 'like_count': int,
1944 'live_status': 'not_live',
1945 'playable_in_embed': True,
a1a7907b 1946 }
1947 },
f7ad7160 1948 {
1949 # restricted location, https://github.com/ytdl-org/youtube-dl/issues/28685
1950 'url': 'cBvYw8_A0vQ',
1951 'info_dict': {
1952 'id': 'cBvYw8_A0vQ',
1953 'ext': 'mp4',
1954 'title': '4K Ueno Okachimachi Street Scenes 上野御徒町歩き',
1955 'description': 'md5:ea770e474b7cd6722b4c95b833c03630',
1956 'upload_date': '20201120',
1957 'uploader': 'Walk around Japan',
1958 'uploader_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',
1959 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',
976ae3ea 1960 'duration': 1456,
1961 'categories': ['Travel & Events'],
1962 'channel_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',
1963 'view_count': int,
1964 'channel': 'Walk around Japan',
1965 'tags': ['Ueno Tokyo', 'Okachimachi Tokyo', 'Ameyoko Street', 'Tokyo attraction', 'Travel in Tokyo'],
1966 'thumbnail': 'https://i.ytimg.com/vi_webp/cBvYw8_A0vQ/hqdefault.webp',
1967 'age_limit': 0,
1968 'availability': 'public',
1969 'channel_url': 'https://www.youtube.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',
1970 'live_status': 'not_live',
1971 'playable_in_embed': True,
f7ad7160 1972 },
1973 'params': {
1974 'skip_download': True,
1975 },
0fb983f6 1976 }, {
1977 # Has multiple audio streams
1978 'url': 'WaOKSUlf4TM',
1979 'only_matching': True
9297939e 1980 }, {
1981 # Requires Premium: has format 141 when requested using YTM url
1982 'url': 'https://music.youtube.com/watch?v=XclachpHxis',
1983 'only_matching': True
1984 }, {
120916da 1985 # multiple subtitles with same lang_code
1986 'url': 'https://www.youtube.com/watch?v=wsQiKKfKxug',
1987 'only_matching': True,
109dd3b2 1988 }, {
1989 # Force use android client fallback
1990 'url': 'https://www.youtube.com/watch?v=YOelRv7fMxY',
1991 'info_dict': {
1992 'id': 'YOelRv7fMxY',
11f9be09 1993 'title': 'DIGGING A SECRET TUNNEL Part 1',
109dd3b2 1994 'ext': '3gp',
1995 'upload_date': '20210624',
1996 'channel_id': 'UCp68_FLety0O-n9QU6phsgw',
1997 'uploader': 'colinfurze',
11f9be09 1998 'uploader_id': 'colinfurze',
109dd3b2 1999 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCp68_FLety0O-n9QU6phsgw',
976ae3ea 2000 'description': 'md5:5d5991195d599b56cd0c4148907eec50',
2001 'duration': 596,
2002 'categories': ['Entertainment'],
2003 'uploader_url': 'http://www.youtube.com/user/colinfurze',
2004 'view_count': int,
2005 'channel': 'colinfurze',
2006 'tags': ['Colin', 'furze', 'Terry', 'tunnel', 'underground', 'bunker'],
2007 'thumbnail': 'https://i.ytimg.com/vi/YOelRv7fMxY/maxresdefault.jpg',
2008 'age_limit': 0,
2009 'availability': 'public',
2010 'like_count': int,
2011 'live_status': 'not_live',
2012 'playable_in_embed': True,
109dd3b2 2013 },
2014 'params': {
2015 'format': '17', # 3gp format available on android
2016 'extractor_args': {'youtube': {'player_client': ['android']}},
2017 },
120916da 2018 },
109dd3b2 2019 {
2020 # Skip download of additional client configs (remix client config in this case)
2021 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
2022 'only_matching': True,
2023 'params': {
2024 'extractor_args': {'youtube': {'player_skip': ['configs']}},
2025 },
8fc54b12 2026 }, {
2027 # shorts
2028 'url': 'https://www.youtube.com/shorts/BGQWPY4IigY',
2029 'only_matching': True,
9222c381 2030 }, {
2031 'note': 'Storyboards',
2032 'url': 'https://www.youtube.com/watch?v=5KLPxDtMqe8',
2033 'info_dict': {
2034 'id': '5KLPxDtMqe8',
2035 'ext': 'mhtml',
2036 'format_id': 'sb0',
2037 'title': 'Your Brain is Plastic',
2038 'uploader_id': 'scishow',
2039 'description': 'md5:89cd86034bdb5466cd87c6ba206cd2bc',
2040 'upload_date': '20140324',
2041 'uploader': 'SciShow',
976ae3ea 2042 'like_count': int,
2043 'channel_id': 'UCZYTClx2T1of7BRZ86-8fow',
2044 'channel_url': 'https://www.youtube.com/channel/UCZYTClx2T1of7BRZ86-8fow',
2045 'view_count': int,
2046 'thumbnail': 'https://i.ytimg.com/vi/5KLPxDtMqe8/maxresdefault.jpg',
2047 'playable_in_embed': True,
2048 'tags': 'count:12',
2049 'uploader_url': 'http://www.youtube.com/user/scishow',
2050 'availability': 'public',
2051 'channel': 'SciShow',
2052 'live_status': 'not_live',
2053 'duration': 248,
2054 'categories': ['Education'],
2055 'age_limit': 0,
9222c381 2056 }, 'params': {'format': 'mhtml', 'skip_download': True}
2057 }
2eb88d95
PH
2058 ]
2059
@classmethod
def suitable(cls, url):
    """
    Return False for URLs whose query string carries a non-empty `list`
    value; every other URL is judged by the parent class' suitable().
    """
    from ..utils import parse_qs

    list_values = parse_qs(url).get('list', [None])
    if list_values[0]:
        # presumably left for a playlist extractor to claim - confirm against the other extractors
        return False
    return super(YoutubeIE, cls).suitable(url)
2068
e0df6211
PH
def __init__(self, *args, **kwargs):
    """Forward all arguments to the parent constructor and create the empty per-instance caches."""
    super(YoutubeIE, self).__init__(*args, **kwargs)
    # _code_cache maps a player id to the downloaded player JS (see _load_player)
    self._code_cache, self._player_cache = {}, {}
e0df6211 2073
adbc4ec4 2074 def _prepare_live_from_start_formats(self, formats, video_id, live_start_time, url, webpage_url, smuggled_data):
adbc4ec4
THD
2075 lock = threading.Lock()
2076
2077 is_live = True
185bf310 2078 start_time = time.time()
adbc4ec4
THD
2079 formats = [f for f in formats if f.get('is_from_start')]
2080
185bf310 2081 def refetch_manifest(format_id, delay):
2082 nonlocal formats, start_time, is_live
2083 if time.time() <= start_time + delay:
adbc4ec4
THD
2084 return
2085
2086 _, _, prs, player_url = self._download_player_responses(url, smuggled_data, video_id, webpage_url)
2087 video_details = traverse_obj(
2088 prs, (..., 'videoDetails'), expected_type=dict, default=[])
2089 microformats = traverse_obj(
2090 prs, (..., 'microformat', 'playerMicroformatRenderer'),
2091 expected_type=dict, default=[])
2092 _, is_live, _, formats = self._list_formats(video_id, microformats, video_details, prs, player_url)
185bf310 2093 start_time = time.time()
adbc4ec4 2094
185bf310 2095 def mpd_feed(format_id, delay):
adbc4ec4
THD
2096 """
2097 @returns (manifest_url, manifest_stream_number, is_live) or None
2098 """
2099 with lock:
185bf310 2100 refetch_manifest(format_id, delay)
adbc4ec4
THD
2101
2102 f = next((f for f in formats if f['format_id'] == format_id), None)
2103 if not f:
185bf310 2104 if not is_live:
2105 self.to_screen(f'{video_id}: Video is no longer live')
2106 else:
2107 self.report_warning(
2108 f'Cannot find refreshed manifest for format {format_id}{bug_reports_message()}')
adbc4ec4
THD
2109 return None
2110 return f['manifest_url'], f['manifest_stream_number'], is_live
2111
2112 for f in formats:
2113 f['protocol'] = 'http_dash_segments_generator'
2114 f['fragments'] = functools.partial(
2115 self._live_dash_fragments, f['format_id'], live_start_time, mpd_feed)
2116
2117 def _live_dash_fragments(self, format_id, live_start_time, mpd_feed, ctx):
2118 FETCH_SPAN, MAX_DURATION = 5, 432000
2119
2120 mpd_url, stream_number, is_live = None, None, True
2121
2122 begin_index = 0
2123 download_start_time = ctx.get('start') or time.time()
2124
2125 lack_early_segments = download_start_time - (live_start_time or download_start_time) > MAX_DURATION
2126 if lack_early_segments:
2127 self.report_warning(bug_reports_message(
2128 'Starting download from the last 120 hours of the live stream since '
2129 'YouTube does not have data before that. If you think this is wrong,'), only_once=True)
2130 lack_early_segments = True
2131
2132 known_idx, no_fragment_score, last_segment_url = begin_index, 0, None
2133 fragments, fragment_base_url = None, None
2134
2135 def _extract_sequence_from_mpd(refresh_sequence):
2136 nonlocal mpd_url, stream_number, is_live, no_fragment_score, fragments, fragment_base_url
2137 # Obtain from MPD's maximum seq value
2138 old_mpd_url = mpd_url
185bf310 2139 last_error = ctx.pop('last_error', None)
2140 expire_fast = last_error and isinstance(last_error, compat_HTTPError) and last_error.code == 403
2141 mpd_url, stream_number, is_live = (mpd_feed(format_id, 5 if expire_fast else 18000)
2142 or (mpd_url, stream_number, False))
2143 if not refresh_sequence:
2144 if expire_fast and not is_live:
2145 return False, last_seq
2146 elif old_mpd_url == mpd_url:
2147 return True, last_seq
adbc4ec4
THD
2148 try:
2149 fmts, _ = self._extract_mpd_formats_and_subtitles(
2150 mpd_url, None, note=False, errnote=False, fatal=False)
2151 except ExtractorError:
2152 fmts = None
2153 if not fmts:
2154 no_fragment_score += 1
2155 return False, last_seq
2156 fmt_info = next(x for x in fmts if x['manifest_stream_number'] == stream_number)
2157 fragments = fmt_info['fragments']
2158 fragment_base_url = fmt_info['fragment_base_url']
2159 assert fragment_base_url
2160
2161 _last_seq = int(re.search(r'(?:/|^)sq/(\d+)', fragments[-1]['path']).group(1))
2162 return True, _last_seq
2163
2164 while is_live:
2165 fetch_time = time.time()
2166 if no_fragment_score > 30:
2167 return
2168 if last_segment_url:
2169 # Obtain from "X-Head-Seqnum" header value from each segment
2170 try:
2171 urlh = self._request_webpage(
2172 last_segment_url, None, note=False, errnote=False, fatal=False)
2173 except ExtractorError:
2174 urlh = None
2175 last_seq = try_get(urlh, lambda x: int_or_none(x.headers['X-Head-Seqnum']))
2176 if last_seq is None:
2177 no_fragment_score += 1
2178 last_segment_url = None
2179 continue
2180 else:
185bf310 2181 should_continue, last_seq = _extract_sequence_from_mpd(True)
2182 if not should_continue:
adbc4ec4
THD
2183 continue
2184
2185 if known_idx > last_seq:
2186 last_segment_url = None
2187 continue
2188
2189 last_seq += 1
2190
2191 if begin_index < 0 and known_idx < 0:
2192 # skip from the start when it's negative value
2193 known_idx = last_seq + begin_index
2194 if lack_early_segments:
2195 known_idx = max(known_idx, last_seq - int(MAX_DURATION // fragments[-1]['duration']))
2196 try:
2197 for idx in range(known_idx, last_seq):
2198 # do not update sequence here or you'll get skipped some part of it
185bf310 2199 should_continue, _ = _extract_sequence_from_mpd(False)
2200 if not should_continue:
adbc4ec4
THD
2201 known_idx = idx - 1
2202 raise ExtractorError('breaking out of outer loop')
2203 last_segment_url = urljoin(fragment_base_url, 'sq/%d' % idx)
2204 yield {
2205 'url': last_segment_url,
2206 }
2207 if known_idx == last_seq:
2208 no_fragment_score += 5
2209 else:
2210 no_fragment_score = 0
2211 known_idx = last_seq
2212 except ExtractorError:
2213 continue
2214
2215 time.sleep(max(0, FETCH_SPAN + fetch_time - time.time()))
2216
def _extract_player_url(self, *ytcfgs, webpage=None):
    """
    Return the absolute player JS URL found in the given ytcfg objects, or
    None when none of them provides one.

    'PLAYER_JS_URL' is looked up first, then 'jsUrl' under
    'WEB_PLAYER_CONTEXT_CONFIGS'. `webpage` is accepted but not used here.
    """
    found = traverse_obj(
        ytcfgs, (..., 'PLAYER_JS_URL'), (..., 'WEB_PLAYER_CONTEXT_CONFIGS', ..., 'jsUrl'),
        get_all=False, expected_type=compat_str)
    if not found:
        return
    if found.startswith('//'):
        # Protocol-relative URL
        return 'https:' + found
    if re.match(r'https?://', found):
        # Already absolute
        return found
    return compat_urlparse.urljoin('https://www.youtube.com', found)
2229
b6de707d 2230 def _download_player_url(self, video_id, fatal=False):
2231 res = self._download_webpage(
2232 'https://www.youtube.com/iframe_api',
2233 note='Downloading iframe API JS', video_id=video_id, fatal=fatal)
2234 if res:
2235 player_version = self._search_regex(
2236 r'player\\?/([0-9a-fA-F]{8})\\?/', res, 'player version', fatal=fatal)
2237 if player_version:
2238 return f'https://www.youtube.com/s/player/{player_version}/player_ias.vflset/en_US/base.js'
2239
60064c53
PH
2240 def _signature_cache_id(self, example_sig):
2241 """ Return a string representation of a signature """
78caa52a 2242 return '.'.join(compat_str(len(part)) for part in example_sig.split('.'))
60064c53 2243
e40c758c
S
2244 @classmethod
2245 def _extract_player_info(cls, player_url):
2246 for player_re in cls._PLAYER_INFO_RE:
2247 id_m = re.search(player_re, player_url)
2248 if id_m:
2249 break
2250 else:
c081b35c 2251 raise ExtractorError('Cannot identify player %r' % player_url)
545cc85d 2252 return id_m.group('id')
e40c758c 2253
404f611f 2254 def _load_player(self, video_id, player_url, fatal=True):
109dd3b2 2255 player_id = self._extract_player_info(player_url)
2256 if player_id not in self._code_cache:
1276a43a 2257 code = self._download_webpage(
109dd3b2 2258 player_url, video_id, fatal=fatal,
2259 note='Downloading player ' + player_id,
2260 errnote='Download of %s failed' % player_url)
1276a43a 2261 if code:
2262 self._code_cache[player_id] = code
404f611f 2263 return self._code_cache.get(player_id)
109dd3b2 2264
e40c758c 2265 def _extract_signature_function(self, video_id, player_url, example_sig):
545cc85d 2266 player_id = self._extract_player_info(player_url)
e0df6211 2267
c4417ddb 2268 # Read from filesystem cache
545cc85d 2269 func_id = 'js_%s_%s' % (
2270 player_id, self._signature_cache_id(example_sig))
c4417ddb 2271 assert os.path.basename(func_id) == func_id
a0e07d31 2272
69ea8ca4 2273 cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
a0e07d31 2274 if cache_spec is not None:
78caa52a 2275 return lambda s: ''.join(s[i] for i in cache_spec)
83799698 2276
404f611f 2277 code = self._load_player(video_id, player_url)
2278 if code:
109dd3b2 2279 res = self._parse_sig_js(code)
e0df6211 2280
109dd3b2 2281 test_string = ''.join(map(compat_chr, range(len(example_sig))))
2282 cache_res = res(test_string)
2283 cache_spec = [ord(c) for c in cache_res]
83799698 2284
109dd3b2 2285 self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec)
2286 return res
83799698 2287
def _print_sig_code(self, func, example_sig):
    """Print Python source equivalent to the signature function `func`.

    Does nothing unless the 'youtube_print_sig_code' param is set.

    `func` is probed with a string whose characters are chr(0), chr(1), ...
    (one per character of `example_sig`); the ordinals of the result give,
    for every output position, the input index it was taken from. That
    index list is then rendered as a sum of index/slice expressions on `s`
    and written with to_screen().
    """
    if not self.get_param('youtube_print_sig_code'):
        return

    def gen_sig_code(idxs):
        # Yields 's[i]' / 's[a:b:c]' snippets; runs of indices that differ by
        # a constant +1/-1 are merged into a single slice expression.
        def _genslice(start, end, step):
            # Render the inclusive run start..end as slice text on `s`.
            starts = '' if start == 0 else str(start)
            # The slice end is exclusive, hence end + step; a negative value
            # means "run to the boundary", which is written as a bare ':'.
            ends = (':%d' % (end + step)) if end + step >= 0 else ':'
            steps = '' if step == 1 else (':%d' % step)
            return 's[%s%s%s]' % (starts, ends, steps)

        step = None
        # Quelch pyflakes warnings - start will be set when step is set
        start = '(Never used)'
        # Walk over consecutive pairs (prev, i) of the index list
        for i, prev in zip(idxs[1:], idxs[:-1]):
            if step is not None:
                # Currently inside a run: extend it or flush it
                if i - prev == step:
                    continue
                yield _genslice(start, prev, step)
                step = None
                continue
            if i - prev in [-1, 1]:
                # A new run starts at prev
                step = i - prev
                start = prev
                continue
            else:
                yield 's[%d]' % prev
        # Flush whatever is pending after the last pair.
        # NOTE(review): `i` is only bound by the loop above, so an index list
        # with fewer than two entries would fail here - confirm callers never
        # pass such a signature.
        if step is None:
            yield 's[%d]' % i
        else:
            yield _genslice(start, i, step)

    test_string = ''.join(map(compat_chr, range(len(example_sig))))
    cache_res = func(test_string)
    cache_spec = [ord(c) for c in cache_res]
    expr_code = ' + '.join(gen_sig_code(cache_spec))
    # Lengths of the dot-separated parts of the example signature
    signature_id_tuple = '(%s)' % (
        ', '.join(compat_str(len(p)) for p in example_sig.split('.')))
    # NOTE(review): the indentation inside the second literal may have been
    # collapsed by the blame view this text was taken from - confirm upstream.
    code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
            ' return %s\n') % (signature_id_tuple, expr_code)
    self.to_screen('Extracted signature function:\n' + code)
edf3e38e 2329
e0df6211
PH
def _parse_sig_js(self, jscode):
    """Build a Python callable for the signature function in `jscode`.

    The name of the JS function is located with the regex list below (every
    pattern exposes it as the named group 'sig'), the function is extracted
    with JSInterpreter, and a wrapper taking the signature string as its only
    argument is returned.
    """
    funcname = self._search_regex(
        (r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\bm=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(h\.s\)\)',
         r'\bc&&\(c=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(c\)\)',
         r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\);[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\)',
         r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
         r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
         # Obsolete patterns
         r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
         r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         # NOTE(review): the pattern below is identical to the one above
         r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\('),
        jscode, 'Initial JS player signature function name', group='sig')

    jsi = JSInterpreter(jscode)
    initial_function = jsi.extract_function(funcname)
    # The extracted function takes its arguments as a list
    return lambda s: initial_function([s])
2353
545cc85d 2354 def _decrypt_signature(self, s, video_id, player_url):
257a2501 2355 """Turn the encrypted s field into a working signature"""
6b37f0be 2356
c8bf86d5 2357 if player_url is None:
69ea8ca4 2358 raise ExtractorError('Cannot decrypt signature without player_url')
920de7a2 2359
c8bf86d5 2360 try:
62af3a0e 2361 player_id = (player_url, self._signature_cache_id(s))
c8bf86d5
PH
2362 if player_id not in self._player_cache:
2363 func = self._extract_signature_function(
60064c53 2364 video_id, player_url, s
c8bf86d5
PH
2365 )
2366 self._player_cache[player_id] = func
2367 func = self._player_cache[player_id]
404f611f 2368 self._print_sig_code(func, s)
c8bf86d5
PH
2369 return func(s)
2370 except Exception as e:
404f611f 2371 raise ExtractorError('Signature extraction failed: ' + traceback.format_exc(), cause=e)
2372
2373 def _decrypt_nsig(self, s, video_id, player_url):
2374 """Turn the encrypted n field into a working signature"""
2375 if player_url is None:
2376 raise ExtractorError('Cannot decrypt nsig without player_url')
2377 if player_url.startswith('//'):
2378 player_url = 'https:' + player_url
2379 elif not re.match(r'https?://', player_url):
2380 player_url = compat_urlparse.urljoin(
2381 'https://www.youtube.com', player_url)
2382
2383 sig_id = ('nsig_value', s)
2384 if sig_id in self._player_cache:
2385 return self._player_cache[sig_id]
2386
2387 try:
2388 player_id = ('nsig', player_url)
2389 if player_id not in self._player_cache:
2390 self._player_cache[player_id] = self._extract_n_function(video_id, player_url)
2391 func = self._player_cache[player_id]
2392 self._player_cache[sig_id] = func(s)
2393 self.write_debug(f'Decrypted nsig {s} => {self._player_cache[sig_id]}')
2394 return self._player_cache[sig_id]
2395 except Exception as e:
aa9369a2 2396 raise ExtractorError(traceback.format_exc(), cause=e, video_id=video_id)
404f611f 2397
2398 def _extract_n_function_name(self, jscode):
2399 return self._search_regex(
2400 (r'\.get\("n"\)\)&&\(b=(?P<nfunc>[a-zA-Z0-9$]{3})\([a-zA-Z0-9]\)',),
2401 jscode, 'Initial JS player n function name', group='nfunc')
2402
def _extract_n_function(self, video_id, player_url):
    """Return a callable that decrypts an "n" parameter value.

    The function code is read from the 'youtube-nsig' filesystem cache when
    available; otherwise the player JS is loaded, the function is extracted
    from it and the result is stored in that cache.
    """
    player_id = self._extract_player_info(player_url)
    cache = self._downloader.cache
    func_code = cache.load('youtube-nsig', player_id)

    if not func_code:
        jscode = self._load_player(video_id, player_url)
        funcname = self._extract_n_function_name(jscode)
        jsi = JSInterpreter(jscode)
        func_code = jsi.extract_function_code(funcname)
        cache.store('youtube-nsig', player_id, func_code)
    else:
        jsi = JSInterpreter(func_code)

    if self.get_param('youtube_print_sig_code'):
        self.to_screen(f'Extracted nsig function from {player_id}:\n{func_code[1]}\n')

    def decrypt(s):
        # The JS function is rebuilt from its code on every call
        return jsi.extract_function_from_code(*func_code)([s])

    return decrypt
e0df6211 2420
109dd3b2 2421 def _extract_signature_timestamp(self, video_id, player_url, ytcfg=None, fatal=False):
2422 """
2423 Extract signatureTimestamp (sts)
2424 Required to tell API what sig/player version is in use.
2425 """
2426 sts = None
2427 if isinstance(ytcfg, dict):
2428 sts = int_or_none(ytcfg.get('STS'))
2429
2430 if not sts:
2431 # Attempt to extract from player
2432 if player_url is None:
2433 error_msg = 'Cannot extract signature timestamp without player_url.'
2434 if fatal:
2435 raise ExtractorError(error_msg)
2436 self.report_warning(error_msg)
2437 return
404f611f 2438 code = self._load_player(video_id, player_url, fatal=fatal)
2439 if code:
109dd3b2 2440 sts = int_or_none(self._search_regex(
2441 r'(?:signatureTimestamp|sts)\s*:\s*(?P<sts>[0-9]{5})', code,
2442 'JS player signature timestamp', group='sts', fatal=fatal))
2443 return sts
2444
def _mark_watched(self, video_id, player_responses):
    """Request the playback tracking URL so the video is marked as watched.

    The URL is taken from the first player response that provides one; a
    warning is reported when none does. The download itself is non-fatal.
    """
    playback_url = get_first(
        player_responses, ('playbackTracking', 'videostatsPlaybackUrl', 'baseUrl'),
        expected_type=url_or_none)
    if not playback_url:
        self.report_warning('Unable to mark watched')
        return

    url_parts = compat_urlparse.urlparse(playback_url)
    query = compat_urlparse.parse_qs(url_parts.query)

    # cpn generation algorithm is reverse engineered from base.js.
    # In fact it works even with dummy cpn.
    CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
    cpn_chars = [CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(16)]

    query['ver'] = ['2']
    query['cpn'] = [''.join(cpn_chars)]
    tracking_url = compat_urlparse.urlunparse(
        url_parts._replace(query=compat_urllib_parse_urlencode(query, True)))

    self._download_webpage(
        tracking_url, video_id, 'Marking watched',
        'Unable to mark watched', fatal=False)
2470
66c9fa36
S
@staticmethod
def _extract_urls(webpage):
    """Return all YouTube embeds found in `webpage`, in document order per kind.

    Three kinds of embeds are recognised: generic player embeds
    (iframe/embed/object tags, data-video-url attributes and SWFObject
    calls), lazyYT elements, and the Wordpress "YouTube Video Importer"
    plugin. Entries of the first kind are URLs; the other two kinds yield
    the attribute value as found in the page.
    """
    # Embedded YouTube player
    # Fix: the embedSWF alternative used to be written `embedSWF\(?:\s*`,
    # i.e. an optional "(" followed by a literal ":", so the usual
    # `embedSWF("...")` call could never match.
    entries = [
        unescapeHTML(mobj.group('url'))
        for mobj in re.finditer(r'''(?x)
        (?:
            <iframe[^>]+?src=|
            data-video-url=|
            <embed[^>]+?src=|
            embedSWF\(\s*|
            <object[^>]+data=|
            new\s+SWFObject\(
        )
        (["\'])
            (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
            (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
        \1''', webpage)]

    # lazyYT YouTube embed
    entries.extend(map(
        unescapeHTML,
        re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage)))

    # Wordpress "YouTube Video Importer" plugin
    matches = re.findall(r'''(?x)<div[^>]+
        class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
        data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
    # findall returns one tuple per match; the id is the last group
    entries.extend(m[-1] for m in matches)

    return entries
2502
@staticmethod
def _extract_url(webpage):
    """Return the first embed found by YoutubeIE._extract_urls, or None."""
    return next(iter(YoutubeIE._extract_urls(webpage)), None)
2507
97665381
PH
@classmethod
def extract_id(cls, url):
    """Return the 'id' group of cls._VALID_URL matched against `url`.

    Raises ExtractorError when the URL does not match.
    """
    match = re.match(cls._VALID_URL, url, re.VERBOSE)
    if match is not None:
        return match.group('id')
    raise ExtractorError('Invalid URL: %s' % url)
c5e8d7af 2514
def _extract_chapters_from_json(self, data, duration):
    """Extract chapters from the chaptered player bar found in `data`.

    Start times are read from 'timeRangeStartMillis' (scaled to seconds)
    and titles from the chapter renderer's 'simpleText'.
    """
    chapters_path = (
        'playerOverlays', 'playerOverlayRenderer', 'decoratedPlayerBarRenderer',
        'decoratedPlayerBarRenderer', 'playerBar', 'chapteredPlayerBarRenderer', 'chapters'
    )

    def start_seconds(chapter):
        return float_or_none(
            traverse_obj(chapter, ('chapterRenderer', 'timeRangeStartMillis')), scale=1000)

    def title_of(chapter):
        return traverse_obj(
            chapter, ('chapterRenderer', 'title', 'simpleText'), expected_type=str)

    return self._extract_chapters(
        traverse_obj(data, chapters_path, expected_type=list),
        chapter_time=start_seconds,
        chapter_title=title_of,
        duration=duration)
2529
def _extract_chapters_from_engagement_panel(self, data, duration):
    """Extract chapters from the macro-markers lists of the engagement panels.

    Returns the chapters of the first list that produces any, or an empty
    list when none does.
    """
    content_list = traverse_obj(
        data,
        ('engagementPanels', ..., 'engagementPanelSectionListRenderer', 'content', 'macroMarkersListRenderer', 'contents'),
        expected_type=list, default=[])

    def chapter_time(chapter):
        return parse_duration(self._get_text(chapter, 'timeDescription'))

    def chapter_title(chapter):
        return self._get_text(chapter, 'title')

    for contents in content_list:
        chapters = self._extract_chapters(
            traverse_obj(contents, (..., 'macroMarkersListItemRenderer')),
            chapter_time, chapter_title, duration)
        if chapters:
            return chapters
    return []
2545
2546 def _extract_chapters(self, chapter_list, chapter_time, chapter_title, duration):
84213ea8 2547 chapters = []
7c365c21 2548 last_chapter = {'start_time': 0}
2549 for idx, chapter in enumerate(chapter_list or []):
2550 title = chapter_title(chapter)
84213ea8
S
2551 start_time = chapter_time(chapter)
2552 if start_time is None:
2553 continue
7c365c21 2554 last_chapter['end_time'] = start_time
2555 if start_time < last_chapter['start_time']:
2556 if idx == 1:
2557 chapters.pop()
2558 self.report_warning('Invalid start time for chapter "%s"' % last_chapter['title'])
2559 else:
2560 self.report_warning(f'Invalid start time for chapter "{title}"')
2561 continue
2562 last_chapter = {'start_time': start_time, 'title': title}
2563 chapters.append(last_chapter)
2564 last_chapter['end_time'] = duration
84213ea8
S
2565 return chapters
2566
545cc85d 2567 def _extract_yt_initial_variable(self, webpage, regex, video_id, name):
2568 return self._parse_json(self._search_regex(
2569 (r'%s\s*%s' % (regex, self._YT_INITIAL_BOUNDARY_RE),
2570 regex), webpage, name, default='{}'), video_id, fatal=False)
84213ea8 2571
a1c5d2ca
M
def _extract_comment(self, comment_renderer, parent=None):
    """Convert a comment renderer into a comment dict.

    Returns None when the renderer has no 'commentId'. `parent` is the id
    of the parent comment; top-level comments get the parent 'root'.
    """
    comment_id = comment_renderer.get('commentId')
    if not comment_id:
        return

    text = self._get_text(comment_renderer, 'contentText')

    # note: timestamp is an estimate calculated from the current time and time_text
    timestamp, time_text = self._extract_time_text(comment_renderer, 'publishedTimeText')
    author = self._get_text(comment_renderer, 'authorText')

    vote_text = try_get(
        comment_renderer,
        (lambda x: x['voteCount']['simpleText'], lambda x: x['likeCount']),
        compat_str)
    action_buttons = try_get(
        comment_renderer, lambda x: x['actionButtons']['commentActionButtonsRenderer'], dict) or {}

    return {
        'id': comment_id,
        'text': text,
        'timestamp': timestamp,
        'time_text': time_text,
        'like_count': parse_count(vote_text) or 0,
        'is_favorited': 'creatorHeart' in action_buttons,
        'author': author,
        'author_id': try_get(
            comment_renderer, lambda x: x['authorEndpoint']['browseEndpoint']['browseId'], compat_str),
        'author_thumbnail': try_get(
            comment_renderer, lambda x: x['authorThumbnail']['thumbnails'][-1]['url'], compat_str),
        'author_is_uploader': try_get(comment_renderer, lambda x: x['authorIsChannelOwner'], bool),
        'parent': parent or 'root'
    }
2606
def _comment_entries(self, root_continuation_data, ytcfg, video_id, parent=None, tracker=None):
    """Generate comment dicts by following comment continuations.

    root_continuation_data: renderer the first continuation is taken from
    ytcfg, video_id:        passed on to the API helpers
    parent:                 id of the parent comment when fetching replies;
                            None for the top-level comment section
    tracker:                dict of counters shared with the recursive calls
                            made for reply threads (created when not given)

    The 'max_comments' extractor argument supplies up to four limits (total,
    parents, replies, replies per thread); missing ones default to
    sys.maxsize.
    """

    get_single_config_arg = lambda c: self._configuration_arg(c, [''])[0]

    def extract_header(contents):
        # Pick the continuation for the requested sort order from the
        # comments header and record the estimated comment count.
        _continuation = None
        for content in contents:
            comments_header_renderer = traverse_obj(content, 'commentsHeaderRenderer')
            expected_comment_count = self._get_count(
                comments_header_renderer, 'countText', 'commentsCount')

            if expected_comment_count:
                tracker['est_total'] = expected_comment_count
                self.to_screen(f'Downloading ~{expected_comment_count} comments')
            comment_sort_index = int(get_single_config_arg('comment_sort') != 'top')  # 1 = new, 0 = top

            sort_menu_item = try_get(
                comments_header_renderer,
                lambda x: x['sortMenu']['sortFilterSubMenuRenderer']['subMenuItems'][comment_sort_index], dict) or {}
            sort_continuation_ep = sort_menu_item.get('serviceEndpoint') or {}

            _continuation = self._extract_continuation_ep_data(sort_continuation_ep) or self._extract_continuation(sort_menu_item)
            if not _continuation:
                continue

            sort_text = str_or_none(sort_menu_item.get('title'))
            if not sort_text:
                sort_text = 'top comments' if comment_sort_index == 0 else 'newest first'
            self.to_screen('Sorting comments by %s' % sort_text.lower())
            break
        return _continuation

    def extract_thread(contents):
        # Yields the comments of one page, each followed by its replies.
        if not parent:
            tracker['current_page_thread'] = 0
        for content in contents:
            if not parent and tracker['total_parent_comments'] >= max_parents:
                # A bare yield (None) tells the consumer below to stop
                yield
            comment_thread_renderer = try_get(content, lambda x: x['commentThreadRenderer'])
            comment_renderer = get_first(
                (comment_thread_renderer, content), [['commentRenderer', ('comment', 'commentRenderer')]],
                expected_type=dict, default={})

            comment = self._extract_comment(comment_renderer, parent)
            if not comment:
                continue

            tracker['running_total'] += 1
            tracker['total_reply_comments' if parent else 'total_parent_comments'] += 1
            yield comment

            # Attempt to get the replies
            comment_replies_renderer = try_get(
                comment_thread_renderer, lambda x: x['replies']['commentRepliesRenderer'], dict)

            if comment_replies_renderer:
                tracker['current_page_thread'] += 1
                comment_entries_iter = self._comment_entries(
                    comment_replies_renderer, ytcfg, video_id,
                    parent=comment.get('id'), tracker=tracker)
                # Cap replies by the per-thread limit and by what is left of
                # the overall reply limit
                for reply_comment in itertools.islice(comment_entries_iter, min(max_replies_per_thread, max(0, max_replies - tracker['total_reply_comments']))):
                    yield reply_comment

    # Keeps track of counts across recursive calls
    if not tracker:
        tracker = dict(
            running_total=0,
            est_total=0,
            current_page_thread=0,
            total_parent_comments=0,
            total_reply_comments=0)

    # TODO: Deprecated
    # YouTube comments have a max depth of 2
    max_depth = int_or_none(get_single_config_arg('max_comment_depth'))
    if max_depth:
        self._downloader.deprecation_warning(
            '[youtube] max_comment_depth extractor argument is deprecated. Set max replies in the max-comments extractor argument instead.')
    if max_depth == 1 and parent:
        return

    # Pad the argument list so that all four limits can be unpacked
    max_comments, max_parents, max_replies, max_replies_per_thread, *_ = map(
        lambda p: int_or_none(p, default=sys.maxsize), self._configuration_arg('max_comments', ) + [''] * 4)

    continuation = self._extract_continuation(root_continuation_data)
    message = self._get_text(root_continuation_data, ('contents', ..., 'messageRenderer', 'text'), max_runs=1)
    if message and not parent:
        self.report_warning(message, video_id=video_id)

    response = None
    # Only the top-level call has a header to process on its first response
    is_first_continuation = parent is None

    for page_num in itertools.count(0):
        if not continuation:
            break
        headers = self.generate_api_headers(ytcfg=ytcfg, visitor_data=self._extract_visitor_data(response))
        comment_prog_str = f"({tracker['running_total']}/{tracker['est_total']})"
        # NOTE(review): leading whitespace inside the note strings below may
        # have been collapsed by the blame view this text was taken from -
        # confirm against upstream.
        if page_num == 0:
            if is_first_continuation:
                note_prefix = 'Downloading comment section API JSON'
            else:
                note_prefix = ' Downloading comment API JSON reply thread %d %s' % (
                    tracker['current_page_thread'], comment_prog_str)
        else:
            note_prefix = '%sDownloading comment%s API JSON page %d %s' % (
                ' ' if parent else '', ' replies' if parent else '',
                page_num, comment_prog_str)

        response = self._extract_response(
            item_id=None, query=continuation,
            ep='next', ytcfg=ytcfg, headers=headers, note=note_prefix,
            check_get_keys='onResponseReceivedEndpoints')

        continuation_contents = traverse_obj(
            response, 'onResponseReceivedEndpoints', expected_type=list, default=[])

        continuation = None
        for continuation_section in continuation_contents:
            continuation_items = traverse_obj(
                continuation_section,
                (('reloadContinuationItemsCommand', 'appendContinuationItemsAction'), 'continuationItems'),
                get_all=False, expected_type=list) or []
            if is_first_continuation:
                # The first response only provides the sort continuation
                continuation = extract_header(continuation_items)
                is_first_continuation = False
                if continuation:
                    break
                continue

            for entry in extract_thread(continuation_items):
                if not entry:
                    # Stop sentinel from extract_thread
                    return
                yield entry
            continuation = self._extract_continuation({'contents': continuation_items})
            if continuation:
                break
a1c5d2ca 2743
def _get_comments(self, ytcfg, video_id, contents, webpage):
    """Entry for comment extraction

    Returns an iterator over the comments of the 'comment-item-section'
    item section in `contents`, limited to the first value of the
    'max_comments' extractor argument.
    """
    def _iter_comments():
        comment_section = None
        for item in traverse_obj(contents, (..., 'itemSectionRenderer'), default={}):
            if item.get('sectionIdentifier') == 'comment-item-section':
                comment_section = item
                break
        yield from self._comment_entries(comment_section, ytcfg, video_id)

    limit = int_or_none(self._configuration_arg('max_comments', [''])[0])
    return itertools.islice(_iter_comments(), 0, limit)
a1c5d2ca 2754
109dd3b2 2755 @staticmethod
99e9e001 2756 def _get_checkok_params():
2757 return {'contentCheckOk': True, 'racyCheckOk': True}
2758
2759 @classmethod
2760 def _generate_player_context(cls, sts=None):
109dd3b2 2761 context = {
2762 'html5Preference': 'HTML5_PREF_WANTS',
2763 }
2764 if sts is not None:
2765 context['signatureTimestamp'] = sts
2766 return {
2767 'playbackContext': {
2768 'contentPlaybackContext': context
a1a7907b 2769 },
99e9e001 2770 **cls._get_checkok_params()
109dd3b2 2771 }
2772
e7e94f2a
D
@staticmethod
def _is_agegated(player_response):
    """Tell whether the playability status of a player response is age-gated.

    True when 'desktopLegacyAgeGateReason' is set, or when the status or
    reason text contains one of the known markers.
    """
    if traverse_obj(player_response, ('playabilityStatus', 'desktopLegacyAgeGateReason')):
        return True

    reasons = traverse_obj(player_response, ('playabilityStatus', ('status', 'reason')), default=[])
    # NOTE(review): matching is case-sensitive and the status markers are
    # lower-case, while _is_unplayable compares against an upper-case status
    # - confirm the status markers can actually match.
    AGE_GATE_REASONS = (
        'confirm your age', 'age-restricted', 'inappropriate',  # reason
        'age_verification_required', 'age_check_required',  # status
    )
    for expected in AGE_GATE_REASONS:
        for reason in reasons:
            if expected in reason:
                return True
    return False
2784
@staticmethod
def _is_unplayable(player_response):
    """Tell whether the playability status of a player response is 'UNPLAYABLE'."""
    status = traverse_obj(player_response, ('playabilityStatus', 'status'))
    return status == 'UNPLAYABLE'
9275f62c 2788
99e9e001 2789 def _extract_player_response(self, client, video_id, master_ytcfg, player_ytcfg, player_url, initial_pr):
109dd3b2 2790
11f9be09 2791 session_index = self._extract_session_index(player_ytcfg, master_ytcfg)
2792 syncid = self._extract_account_syncid(player_ytcfg, master_ytcfg, initial_pr)
b6de707d 2793 sts = self._extract_signature_timestamp(video_id, player_url, master_ytcfg, fatal=False) if player_url else None
11f9be09 2794 headers = self.generate_api_headers(
99e9e001 2795 ytcfg=player_ytcfg, account_syncid=syncid, session_index=session_index, default_client=client)
9297939e 2796
11f9be09 2797 yt_query = {'videoId': video_id}
2798 yt_query.update(self._generate_player_context(sts))
2799 return self._extract_response(
2800 item_id=video_id, ep='player', query=yt_query,
379e44ed 2801 ytcfg=player_ytcfg, headers=headers, fatal=True,
000c15a4 2802 default_client=client,
11f9be09 2803 note='Downloading %s player API JSON' % client.replace('_', ' ').strip()
2804 ) or None
2805
def _get_requested_clients(self, url, smuggled_data):
    """Return the ordered, de-duplicated list of player clients to use.

    Clients come from the 'player_client' extractor argument: known client
    names are taken as-is, 'default' expands to the default clients and
    'all' to every client whose name does not start with an underscore
    (highest priority first); anything else is skipped with a warning.
    When nothing valid was requested the default clients are used.

    For music URLs, the '<client>_music' variant of every requested client
    is appended when such a client exists.
    """
    default = ['android', 'web']
    allowed_clients = sorted(
        (client for client in INNERTUBE_CLIENTS if client[:1] != '_'),
        key=lambda client: INNERTUBE_CLIENTS[client]['priority'], reverse=True)

    requested_clients = []
    for client in self._configuration_arg('player_client'):
        if client in allowed_clients:
            requested_clients.append(client)
        elif client == 'default':
            requested_clients.extend(default)
        elif client == 'all':
            requested_clients.extend(allowed_clients)
        else:
            self.report_warning(f'Skipping unsupported client {client}')
    if not requested_clients:
        # Copy so that the list extended below is not the default list itself
        requested_clients = list(default)

    if smuggled_data.get('is_music_url') or self.is_music_url(url):
        # Build the variants first: extending a list from a generator that
        # iterates over the same list modifies it while it is being iterated
        music_clients = [
            f'{client}_music' for client in requested_clients
            if f'{client}_music' in INNERTUBE_CLIENTS]
        requested_clients.extend(music_clients)

    return orderedSet(requested_clients)
cf7e015f 2829
c0bc527b
M
2830 def _extract_player_ytcfg(self, client, video_id):
2831 url = {
2832 'web_music': 'https://music.youtube.com',
2833 'web_embedded': f'https://www.youtube.com/embed/{video_id}?html5=1'
2834 }.get(client)
2835 if not url:
2836 return {}
18c7683d 2837 webpage = self._download_webpage(url, video_id, fatal=False, note='Downloading %s config' % client.replace('_', ' ').strip())
c0bc527b
M
2838 return self.extract_ytcfg(video_id, webpage) or {}
2839
def _extract_player_responses(self, clients, video_id, webpage, master_ytcfg):
    """Collect player responses for the given clients.

    clients:      client names, tried in the given order
    webpage:      watch page (may be falsy); used for the initial player
                  response and for locating the player URL
    master_ytcfg: ytcfg used for the 'web' client and as a fallback

    Returns a tuple (list of player responses, player_url). The error of a
    failing client is reported as a warning; it is raised only when no
    player response at all could be collected.
    """
    initial_pr = None
    if webpage:
        initial_pr = self._extract_yt_initial_variable(
            webpage, self._YT_INITIAL_PLAYER_RESPONSE_RE,
            video_id, 'initial player response')

    original_clients = clients
    # Reversed copy so that pop() yields the clients in the requested order
    clients = clients[::-1]
    prs = []

    def append_client(client_name):
        # Queue an additional client unless it is unknown or was requested
        # explicitly
        if client_name in INNERTUBE_CLIENTS and client_name not in original_clients:
            clients.append(client_name)

    # Android player_response does not have microFormats which are needed for
    # extraction of some data. So we return the initial_pr with formats
    # stripped out even if not requested by the user
    # See: https://github.com/yt-dlp/yt-dlp/issues/501
    if initial_pr:
        pr = dict(initial_pr)
        pr['streamingData'] = None
        prs.append(pr)

    last_error = None
    tried_iframe_fallback = False
    player_url = None
    while clients:
        client = clients.pop()
        player_ytcfg = master_ytcfg if client == 'web' else {}
        if 'configs' not in self._configuration_arg('player_skip'):
            player_ytcfg = self._extract_player_ytcfg(client, video_id) or player_ytcfg

        # Once found, the player URL is reused for the remaining clients
        player_url = player_url or self._extract_player_url(master_ytcfg, player_ytcfg, webpage=webpage)
        require_js_player = self._get_default_ytcfg(client).get('REQUIRE_JS_PLAYER')
        if 'js' in self._configuration_arg('player_skip'):
            require_js_player = False
            player_url = None

        # The fallback download is attempted at most once per call
        if not player_url and not tried_iframe_fallback and require_js_player:
            player_url = self._download_player_url(video_id)
            tried_iframe_fallback = True

        try:
            # The 'web' client reuses the response embedded in the webpage
            pr = initial_pr if client == 'web' and initial_pr else self._extract_player_response(
                client, video_id, player_ytcfg or master_ytcfg, player_ytcfg, player_url if require_js_player else None, initial_pr)
        except ExtractorError as e:
            # Warn about the previous error and keep the latest one pending
            if last_error:
                self.report_warning(last_error)
            last_error = e
            continue

        if pr:
            prs.append(pr)

        # creator clients can bypass AGE_VERIFICATION_REQUIRED if logged in
        if client.endswith('_agegate') and self._is_unplayable(pr) and self.is_authenticated:
            append_client(client.replace('_agegate', '_creator'))
        elif self._is_agegated(pr):
            append_client(f'{client}_agegate')

    if last_error:
        if not len(prs):
            raise last_error
        self.report_warning(last_error)
    return prs, player_url
11f9be09 2906
def _extract_formats(self, streaming_data, video_id, player_url, is_live):
    """Generate format dicts from the streaming data of the player responses.

    Formats listed directly in 'formats'/'adaptiveFormats' are yielded
    first, followed by the formats of the HLS and DASH manifests of every
    streaming data entry. `itags` records which protocol each itag was
    already seen with so that manifest formats are not duplicated.
    """
    itags, stream_ids = {}, []
    itag_qualities, res_qualities = {}, {}
    q = qualities([
        # Normally tiny is the smallest video-only formats. But
        # audio-only formats with unknown quality may get tagged as tiny
        'tiny',
        'audio_quality_ultralow', 'audio_quality_low', 'audio_quality_medium', 'audio_quality_high',  # Audio only formats
        'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres'
    ])
    streaming_formats = traverse_obj(streaming_data, (..., ('formats', 'adaptiveFormats'), ...), default=[])

    for fmt in streaming_formats:
        if fmt.get('targetDurationSec') or fmt.get('drmFamilies'):
            continue

        itag = str_or_none(fmt.get('itag'))
        audio_track = fmt.get('audioTrack') or {}
        # A stream is identified by its itag together with its audio track
        stream_id = '%s.%s' % (itag or '', audio_track.get('id', ''))
        if stream_id in stream_ids:
            continue

        quality = fmt.get('quality')
        height = int_or_none(fmt.get('height'))
        if quality == 'tiny' or not quality:
            quality = fmt.get('audioQuality', '').lower() or quality
        # The 3gp format (17) in android client has a quality of "small",
        # but is actually worse than other formats
        if itag == '17':
            quality = 'tiny'
        # Remember the quality per itag/height for the manifest formats below
        if quality:
            if itag:
                itag_qualities[itag] = quality
            if height:
                res_qualities[height] = quality
        # FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment
        # (adding `&sq=0` to the URL) and parsing emsg box to determine the
        # number of fragment that would subsequently requested with (`&sq=N`)
        if fmt.get('type') == 'FORMAT_STREAM_TYPE_OTF':
            continue

        fmt_url = fmt.get('url')
        if not fmt_url:
            # No plain URL: it has to be built from the signature cipher
            sc = compat_parse_qs(fmt.get('signatureCipher'))
            fmt_url = url_or_none(try_get(sc, lambda x: x['url'][0]))
            encrypted_sig = try_get(sc, lambda x: x['s'][0])
            if not (sc and fmt_url and encrypted_sig):
                continue
            if not player_url:
                continue
            signature = self._decrypt_signature(sc['s'][0], video_id, player_url)
            sp = try_get(sc, lambda x: x['sp'][0]) or 'signature'
            fmt_url += '&' + sp + '=' + signature

        query = parse_qs(fmt_url)
        throttled = False
        if query.get('n'):
            try:
                fmt_url = update_url_query(fmt_url, {
                    'n': self._decrypt_nsig(query['n'][0], video_id, player_url)})
            except ExtractorError as e:
                # The format stays usable, but is marked as throttled
                self.report_warning(
                    f'nsig extraction failed: You may experience throttling for some formats\n'
                    f'n = {query["n"][0]} ; player = {player_url}\n{e}', only_once=True)
                throttled = True

        if itag:
            itags[itag] = 'https'
            stream_ids.append(stream_id)

        tbr = float_or_none(
            fmt.get('averageBitrate') or fmt.get('bitrate'), 1000)
        # NOTE(review): `quality` may still be None here when the format has
        # neither 'quality' nor 'audioQuality'; quality.replace() below would
        # then fail unless 'qualityLabel' is set - confirm this cannot happen.
        dct = {
            'asr': int_or_none(fmt.get('audioSampleRate')),
            'filesize': int_or_none(fmt.get('contentLength')),
            'format_id': itag,
            'format_note': join_nonempty(
                '%s%s' % (audio_track.get('displayName') or '',
                          ' (default)' if audio_track.get('audioIsDefault') else ''),
                fmt.get('qualityLabel') or quality.replace('audio_quality_', ''),
                throttled and 'THROTTLED', delim=', '),
            'source_preference': -10 if throttled else -1,
            'fps': int_or_none(fmt.get('fps')) or None,
            'height': height,
            'quality': q(quality),
            'tbr': tbr,
            'url': fmt_url,
            'width': int_or_none(fmt.get('width')),
            'language': audio_track.get('id', '').split('.')[0],
            'language_preference': 1 if audio_track.get('audioIsDefault') else -1,
        }
        # Split the mime type into container type and codecs
        mime_mobj = re.match(
            r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', fmt.get('mimeType') or '')
        if mime_mobj:
            dct['ext'] = mimetype2ext(mime_mobj.group(1))
            dct.update(parse_codecs(mime_mobj.group(2)))
        no_audio = dct.get('acodec') == 'none'
        no_video = dct.get('vcodec') == 'none'
        # For single-stream formats the total bitrate is that stream's bitrate
        if no_audio:
            dct['vbr'] = tbr
        if no_video:
            dct['abr'] = tbr
        if no_audio or no_video:
            dct['downloader_options'] = {
                # Youtube throttles chunks >~10M
                'http_chunk_size': 10485760,
            }
            if dct.get('ext'):
                dct['container'] = dct['ext'] + '_dash'
        yield dct

    live_from_start = is_live and self.get_param('live_from_start')
    skip_manifests = self._configuration_arg('skip')
    if not self.get_param('youtube_include_hls_manifest', True):
        skip_manifests.append('hls')
    get_dash = 'dash' not in skip_manifests and (
        not is_live or live_from_start or self._configuration_arg('include_live_dash'))
    get_hls = not live_from_start and 'hls' not in skip_manifests

    def process_manifest_format(f, proto, itag):
        # Returns False when the itag was already seen with this protocol.
        # An itag already seen with another protocol gets a '-<proto>' suffix.
        if itag in itags:
            if itags[itag] == proto or f'{itag}-{proto}' in itags:
                return False
            itag = f'{itag}-{proto}'
        if itag:
            f['format_id'] = itag
            itags[itag] = proto

        # Quality is looked up by the base itag first, then by height
        f['quality'] = next((
            q(qdict[val])
            for val, qdict in ((f.get('format_id', '').split('-')[0], itag_qualities), (f.get('height'), res_qualities))
            if val in qdict), -1)
        return True

    for sd in streaming_data:
        hls_manifest_url = get_hls and sd.get('hlsManifestUrl')
        if hls_manifest_url:
            for f in self._extract_m3u8_formats(hls_manifest_url, video_id, 'mp4', fatal=False):
                if process_manifest_format(f, 'hls', self._search_regex(
                        r'/itag/(\d+)', f['url'], 'itag', default=None)):
                    yield f

        dash_manifest_url = get_dash and sd.get('dashManifestUrl')
        if dash_manifest_url:
            for f in self._extract_mpd_formats(dash_manifest_url, video_id, fatal=False):
                if process_manifest_format(f, 'dash', f['format_id']):
                    # The file size is taken from the /clen/ part of the URL
                    f['filesize'] = int_or_none(self._search_regex(
                        r'/clen/(\d+)', f.get('fragment_base_url') or f['url'], 'file size', default=None))
                    if live_from_start:
                        f['is_from_start'] = True

                    yield f
11f9be09 3059
def _extract_storyboard(self, player_responses, duration):
    """Yield one ``mhtml`` storyboard format dict per entry of the storyboard spec.

    The spec string is read from the first player response that has
    ``storyboards.playerStoryboardSpecRenderer.spec``. It is split on ``|``;
    the first element is resolved against ``https://i.ytimg.com/`` and used as
    the URL template, the remaining elements describe one storyboard each.

    @param player_responses  Player response objects to search for the spec
    @param duration          Value divided across the fragments of each storyboard
    """
    raw_spec = get_first(
        player_responses, ('storyboards', 'playerStoryboardSpecRenderer', 'spec'), default='')
    # Reversed so that the URL template (first element of the spec) can be popped off the end
    levels = raw_spec.split('|')[::-1]
    template_url = url_or_none(urljoin('https://i.ytimg.com/', levels.pop() or None))
    if not template_url:
        return

    highest_level = len(levels) - 1
    for index, level_spec in enumerate(levels):
        fields = level_spec.split('#')
        counts = [int_or_none(field) for field in fields[:5]]
        # Each entry must have exactly 8 fields, the first five being non-zero integers
        if len(fields) != 8 or not all(counts):
            self.report_warning(f'Malformed storyboard {index}: {"#".join(fields)}{bug_reports_message()}')
            continue
        width, height, frame_count, cols, rows = counts
        N, sigh = fields[6:]

        level_url = template_url.replace('$L', str(highest_level - index)).replace('$N', N) + f'&sigh={sigh}'
        # One fragment is a grid of cols x rows frames; the last one may be partially filled
        fragment_count = frame_count / (cols * rows)
        fragment_duration = duration / fragment_count

        fragments = []
        for fragment_index in range(math.ceil(fragment_count)):
            remaining = duration - (fragment_index * fragment_duration)
            fragments.append({
                'path': level_url.replace('$M', str(fragment_index)),
                'duration': min(fragment_duration, remaining),
            })

        yield {
            'format_id': f'sb{index}',
            'format_note': 'storyboard',
            'ext': 'mhtml',
            'protocol': 'mhtml',
            'acodec': 'none',
            'vcodec': 'none',
            'url': level_url,
            'width': width,
            'height': height,
            'fragments': fragments,
        }
3094
def _download_player_responses(self, url, smuggled_data, video_id, webpage_url):
    """Fetch the watch page (unless skipped) and the player responses.

    The watch page download is skipped when ``webpage`` is listed in the
    ``player_skip`` extractor argument; it is non-fatal otherwise.

    @returns  (webpage, master_ytcfg, player_responses, player_url)
    """
    skip_webpage = 'webpage' in self._configuration_arg('player_skip')
    webpage = None if skip_webpage else self._download_webpage(
        webpage_url + '&bpctr=9999999999&has_verified=1', video_id, fatal=False)

    # Fall back to the default ytcfg when none can be extracted from the webpage
    master_ytcfg = self.extract_ytcfg(video_id, webpage) or self._get_default_ytcfg()

    requested_clients = self._get_requested_clients(url, smuggled_data)
    player_responses, player_url = self._extract_player_responses(
        requested_clients, video_id, webpage, master_ytcfg)

    return webpage, master_ytcfg, player_responses, player_url
3108
def _list_formats(self, video_id, microformats, video_details, player_responses, player_url):
    """Determine the live state and extract the formats from the player responses.

    ``is_live`` is taken from ``isLive`` in the video details and, only when
    that is ``None``, from ``isLiveNow`` in the live broadcast details.

    @returns  (live_broadcast_details, is_live, streaming_data, formats)
    """
    live_broadcast_details = traverse_obj(microformats, (..., 'liveBroadcastDetails'))

    live_flag = get_first(video_details, 'isLive')
    is_live = get_first(live_broadcast_details, 'isLiveNow') if live_flag is None else live_flag

    streaming_data = traverse_obj(player_responses, (..., 'streamingData'), default=[])
    # _extract_formats is a generator; materialize it so callers get a list
    formats = [fmt for fmt in self._extract_formats(streaming_data, video_id, player_url, is_live)]

    return live_broadcast_details, is_live, streaming_data, formats
3119
def _real_extract(self, url):
    """Extract the info dict for a single YouTube video URL.

    Steps, in order:
      1. download the watch page / player responses
      2. redirect to the trailer, or return a playlist of multifeed entries, when applicable
      3. extract formats; raise geo-restriction / no-formats errors when there are none
      4. collect keywords, thumbnails, live status, storyboards and basic metadata
      5. collect subtitles and automatic captions
      6. read start/end times from the URL, and music metadata from the description
      7. download "initial data" for live chat, chapters, like counts, channel, etc.
      8. compute availability and register the comment post-extractor

    @param url  The (possibly smuggled) video URL
    @returns    An info dict, a ``url_result`` (trailer) or a ``playlist_result`` (multifeed)
    """
    url, smuggled_data = unsmuggle_url(url, {})
    video_id = self._match_id(url)

    base_url = self.http_scheme() + '//www.youtube.com/'
    webpage_url = base_url + 'watch?v=' + video_id

    webpage, master_ytcfg, player_responses, player_url = self._download_player_responses(url, smuggled_data, video_id, webpage_url)

    playability_statuses = traverse_obj(
        player_responses, (..., 'playabilityStatus'), expected_type=dict, default=[])

    trailer_video_id = get_first(
        playability_statuses,
        ('errorScreen', 'playerLegacyDesktopYpcTrailerRenderer', 'trailerVideoId'),
        expected_type=str)
    if trailer_video_id:
        return self.url_result(
            trailer_video_id, self.ie_key(), trailer_video_id)

    # Without a webpage there is no HTML metadata to fall back to
    search_meta = ((lambda x: self._html_search_meta(x, webpage, default=None))
                   if webpage else (lambda x: None))

    video_details = traverse_obj(
        player_responses, (..., 'videoDetails'), expected_type=dict, default=[])
    microformats = traverse_obj(
        player_responses, (..., 'microformat', 'playerMicroformatRenderer'),
        expected_type=dict, default=[])
    video_title = (
        get_first(video_details, 'title')
        or self._get_text(microformats, (..., 'title'))
        or search_meta(['og:title', 'twitter:title', 'title']))
    video_description = get_first(video_details, 'shortDescription')

    multifeed_metadata_list = get_first(
        player_responses,
        ('multicamera', 'playerLegacyMulticameraRenderer', 'metadataList'),
        expected_type=str)
    if multifeed_metadata_list and not smuggled_data.get('force_singlefeed'):
        if self.get_param('noplaylist'):
            self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
        else:
            entries = []
            feed_ids = []
            for feed in multifeed_metadata_list.split(','):
                # Unquote should take place before split on comma (,) since textual
                # fields may contain comma as well (see
                # https://github.com/ytdl-org/youtube-dl/issues/8536)
                feed_data = compat_parse_qs(
                    compat_urllib_parse_unquote_plus(feed))

                def feed_entry(name):
                    return try_get(
                        feed_data, lambda x: x[name][0], compat_str)

                feed_id = feed_entry('id')
                if not feed_id:
                    continue
                feed_title = feed_entry('title')
                title = video_title
                if feed_title:
                    title += ' (%s)' % feed_title
                entries.append({
                    '_type': 'url_transparent',
                    'ie_key': 'Youtube',
                    'url': smuggle_url(
                        '%swatch?v=%s' % (base_url, feed_data['id'][0]),
                        {'force_singlefeed': True}),
                    'title': title,
                })
                feed_ids.append(feed_id)
            self.to_screen(
                'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
                % (', '.join(feed_ids), video_id))
            return self.playlist_result(
                entries, video_id, video_title, video_description)

    live_broadcast_details, is_live, streaming_data, formats = self._list_formats(video_id, microformats, video_details, player_responses, player_url)

    if not formats:
        if not self.get_param('allow_unplayable_formats') and traverse_obj(streaming_data, (..., 'licenseInfos')):
            self.report_drm(video_id)
        pemr = get_first(
            playability_statuses,
            ('errorScreen', 'playerErrorMessageRenderer'), expected_type=dict) or {}
        reason = self._get_text(pemr, 'reason') or get_first(playability_statuses, 'reason')
        subreason = clean_html(self._get_text(pemr, 'subreason') or '')
        if subreason:
            if subreason == 'The uploader has not made this video available in your country.':
                countries = get_first(microformats, 'availableCountries')
                if not countries:
                    regions_allowed = search_meta('regionsAllowed')
                    countries = regions_allowed.split(',') if regions_allowed else None
                self.raise_geo_restricted(subreason, countries, metadata_available=True)
            # `reason` may be None when only a subreason is given; do not concatenate onto None
            reason = f'{reason}. {subreason}' if reason else subreason
        if reason:
            self.raise_no_formats(reason, expected=True)

    keywords = get_first(video_details, 'keywords', expected_type=list) or []
    if not keywords and webpage:
        keywords = [
            unescapeHTML(m.group('content'))
            for m in re.finditer(self._meta_regex('og:video:tag'), webpage)]
    for keyword in keywords:
        if keyword.startswith('yt:stretch='):
            mobj = re.search(r'(\d+)\s*:\s*(\d+)', keyword)
            if mobj:
                # NB: float is intentional for forcing float division
                w, h = (float(v) for v in mobj.groups())
                if w > 0 and h > 0:
                    ratio = w / h
                    for f in formats:
                        if f.get('vcodec') != 'none':
                            f['stretched_ratio'] = ratio
                    break
    thumbnails = self._extract_thumbnails((video_details, microformats), (..., ..., 'thumbnail'))
    thumbnail_url = search_meta(['og:image', 'twitter:image'])
    if thumbnail_url:
        thumbnails.append({
            'url': thumbnail_url,
        })
    # Snapshot of the thumbnails actually advertised, taken before the guessed URLs are added
    original_thumbnails = thumbnails.copy()

    # The best resolution thumbnails sometimes does not appear in the webpage
    # See: https://github.com/ytdl-org/youtube-dl/issues/29049, https://github.com/yt-dlp/yt-dlp/issues/340
    # List of possible thumbnails - Ref: <https://stackoverflow.com/a/20542029>
    thumbnail_names = [
        'maxresdefault', 'hq720', 'sddefault', 'sd1', 'sd2', 'sd3',
        'hqdefault', 'hq1', 'hq2', 'hq3', '0',
        'mqdefault', 'mq1', 'mq2', 'mq3',
        'default', '1', '2', '3'
    ]
    n_thumbnail_names = len(thumbnail_names)
    thumbnails.extend({
        'url': 'https://i.ytimg.com/vi{webp}/{video_id}/{name}{live}.{ext}'.format(
            video_id=video_id, name=name, ext=ext,
            webp='_webp' if ext == 'webp' else '', live='_live' if is_live else ''),
    } for name in thumbnail_names for ext in ('webp', 'jpg'))
    for thumb in thumbnails:
        # Earlier names in thumbnail_names get a higher preference; webp beats jpg of the same name
        i = next((i for i, t in enumerate(thumbnail_names) if f'/{video_id}/{t}' in thumb['url']), n_thumbnail_names)
        thumb['preference'] = (0 if '.webp' in thumb['url'] else -1) - (2 * i)
    self._remove_duplicate_formats(thumbnails)
    self._downloader._sort_thumbnails(original_thumbnails)

    category = get_first(microformats, 'category') or search_meta('genre')
    channel_id = str_or_none(
        get_first(video_details, 'channelId')
        or get_first(microformats, 'externalChannelId')
        or search_meta('channelId'))
    duration = int_or_none(
        get_first(video_details, 'lengthSeconds')
        or get_first(microformats, 'lengthSeconds')
        or parse_duration(search_meta('duration'))) or None
    owner_profile_url = get_first(microformats, 'ownerProfileUrl')

    live_content = get_first(video_details, 'isLiveContent')
    is_upcoming = get_first(video_details, 'isUpcoming')
    if is_live is None:
        if is_upcoming or live_content is False:
            is_live = False
    if is_upcoming is None and (live_content or is_live):
        is_upcoming = False
    live_start_time = parse_iso8601(get_first(live_broadcast_details, 'startTimestamp'))
    live_end_time = parse_iso8601(get_first(live_broadcast_details, 'endTimestamp'))
    if not duration and live_end_time and live_start_time:
        duration = live_end_time - live_start_time

    if is_live and self.get_param('live_from_start'):
        self._prepare_live_from_start_formats(formats, video_id, live_start_time, url, webpage_url, smuggled_data)

    formats.extend(self._extract_storyboard(player_responses, duration))

    # Source is given priority since formats that throttle are given lower source_preference
    # When throttling issue is fully fixed, remove this
    self._sort_formats(formats, ('quality', 'res', 'fps', 'hdr:12', 'source', 'codec:vp9.2', 'lang', 'proto'))

    info = {
        'id': video_id,
        'title': video_title,
        'formats': formats,
        'thumbnails': thumbnails,
        # The best thumbnail that we are sure exists. Prevents unnecessary
        # URL checking if user don't care about getting the best possible thumbnail
        'thumbnail': traverse_obj(original_thumbnails, (-1, 'url')),
        'description': video_description,
        'upload_date': unified_strdate(
            get_first(microformats, 'uploadDate')
            or search_meta('uploadDate')),
        'uploader': get_first(video_details, 'author'),
        'uploader_id': self._search_regex(r'/(?:channel|user)/([^/?&#]+)', owner_profile_url, 'uploader id') if owner_profile_url else None,
        'uploader_url': owner_profile_url,
        'channel_id': channel_id,
        'channel_url': f'https://www.youtube.com/channel/{channel_id}' if channel_id else None,
        'duration': duration,
        'view_count': int_or_none(
            get_first((video_details, microformats), (..., 'viewCount'))
            or search_meta('interactionCount')),
        'average_rating': float_or_none(get_first(video_details, 'averageRating')),
        'age_limit': 18 if (
            get_first(microformats, 'isFamilySafe') is False
            or search_meta('isFamilyFriendly') == 'false'
            or search_meta('og:restrictions:age') == '18+') else 0,
        'webpage_url': webpage_url,
        'categories': [category] if category else None,
        'tags': keywords,
        'playable_in_embed': get_first(playability_statuses, 'playableInEmbed'),
        'is_live': is_live,
        'was_live': (False if is_live or is_upcoming or live_content is False
                     else None if is_live is None or is_upcoming is None
                     else live_content),
        'live_status': 'is_upcoming' if is_upcoming else None,  # rest will be set by YoutubeDL
        'release_timestamp': live_start_time,
    }

    pctr = traverse_obj(player_responses, (..., 'captions', 'playerCaptionsTracklistRenderer'), expected_type=dict)
    if pctr:
        def get_lang_code(track):
            return (remove_start(track.get('vssId') or '', '.').replace('.', '-')
                    or track.get('languageCode'))

        # Converted into dicts to remove duplicates
        captions = {
            get_lang_code(sub): sub
            for sub in traverse_obj(pctr, (..., 'captionTracks', ...), default=[])}
        translation_languages = {
            lang.get('languageCode'): self._get_text(lang.get('languageName'), max_runs=1)
            for lang in traverse_obj(pctr, (..., 'translationLanguages', ...), default=[])}

        def process_language(container, base_url, lang_code, sub_name, query):
            # Adds one entry per subtitle format under container[lang_code]
            lang_subs = container.setdefault(lang_code, [])
            for fmt in self._SUBTITLE_FORMATS:
                query.update({
                    'fmt': fmt,
                })
                lang_subs.append({
                    'ext': fmt,
                    'url': update_url_query(base_url, query),
                    'name': sub_name,
                })

        subtitles, automatic_captions = {}, {}
        for lang_code, caption_track in captions.items():
            base_url = caption_track.get('baseUrl')
            if not base_url:
                continue
            lang_name = self._get_text(caption_track, 'name', max_runs=1)
            if caption_track.get('kind') != 'asr':
                if not lang_code:
                    continue
                process_language(
                    subtitles, base_url, lang_code, lang_name, {})
                if not caption_track.get('isTranslatable'):
                    continue
            for trans_code, trans_name in translation_languages.items():
                if not trans_code:
                    continue
                # The `tlang` query must be the plain target language code;
                # the "-<source lang>" suffix is only part of the key used in the result dict
                target_lang = trans_code
                if caption_track.get('kind') != 'asr':
                    trans_code += f'-{lang_code}'
                    trans_name += format_field(lang_name, template=' from %s')
                process_language(
                    automatic_captions, base_url, trans_code, trans_name, {'tlang': target_lang})
        info['automatic_captions'] = automatic_captions
        info['subtitles'] = subtitles

    # start/end times given in the URL fragment take precedence over those in the query
    parsed_url = compat_urllib_parse_urlparse(url)
    for component in [parsed_url.fragment, parsed_url.query]:
        query = compat_parse_qs(component)
        for k, v in query.items():
            for d_k, s_ks in [('start', ('start', 't')), ('end', ('end',))]:
                d_k += '_time'
                if d_k not in info and k in s_ks:
                    info[d_k] = parse_duration(v[0])

    # Youtube Music Auto-generated description
    if video_description:
        mobj = re.search(r'(?s)(?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?.+\nAuto-generated by YouTube\.\s*$', video_description)
        if mobj:
            release_year = mobj.group('release_year')
            release_date = mobj.group('release_date')
            if release_date:
                release_date = release_date.replace('-', '')
                if not release_year:
                    release_year = release_date[:4]
            info.update({
                # Strip the matched album text (not the group name)
                'album': mobj.group('album').strip(),
                'artist': mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·')),
                'track': mobj.group('track').strip(),
                'release_date': release_date,
                'release_year': int_or_none(release_year),
            })

    initial_data = None
    if webpage:
        initial_data = self._extract_yt_initial_variable(
            webpage, self._YT_INITIAL_DATA_RE, video_id,
            'yt initial data')
    if not initial_data:
        query = {'videoId': video_id}
        query.update(self._get_checkok_params())
        initial_data = self._extract_response(
            item_id=video_id, ep='next', fatal=False,
            ytcfg=master_ytcfg, query=query,
            headers=self.generate_api_headers(ytcfg=master_ytcfg),
            note='Downloading initial data API JSON')

    try:
        # This will error if there is no livechat
        initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
        info.setdefault('subtitles', {})['live_chat'] = [{
            'url': 'https://www.youtube.com/watch?v=%s' % video_id,  # url is needed to set cookies
            'video_id': video_id,
            'ext': 'json',
            'protocol': 'youtube_live_chat' if is_live or is_upcoming else 'youtube_live_chat_replay',
        }]
    except (KeyError, IndexError, TypeError):
        pass

    # Computed unconditionally: it is needed below for availability and for the
    # comment extractor even when `initial_data` could not be obtained
    contents = try_get(
        initial_data,
        lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'],
        list) or []

    if initial_data:
        info['chapters'] = (
            self._extract_chapters_from_json(initial_data, duration)
            or self._extract_chapters_from_engagement_panel(initial_data, duration)
            or None)

        for content in contents:
            vpir = content.get('videoPrimaryInfoRenderer')
            if vpir:
                stl = vpir.get('superTitleLink')
                if stl:
                    stl = self._get_text(stl)
                    if try_get(
                            vpir,
                            lambda x: x['superTitleIcon']['iconType']) == 'LOCATION_PIN':
                        info['location'] = stl
                    else:
                        mobj = re.search(r'(.+?)\s*S(\d+)\s*•\s*E(\d+)', stl)
                        if mobj:
                            info.update({
                                'series': mobj.group(1),
                                'season_number': int(mobj.group(2)),
                                'episode_number': int(mobj.group(3)),
                            })
                for tlb in (try_get(
                        vpir,
                        lambda x: x['videoActions']['menuRenderer']['topLevelButtons'],
                        list) or []):
                    tbr = tlb.get('toggleButtonRenderer') or {}
                    # Two known places/phrasings for the like/dislike accessibility label
                    for getter, regex in [(
                            lambda x: x['defaultText']['accessibility']['accessibilityData'],
                            r'(?P<count>[\d,]+)\s*(?P<type>(?:dis)?like)'), ([
                                lambda x: x['accessibility'],
                                lambda x: x['accessibilityData']['accessibilityData'],
                            ], r'(?P<type>(?:dis)?like) this video along with (?P<count>[\d,]+) other people')]:
                        label = (try_get(tbr, getter, dict) or {}).get('label')
                        if label:
                            mobj = re.match(regex, label)
                            if mobj:
                                info[mobj.group('type') + '_count'] = str_to_int(mobj.group('count'))
                                break
                sbr_tooltip = try_get(
                    vpir, lambda x: x['sentimentBar']['sentimentBarRenderer']['tooltip'])
                if sbr_tooltip:
                    like_count, dislike_count = sbr_tooltip.split(' / ')
                    info.update({
                        'like_count': str_to_int(like_count),
                        'dislike_count': str_to_int(dislike_count),
                    })
            vsir = content.get('videoSecondaryInfoRenderer')
            if vsir:
                info['channel'] = self._get_text(vsir, ('owner', 'videoOwnerRenderer', 'title'))
                rows = try_get(
                    vsir,
                    lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows'],
                    list) or []
                # A row with a divider line means several songs are listed;
                # album/artist/track are then not taken from the rows
                multiple_songs = False
                for row in rows:
                    if try_get(row, lambda x: x['metadataRowRenderer']['hasDividerLine']) is True:
                        multiple_songs = True
                        break
                for row in rows:
                    mrr = row.get('metadataRowRenderer') or {}
                    mrr_title = mrr.get('title')
                    if not mrr_title:
                        continue
                    mrr_title = self._get_text(mrr, 'title')
                    mrr_contents_text = self._get_text(mrr, ('contents', 0))
                    if mrr_title == 'License':
                        info['license'] = mrr_contents_text
                    elif not multiple_songs:
                        if mrr_title == 'Album':
                            info['album'] = mrr_contents_text
                        elif mrr_title == 'Artist':
                            info['artist'] = mrr_contents_text
                        elif mrr_title == 'Song':
                            info['track'] = mrr_contents_text

    # Fill empty channel* fields from the corresponding uploader* fields
    fallbacks = {
        'channel': 'uploader',
        'channel_id': 'uploader_id',
        'channel_url': 'uploader_url',
    }
    for to, frm in fallbacks.items():
        if not info.get(to):
            info[to] = info.get(frm)

    for s_k, d_k in [('artist', 'creator'), ('track', 'alt_title')]:
        v = info.get(s_k)
        if v:
            info[d_k] = v

    is_private = get_first(video_details, 'isPrivate', expected_type=bool)
    is_unlisted = get_first(microformats, 'isUnlisted', expected_type=bool)
    is_membersonly = None
    is_premium = None
    if initial_data and is_private is not None:
        is_membersonly = False
        is_premium = False
        badge_labels = set()
        for content in contents:
            if not isinstance(content, dict):
                continue
            badge_labels.update(self._extract_badges(content.get('videoPrimaryInfoRenderer')))
        for badge_label in badge_labels:
            if badge_label.lower() == 'members only':
                is_membersonly = True
            elif badge_label.lower() == 'premium':
                is_premium = True
            elif badge_label.lower() == 'unlisted':
                is_unlisted = True

    info['availability'] = self._availability(
        is_private=is_private,
        needs_premium=is_premium,
        needs_subscription=is_membersonly,
        needs_auth=info['age_limit'] >= 18,
        is_unlisted=None if is_private is None else is_unlisted)

    info['__post_extractor'] = self.extract_comments(master_ytcfg, video_id, contents, webpage)

    self.mark_watched(video_id, player_responses)

    return info
c5e8d7af 3566
a61fd4cf 3567
a6213a49 3568class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):
5f6a1245 3569
a6213a49 3570 def _extract_channel_id(self, webpage):
3571 channel_id = self._html_search_meta(
3572 'channelId', webpage, 'channel id', default=None)
3573 if channel_id:
3574 return channel_id
3575 channel_url = self._html_search_meta(
3576 ('og:url', 'al:ios:url', 'al:android:url', 'al:web:url',
3577 'twitter:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad',
3578 'twitter:app:url:googleplay'), webpage, 'channel url')
3579 return self._search_regex(
3580 r'https?://(?:www\.)?youtube\.com/channel/([^/?#&])+',
3581 channel_url, 'channel id')
15f6397c 3582
8bdd16b4 3583 @staticmethod
cd7c66cf 3584 def _extract_basic_item_renderer(item):
3585 # Modified from _extract_grid_item_renderer
201c1459 3586 known_basic_renderers = (
3587 'playlistRenderer', 'videoRenderer', 'channelRenderer', 'showRenderer'
cd7c66cf 3588 )
3589 for key, renderer in item.items():
201c1459 3590 if not isinstance(renderer, dict):
cd7c66cf 3591 continue
201c1459 3592 elif key in known_basic_renderers:
3593 return renderer
3594 elif key.startswith('grid') and key.endswith('Renderer'):
3595 return renderer
8bdd16b4 3596
8bdd16b4 3597 def _grid_entries(self, grid_renderer):
3598 for item in grid_renderer['items']:
3599 if not isinstance(item, dict):
39b62db1 3600 continue
cd7c66cf 3601 renderer = self._extract_basic_item_renderer(item)
8bdd16b4 3602 if not isinstance(renderer, dict):
3603 continue
052e1350 3604 title = self._get_text(renderer, 'title')
fe93e2c4 3605
8bdd16b4 3606 # playlist
3607 playlist_id = renderer.get('playlistId')
3608 if playlist_id:
3609 yield self.url_result(
3610 'https://www.youtube.com/playlist?list=%s' % playlist_id,
3611 ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
3612 video_title=title)
201c1459 3613 continue
8bdd16b4 3614 # video
3615 video_id = renderer.get('videoId')
3616 if video_id:
3617 yield self._extract_video(renderer)
201c1459 3618 continue
8bdd16b4 3619 # channel
3620 channel_id = renderer.get('channelId')
3621 if channel_id:
8bdd16b4 3622 yield self.url_result(
3623 'https://www.youtube.com/channel/%s' % channel_id,
3624 ie=YoutubeTabIE.ie_key(), video_title=title)
201c1459 3625 continue
3626 # generic endpoint URL support
3627 ep_url = urljoin('https://www.youtube.com/', try_get(
3628 renderer, lambda x: x['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'],
3629 compat_str))
3630 if ep_url:
3631 for ie in (YoutubeTabIE, YoutubePlaylistIE, YoutubeIE):
3632 if ie.suitable(ep_url):
3633 yield self.url_result(
3634 ep_url, ie=ie.ie_key(), video_id=ie._match_id(ep_url), video_title=title)
3635 break
8bdd16b4 3636
3d3dddc9 3637 def _shelf_entries_from_content(self, shelf_renderer):
3638 content = shelf_renderer.get('content')
3639 if not isinstance(content, dict):
8bdd16b4 3640 return
cd7c66cf 3641 renderer = content.get('gridRenderer') or content.get('expandedShelfContentsRenderer')
3d3dddc9 3642 if renderer:
3643 # TODO: add support for nested playlists so each shelf is processed
3644 # as separate playlist
3645 # TODO: this includes only first N items
3646 for entry in self._grid_entries(renderer):
3647 yield entry
3648 renderer = content.get('horizontalListRenderer')
3649 if renderer:
3650 # TODO
3651 pass
8bdd16b4 3652
def _shelf_entries(self, shelf_renderer, skip_channels=False):
    """Yield a URL result for the shelf endpoint (if any), then the entries in its content.

    @param skip_channels  When true, a shelf whose URL contains ``/channels?``
                          yields nothing at all
    """
    endpoint_path = try_get(
        shelf_renderer, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        compat_str)
    shelf_url = urljoin('https://www.youtube.com', endpoint_path)
    if shelf_url:
        # Skipping links to another channels, note that checking for
        # endpoint.commandMetadata.webCommandMetadata.webPageType == WEB_PAGE_TYPE_CHANNEL
        # will not work
        links_to_channels = '/channels?' in shelf_url
        if skip_channels and links_to_channels:
            return
        yield self.url_result(
            shelf_url, video_title=self._get_text(shelf_renderer, 'title'))
    # Shelf may not contain shelf URL, fallback to extraction from content
    yield from self._shelf_entries_from_content(shelf_renderer)
c5e8d7af 3669
8bdd16b4 3670 def _playlist_entries(self, video_list_renderer):
3671 for content in video_list_renderer['contents']:
3672 if not isinstance(content, dict):
3673 continue
3674 renderer = content.get('playlistVideoRenderer') or content.get('playlistPanelVideoRenderer')
3675 if not isinstance(renderer, dict):
3676 continue
3677 video_id = renderer.get('videoId')
3678 if not video_id:
3679 continue
3680 yield self._extract_video(renderer)
07aeced6 3681
def _rich_entries(self, rich_grid_renderer):
    """Yield the video at ``content.videoRenderer`` of the given renderer, if it has a ``videoId``."""
    video_renderer = try_get(
        rich_grid_renderer, lambda x: x['content']['videoRenderer'], dict) or {}
    if video_renderer.get('videoId'):
        yield self._extract_video(video_renderer)
3689
8bdd16b4 3690 def _video_entry(self, video_renderer):
3691 video_id = video_renderer.get('videoId')
3692 if video_id:
3693 return self._extract_video(video_renderer)
dacb3a86 3694
def _post_thread_entries(self, post_thread_renderer):
    """Yield the videos and playlists referenced by a backstage (community) post.

    In order: the attached video, the attached playlist, then every YouTube
    video linked in the post text other than the attached video.
    """
    post = try_get(
        post_thread_renderer, lambda x: x['post']['backstagePostRenderer'], dict)
    if not post:
        return

    # video attachment
    attached_video = try_get(
        post, lambda x: x['backstageAttachment']['videoRenderer'], dict) or {}
    video_id = attached_video.get('videoId')
    if video_id:
        attached_entry = self._extract_video(attached_video)
        if attached_entry:
            yield attached_entry

    # playlist attachment
    playlist_id = try_get(
        post, lambda x: x['backstageAttachment']['playlistRenderer']['playlistId'], compat_str)
    if playlist_id:
        yield self.url_result(
            'https://www.youtube.com/playlist?list=%s' % playlist_id,
            ie=YoutubeTabIE.ie_key(), video_id=playlist_id)

    # inline video links
    text_runs = try_get(post, lambda x: x['contentText']['runs'], list) or []
    for text_run in text_runs:
        if not isinstance(text_run, dict):
            continue
        link_url = try_get(
            text_run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'], compat_str)
        if not link_url or not YoutubeIE.suitable(link_url):
            continue
        linked_video_id = YoutubeIE._match_id(link_url)
        # The attached video has already been yielded above
        if linked_video_id != video_id:
            yield self.url_result(link_url, ie=YoutubeIE.ie_key(), video_id=linked_video_id)
dacb3a86 3730
8bdd16b4 3731 def _post_thread_continuation_entries(self, post_thread_continuation):
3732 contents = post_thread_continuation.get('contents')
3733 if not isinstance(contents, list):
3734 return
3735 for content in contents:
3736 renderer = content.get('backstagePostThreadRenderer')
3737 if not isinstance(renderer, dict):
3738 continue
3739 for entry in self._post_thread_entries(renderer):
3740 yield entry
07aeced6 3741
39ed931e 3742 r''' # unused
3743 def _rich_grid_entries(self, contents):
3744 for content in contents:
3745 video_renderer = try_get(content, lambda x: x['richItemRenderer']['content']['videoRenderer'], dict)
3746 if video_renderer:
3747 entry = self._video_entry(video_renderer)
3748 if entry:
3749 yield entry
3750 '''
def _extract_entries(self, parent_renderer, continuation_list):
    """Yield the entries found in ``parent_renderer['contents']``.

    The continuation found while doing so is stored in ``continuation_list[0]``,
    i.e. `continuation_list` is modified in-place to ``[continuation_token]``.
    """
    continuation_list[:] = [None]

    # Maps the renderer key of an item section content to the generator handling it
    handlers = {
        'playlistVideoListRenderer': self._playlist_entries,
        'gridRenderer': self._grid_entries,
        'shelfRenderer': lambda x: self._shelf_entries(x),
        'backstagePostThreadRenderer': self._post_thread_entries,
        'videoRenderer': lambda x: [self._video_entry(x)],
        'playlistRenderer': lambda x: self._grid_entries({'items': [{'playlistRenderer': x}]}),
        'channelRenderer': lambda x: self._grid_entries({'items': [{'channelRenderer': x}]}),
    }

    for content in try_get(parent_renderer, lambda x: x['contents'], list) or []:
        if not isinstance(content, dict):
            continue
        section = try_get(content, lambda x: x['itemSectionRenderer'], dict)
        if not section:
            rich_item = content.get('richItemRenderer')
            if rich_item:
                yield from self._rich_entries(rich_item)
                continuation_list[0] = self._extract_continuation(parent_renderer)
            continue

        for section_content in try_get(section, lambda x: x['contents'], list) or []:
            if not isinstance(section_content, dict):
                continue
            # Only the first known renderer of each section content is processed
            for key, renderer in section_content.items():
                if key not in handlers:
                    continue
                for entry in handlers[key](renderer):
                    if entry:
                        yield entry
                continuation_list[0] = self._extract_continuation(renderer)
                break

        # Fall back to the continuation of the section itself ...
        if not continuation_list[0]:
            continuation_list[0] = self._extract_continuation(section)

    # ... and finally to that of the parent renderer
    if not continuation_list[0]:
        continuation_list[0] = self._extract_continuation(parent_renderer)
3794
def _entries(self, tab, item_id, ytcfg, account_syncid, visitor_data):
    """Yield every entry of a tab, following continuations page by page.

    The first batch is read from tab['content'] (its sectionListRenderer or
    richGridRenderer). Afterwards, for as long as a continuation is known,
    further pages are requested through self._extract_response() and handed
    to the handler matching the renderer found in the response.
    """
    # One-slot list shared with self._extract_entries(), which stores the next continuation in it
    continuation_list = [None]

    def extract_entries(renderer):
        return self._extract_entries(renderer, continuation_list)

    tab_content = try_get(tab, lambda x: x['content'], dict)
    if not tab_content:
        return
    parent_renderer = (
        try_get(tab_content, lambda x: x['sectionListRenderer'], dict)
        or try_get(tab_content, lambda x: x['richGridRenderer'], dict) or {})
    yield from extract_entries(parent_renderer)
    continuation = continuation_list[0]

    # Handlers for the renderers found under response['continuationContents']
    continuation_handlers = {
        'playlistVideoListContinuation': self._playlist_entries,
        'gridContinuation': self._grid_entries,
        'itemSectionContinuation': self._post_thread_continuation_entries,
        'sectionListContinuation': extract_entries,  # for feeds
    }
    # Handlers for appended continuation items, keyed by the renderer of the first item.
    # Each value is (handler, key under which the items are passed to the handler)
    item_handlers = {
        'gridPlaylistRenderer': (self._grid_entries, 'items'),
        'gridVideoRenderer': (self._grid_entries, 'items'),
        'gridChannelRenderer': (self._grid_entries, 'items'),
        'playlistVideoRenderer': (self._playlist_entries, 'contents'),
        'itemSectionRenderer': (extract_entries, 'contents'),  # for feeds
        'richItemRenderer': (extract_entries, 'contents'),  # for hashtag
        'backstagePostThreadRenderer': (self._post_thread_continuation_entries, 'contents')
    }

    page_num = 0
    while continuation:
        page_num += 1
        headers = self.generate_api_headers(
            ytcfg=ytcfg, account_syncid=account_syncid, visitor_data=visitor_data)
        response = self._extract_response(
            item_id='%s page %s' % (item_id, page_num),
            query=continuation, headers=headers, ytcfg=ytcfg,
            check_get_keys=('continuationContents', 'onResponseReceivedActions', 'onResponseReceivedEndpoints'))
        if not response:
            break

        # Extracting updated visitor data is required to prevent an infinite extraction loop in some cases
        # See: https://github.com/ytdl-org/youtube-dl/issues/28702
        visitor_data = self._extract_visitor_data(response) or visitor_data

        continuation_contents = try_get(
            response, lambda x: x['continuationContents'], dict) or {}
        known_key = next(
            (key for key in continuation_contents if key in continuation_handlers), None)
        if known_key is not None:
            continuation_renderer = continuation_contents[known_key]
            # Clear the slot so that a stale value is not picked up from a handler that does not set it
            continuation_list[0] = None
            yield from continuation_handlers[known_key](continuation_renderer)
            continuation = continuation_list[0] or self._extract_continuation(continuation_renderer)
            # An empty renderer still falls through to the appended-items handling below
            if continuation_renderer:
                continue

        on_response_received = dict_get(response, ('onResponseReceivedActions', 'onResponseReceivedEndpoints'))
        continuation_items = try_get(
            on_response_received, lambda x: x[0]['appendContinuationItemsAction']['continuationItems'], list)
        first_item = try_get(continuation_items, lambda x: x[0], dict) or {}
        item_key = next((key for key in first_item if key in item_handlers), None)
        if item_key is None:
            # Nothing in the response that we know how to handle
            break

        handler, wrapper_key = item_handlers[item_key]
        items_renderer = {wrapper_key: continuation_items}
        continuation_list[0] = None
        yield from handler(items_renderer)
        continuation = continuation_list[0] or self._extract_continuation(items_renderer)
9558dcec 3871
@staticmethod
def _extract_selected_tab(tabs):
    """Return the renderer of the first tab whose 'selected' field is True.

    The renderer of each tab is looked up under 'tabRenderer' or
    'expandableTabRenderer'. Raises ExtractorError if no tab is selected.
    """
    renderers = (
        dict_get(tab, ('tabRenderer', 'expandableTabRenderer')) or {}
        for tab in tabs)
    selected = next(
        (renderer for renderer in renderers if renderer.get('selected') is True), None)
    if selected is None:
        raise ExtractorError('Unable to find selected tab')
    return selected
b82f815f 3880
@classmethod
def _extract_uploader(cls, data):
    """Return the uploader fields found in the secondary playlist sidebar.

    The result may contain 'uploader', 'uploader_id' and 'uploader_url';
    fields whose value is None are left out. An empty dict is returned when
    the sidebar has no video owner.
    """
    sidebar = cls._extract_sidebar_info_renderer(data, 'playlistSidebarSecondaryInfoRenderer') or {}
    owner = try_get(
        sidebar, lambda x: x['videoOwner']['videoOwnerRenderer']['title']['runs'][0], dict)
    if not owner:
        return {}

    uploader = {
        'uploader': owner.get('text'),
        'uploader_id': try_get(
            owner, lambda x: x['navigationEndpoint']['browseEndpoint']['browseId'], compat_str),
        'uploader_url': urljoin(
            'https://www.youtube.com/',
            try_get(owner, lambda x: x['navigationEndpoint']['browseEndpoint']['canonicalBaseUrl'], compat_str)),
    }
    return {field: value for field, value in uploader.items() if value is not None}
8bdd16b4 3895
def _extract_from_tabs(self, item_id, ytcfg, data, tabs):
    """Build the playlist result for the currently selected tab.

    Metadata is collected from the channel (or, failing that, playlist)
    metadata renderer in data, from the primary playlist sidebar and from the
    header. The entries are produced by self._entries() for the selected tab.
    """
    title = description = channel_name = channel_url = channel_id = None
    tags = []

    selected_tab = self._extract_selected_tab(tabs)
    primary_sidebar_renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer')

    metadata_renderer = try_get(
        data, lambda x: x['metadata']['channelMetadataRenderer'], dict)
    if metadata_renderer:
        channel_name = metadata_renderer.get('title')
        channel_url = metadata_renderer.get('channelUrl')
        channel_id = metadata_renderer.get('externalId')
    else:
        metadata_renderer = try_get(
            data, lambda x: x['metadata']['playlistMetadataRenderer'], dict)

    if metadata_renderer:
        title = metadata_renderer.get('title')
        description = metadata_renderer.get('description', '')
        tags = metadata_renderer.get('keywords', '').split()

    # The channel id doubles as the playlist id; otherwise use the requested item id
    playlist_id = item_id if channel_id is None else channel_id

    def _append_uncropped(thumbnails, thumbnail_id, preference):
        # We can get the uncropped banner/avatar by replacing the crop params with '=s0'
        # See: https://github.com/yt-dlp/yt-dlp/issues/2237#issuecomment-1013694714
        if not thumbnails:
            return
        uncropped_url = url_or_none((thumbnails[0]['url'] or '').split('=')[0] + '=s0')
        if uncropped_url:
            thumbnails.append({
                'url': uncropped_url,
                'id': thumbnail_id,
                'preference': preference
            })

    avatar_thumbnails = self._extract_thumbnails(metadata_renderer, 'avatar')
    _append_uncropped(avatar_thumbnails, 'avatar_uncropped', 1)

    channel_banners = self._extract_thumbnails(
        data, ('header', ..., ['banner', 'mobileBanner', 'tvBanner']))
    for banner in channel_banners:
        banner['preference'] = -10
    _append_uncropped(channel_banners, 'banner_uncropped', -5)

    primary_thumbnails = self._extract_thumbnails(
        primary_sidebar_renderer, ('thumbnailRenderer', 'playlistVideoThumbnailRenderer', 'thumbnail'))

    playlist_stats = traverse_obj(primary_sidebar_renderer, 'stats')
    last_updated_unix, _ = self._extract_time_text(playlist_stats, 2)

    if title is None:
        title = self._get_text(data, ('header', 'hashtagHeaderRenderer', 'hashtag')) or playlist_id
    # The tab name (and expanded text, if any) is appended to the title
    title += format_field(selected_tab, 'title', ' - %s')
    title += format_field(selected_tab, 'expandedText', ' - %s')

    metadata = {
        'playlist_id': playlist_id,
        'playlist_title': title,
        'playlist_description': description,
        'uploader': channel_name,
        'uploader_id': channel_id,
        'uploader_url': channel_url,
        'thumbnails': primary_thumbnails + avatar_thumbnails + channel_banners,
        'tags': tags,
        'view_count': self._get_count(playlist_stats, 1),
        'availability': self._extract_availability(data),
        'modified_date': strftime_or_none(last_updated_unix, '%Y%m%d'),
        'playlist_count': self._get_count(playlist_stats, 0)
    }
    if not channel_id:
        # No channel metadata; fall back to the uploader shown in the sidebar
        metadata.update(self._extract_uploader(data))
    # The channel fields mirror the (possibly updated) uploader fields
    metadata['channel'] = metadata['uploader']
    metadata['channel_id'] = metadata['uploader_id']
    metadata['channel_url'] = metadata['uploader_url']

    entries = self._entries(
        selected_tab, playlist_id, ytcfg,
        self._extract_account_syncid(ytcfg, data),
        self._extract_visitor_data(data, ytcfg))
    return self.playlist_result(entries, **metadata)
73c4ac2c 3986
def _extract_mix_playlist(self, playlist, playlist_id, data, ytcfg):
    """Yield the videos of a Mix playlist, requesting further pages as needed.

    Each page is obtained from the 'next' endpoint, starting after the last
    video already yielded. Iteration stops when a page has no videos, when a
    page has nothing after the last yielded video, or when the very first
    video shows up again.
    """
    first_id = last_id = response = None
    for page_num in itertools.count(1):
        videos = list(self._playlist_entries(playlist))
        if not videos:
            return
        # Skip everything up to and including the last video of the previous page
        start = next((i for i, v in enumerate(videos) if v['id'] == last_id), -1) + 1
        if start >= len(videos):
            return
        for video in videos[start:]:
            if video['id'] == first_id:
                self.to_screen('First video %s found again; Assuming end of Mix' % first_id)
                return
            yield video
        first_id = first_id or videos[0]['id']
        last_id = videos[-1]['id']
        # The lookup yields None when the last item has no watch endpoint; default to
        # an empty dict so that the fallbacks in the query below are actually used
        watch_endpoint = try_get(
            playlist, lambda x: x['contents'][-1]['playlistPanelVideoRenderer']['navigationEndpoint']['watchEndpoint'],
            dict) or {}
        headers = self.generate_api_headers(
            ytcfg=ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
            visitor_data=self._extract_visitor_data(response, data, ytcfg))
        query = {
            'playlistId': playlist_id,
            'videoId': watch_endpoint.get('videoId') or last_id,
            'index': watch_endpoint.get('index') or len(videos),
            'params': watch_endpoint.get('params') or 'OAE%3D'
        }
        response = self._extract_response(
            item_id='%s page %d' % (playlist_id, page_num),
            query=query, ep='next', headers=headers, ytcfg=ytcfg,
            check_get_keys='contents'
        )
        playlist = try_get(
            response, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
cd7c66cf 4021
def _extract_from_playlist(self, item_id, url, data, playlist, ytcfg):
    """Return the result for a playlist embedded in a watch page.

    When the playlist points to a URL different from the one being
    extracted, a url_result for YoutubeTabIE is returned. Otherwise the
    playlist is extracted as a Mix.
    """
    playlist_id = playlist.get('playlistId') or item_id
    title = playlist.get('title')
    if not title:
        title = try_get(data, lambda x: x['titleText']['simpleText'], compat_str)

    # Delegating everything except mix playlists to regular tab-based playlist URL
    relative_url = try_get(
        playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        compat_str)
    playlist_url = urljoin(url, relative_url)
    if not playlist_url or playlist_url == url:
        return self.playlist_result(
            self._extract_mix_playlist(playlist, playlist_id, data, ytcfg),
            playlist_id=playlist_id, playlist_title=title)

    return self.url_result(
        playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
        video_title=title)
c5e8d7af 4039
def _extract_availability(self, data):
    """
    Gets the availability of a given playlist/tab.
    Note: Unless YouTube tells us explicitly, we do not assume it is public
    @param data: response
    """
    sidebar = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer') or {}
    badge_labels = self._extract_badges(sidebar)

    # Personal playlists, when authenticated, have a dropdown visibility selector instead of a badge
    dropdown_entries = try_get(
        sidebar, lambda x: x['privacyForm']['dropdownFormFieldRenderer']['dropdown']['dropdownRenderer']['entries'], list) or []
    for dropdown_entry in dropdown_entries:
        selected = try_get(
            dropdown_entry, lambda x: x['privacyDropdownItemRenderer']['isSelected'], bool) or False
        if selected:
            label = self._get_text(dropdown_entry, ('privacyDropdownItemRenderer', 'label'))
            if label:
                # The selected entry is treated like one more badge
                badge_labels.add(label.lower())
                break

    is_private = is_unlisted = None
    for badge_label in badge_labels:
        if badge_label == 'public':
            is_private = is_unlisted = False
        elif badge_label == 'private':
            is_private = True
        elif badge_label == 'unlisted':
            is_unlisted = True
    return self._availability(is_private, False, False, False, is_unlisted)
4071
@staticmethod
def _extract_sidebar_info_renderer(data, info_renderer, expected_type=dict):
    """Return the first non-empty info_renderer of the playlist sidebar.

    The sidebar items are read from
    data['sidebar']['playlistSidebarRenderer']['items']; None is returned
    when no item has a matching renderer of the expected type.
    """
    sidebar_items = try_get(
        data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list) or []
    candidates = (
        try_get(item, lambda x: x[info_renderer], expected_type)
        for item in sidebar_items)
    return next((candidate for candidate in candidates if candidate), None)
4080
def _reload_with_unavailable_videos(self, item_id, data, ytcfg):
    """
    Get playlist with unavailable videos if the 'show unavailable videos' button exists.

    Returns None when data has no primary playlist sidebar. When the button
    is not found, default 'params' and 'browseId' values are used instead.
    """
    sidebar = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer')
    if not sidebar:
        return

    browse_endpoint = {}
    menu_items = try_get(
        sidebar, lambda x: x['menu']['menuRenderer']['items'], list) or []
    for menu_item in menu_items:
        if not isinstance(menu_item, dict):
            continue
        nav_item = menu_item.get('menuNavigationItemRenderer')
        label = try_get(
            nav_item, lambda x: x['text']['simpleText'], compat_str)
        if label and label.lower() == 'show unavailable videos':
            browse_endpoint = try_get(
                nav_item, lambda x: x['navigationEndpoint']['browseEndpoint'], dict) or {}
            break

    headers = self.generate_api_headers(
        ytcfg=ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
        visitor_data=self._extract_visitor_data(data, ytcfg))
    query = {
        'params': browse_endpoint.get('params') or 'wgYCCAA=',
        'browseId': browse_endpoint.get('browseId') or 'VL%s' % item_id
    }
    return self._extract_response(
        item_id=item_id, headers=headers, query=query,
        check_get_keys='contents', fatal=False, ytcfg=ytcfg,
        note='Downloading API JSON with unavailable videos')
358de58c 4116
def _extract_webpage(self, url, item_id, fatal=True):
    """Download the webpage and extract its initial data, retrying if needed.

    The download is retried (bounded by the 'extractor_retries' param,
    default 3) after a network error other than HTTP 403/429, and when the
    initial data has neither 'contents' nor 'currentVideoEndpoint'. With
    fatal=False, failures are reported as warnings instead of being raised.

    Returns a (webpage, data) tuple.
    """
    retries = self.get_param('extractor_retries', 3)
    webpage = data = last_error = None
    attempt = -1
    while attempt < retries:
        attempt += 1
        # Sometimes youtube returns a webpage with incomplete ytInitialData
        # See: https://github.com/yt-dlp/yt-dlp/issues/116
        if last_error:
            self.report_warning('%s. Retrying ...' % last_error)
        retry_note = ' (retry #%d)' % attempt if attempt else ''
        try:
            webpage = self._download_webpage(
                url, item_id,
                note='Downloading webpage%s' % (retry_note,))
            data = self.extract_yt_initial_data(item_id, webpage or '', fatal=fatal) or {}
        except ExtractorError as err:
            # Network errors are worth a retry, except for HTTP 403 and 429
            retryable = (
                isinstance(err.cause, network_exceptions)
                and not (isinstance(err.cause, compat_HTTPError) and err.cause.code in (403, 429)))
            if retryable:
                last_error = error_to_compat_str(err.cause or err.msg)
                if attempt < retries:
                    continue
            if fatal:
                raise
            self.report_warning(error_to_compat_str(err))
            break

        # Only reached when the download and the data extraction succeeded
        try:
            self._extract_and_report_alerts(data)
        except ExtractorError as err:
            if fatal:
                raise
            self.report_warning(error_to_compat_str(err))
            break

        if dict_get(data, ('contents', 'currentVideoEndpoint')):
            break

        last_error = 'Incomplete yt initial data received'
        if attempt >= retries:
            if fatal:
                raise ExtractorError(last_error)
            self.report_warning(last_error)
            break

    return webpage, data
4162
def _extract_data(self, url, item_id, ytcfg=None, fatal=True, webpage_fatal=False, default_client='web'):
    """Return (data, ytcfg) for url, from the webpage or else from the API.

    The webpage is downloaded unless 'webpage' is listed in the 'skip'
    extractor argument. When it yields no data, the tab endpoint is queried
    instead. For authenticated sessions without a ytcfg, that fallback
    raises an ExtractorError unless fatal is false or 'authcheck' is
    skipped, in which case only a warning is issued.
    """
    data = None
    if 'webpage' not in self._configuration_arg('skip'):
        webpage, data = self._extract_webpage(url, item_id, fatal=webpage_fatal)
        ytcfg = ytcfg or self.extract_ytcfg(item_id, webpage)
    if data:
        return data, ytcfg

    if not ytcfg and self.is_authenticated:
        msg = 'Playlists that require authentication may not extract correctly without a successful webpage download.'
        skip_authcheck = 'authcheck' in self._configuration_arg('skip')
        if fatal and not skip_authcheck:
            raise ExtractorError(
                msg + ' If you are not downloading private content, or your cookies are only for the first account and channel,'
                ' pass "--extractor-args youtubetab:skip=authcheck" to skip this check',
                expected=True)
        self.report_warning(msg, only_once=True)

    data = self._extract_tab_endpoint(url, item_id, ytcfg, fatal=fatal, default_client=default_client)
    return data, ytcfg
4179
def _extract_tab_endpoint(self, url, item_id, ytcfg=None, fatal=True, default_client='web'):
    """Resolve url through the API and return the response of its endpoint.

    The url is first resolved with 'navigation/resolve_url'. If the result
    has a browse or watch endpoint, the 'browse' or 'next' API is queried
    with its parameters. Otherwise an ExtractorError is raised, or a warning
    is reported and None returned when fatal is false.
    """
    headers = self.generate_api_headers(ytcfg=ytcfg, default_client=default_client)
    resolve_response = self._extract_response(
        item_id=item_id, query={'url': url}, check_get_keys='endpoint', headers=headers, ytcfg=ytcfg, fatal=fatal,
        ep='navigation/resolve_url', note='Downloading API parameters API JSON', default_client=default_client)

    # (key of the resolved endpoint, API endpoint to query), in order of preference
    for ep_key, ep in (('browseEndpoint', 'browse'), ('watchEndpoint', 'next')):
        params = try_get(resolve_response, lambda x: x['endpoint'][ep_key], dict)
        if not params:
            continue
        return self._extract_response(
            item_id=item_id, query=params, ep=ep, headers=headers,
            ytcfg=ytcfg, fatal=fatal, default_client=default_client,
            check_get_keys=('contents', 'currentVideoEndpoint'))

    err_note = 'Failed to resolve url (does the playlist exist?)'
    if not fatal:
        self.report_warning(err_note, item_id)
        return
    raise ExtractorError(err_note, expected=True)
4197
a6213a49 4198 @staticmethod
4199 def _smuggle_data(entries, data):
4200 for entry in entries:
4201 if data:
4202 entry['url'] = smuggle_url(entry['url'], data)
4203 yield entry
4204
# Value sent as 'params' with search requests when the caller does not pass one
_SEARCH_PARAMS = None

def _search_results(self, query, params=NO_DEFAULT):
    """Yield the entries of every result page of a search for query.

    params defaults to self._SEARCH_PARAMS and is only sent when non-empty.
    Pages are requested from the 'search' endpoint until no continuation is
    left.
    """
    request = {'query': query}
    search_params = self._SEARCH_PARAMS if params is NO_DEFAULT else params
    if search_params:
        request['params'] = search_params

    continuation_list = [None]
    page_num = 1
    while True:
        # The continuation of the previous page (if any) is merged into the request
        request.update(continuation_list[0] or {})
        response = self._extract_response(
            item_id='query "%s" page %s' % (query, page_num), ep='search', query=request,
            check_get_keys=('contents', 'onResponseReceivedCommands'))
        section_contents = try_get(
            response,
            (lambda x: x['contents']['twoColumnSearchResultsRenderer']['primaryContents']['sectionListRenderer']['contents'],
             lambda x: x['onResponseReceivedCommands'][0]['appendContinuationItemsAction']['continuationItems']),
            list)
        yield from self._extract_entries({'contents': section_contents}, continuation_list)
        if not continuation_list[0]:
            return
        page_num += 1
4227
4228
4229class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
4230 IE_DESC = 'YouTube Tabs'
4231 _VALID_URL = r'''(?x:
4232 https?://
4233 (?:\w+\.)?
4234 (?:
4235 youtube(?:kids)?\.com|
4236 %(invidious)s
4237 )/
4238 (?:
4239 (?P<channel_type>channel|c|user|browse)/|
4240 (?P<not_channel>
4241 feed/|hashtag/|
4242 (?:playlist|watch)\?.*?\blist=
4243 )|
4244 (?!(?:%(reserved_names)s)\b) # Direct URLs
4245 )
4246 (?P<id>[^/?\#&]+)
4247 )''' % {
4248 'reserved_names': YoutubeBaseInfoExtractor._RESERVED_NAMES,
4249 'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
4250 }
4251 IE_NAME = 'youtube:tab'
4252
4253 _TESTS = [{
4254 'note': 'playlists, multipage',
4255 'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid',
4256 'playlist_mincount': 94,
4257 'info_dict': {
4258 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
976ae3ea 4259 'title': 'Igor Kleiner - Playlists',
a6213a49 4260 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
976ae3ea 4261 'uploader': 'Igor Kleiner',
a6213a49 4262 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
976ae3ea 4263 'channel': 'Igor Kleiner',
4264 'channel_id': 'UCqj7Cz7revf5maW9g5pgNcg',
4265 'tags': ['"критическое', 'мышление"', '"наука', 'просто"', 'математика', '"анализ', 'данных"'],
4266 'channel_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',
4267 'uploader_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',
a6213a49 4268 },
4269 }, {
4270 'note': 'playlists, multipage, different order',
4271 'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
4272 'playlist_mincount': 94,
4273 'info_dict': {
4274 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
976ae3ea 4275 'title': 'Igor Kleiner - Playlists',
a6213a49 4276 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
4277 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
976ae3ea 4278 'uploader': 'Igor Kleiner',
4279 'uploader_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',
4280 'tags': ['"критическое', 'мышление"', '"наука', 'просто"', 'математика', '"анализ', 'данных"'],
4281 'channel_id': 'UCqj7Cz7revf5maW9g5pgNcg',
4282 'channel': 'Igor Kleiner',
4283 'channel_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',
a6213a49 4284 },
4285 }, {
4286 'note': 'playlists, series',
4287 'url': 'https://www.youtube.com/c/3blue1brown/playlists?view=50&sort=dd&shelf_id=3',
4288 'playlist_mincount': 5,
4289 'info_dict': {
4290 'id': 'UCYO_jab_esuFRV4b17AJtAw',
4291 'title': '3Blue1Brown - Playlists',
4292 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
4293 'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
4294 'uploader': '3Blue1Brown',
976ae3ea 4295 'channel_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',
4296 'uploader_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',
4297 'channel': '3Blue1Brown',
4298 'channel_id': 'UCYO_jab_esuFRV4b17AJtAw',
4299 'tags': ['Mathematics'],
a6213a49 4300 },
4301 }, {
4302 'note': 'playlists, singlepage',
4303 'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
4304 'playlist_mincount': 4,
4305 'info_dict': {
4306 'id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
4307 'title': 'ThirstForScience - Playlists',
4308 'description': 'md5:609399d937ea957b0f53cbffb747a14c',
4309 'uploader': 'ThirstForScience',
4310 'uploader_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
976ae3ea 4311 'uploader_url': 'https://www.youtube.com/channel/UCAEtajcuhQ6an9WEzY9LEMQ',
4312 'channel_url': 'https://www.youtube.com/channel/UCAEtajcuhQ6an9WEzY9LEMQ',
4313 'channel_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
4314 'tags': 'count:13',
4315 'channel': 'ThirstForScience',
a6213a49 4316 }
4317 }, {
4318 'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',
4319 'only_matching': True,
4320 }, {
4321 'note': 'basic, single video playlist',
4322 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
4323 'info_dict': {
4324 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
4325 'uploader': 'Sergey M.',
4326 'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
4327 'title': 'youtube-dl public playlist',
976ae3ea 4328 'description': '',
4329 'tags': [],
4330 'view_count': int,
4331 'modified_date': '20201130',
4332 'channel': 'Sergey M.',
4333 'channel_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
4334 'uploader_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
4335 'channel_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
a6213a49 4336 },
4337 'playlist_count': 1,
4338 }, {
4339 'note': 'empty playlist',
4340 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
4341 'info_dict': {
4342 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
4343 'uploader': 'Sergey M.',
4344 'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
4345 'title': 'youtube-dl empty playlist',
976ae3ea 4346 'tags': [],
4347 'channel': 'Sergey M.',
4348 'description': '',
4349 'modified_date': '20160902',
4350 'channel_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
4351 'channel_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
4352 'uploader_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
a6213a49 4353 },
4354 'playlist_count': 0,
4355 }, {
4356 'note': 'Home tab',
4357 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/featured',
4358 'info_dict': {
4359 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4360 'title': 'lex will - Home',
4361 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
4362 'uploader': 'lex will',
4363 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
976ae3ea 4364 'channel': 'lex will',
4365 'tags': ['bible', 'history', 'prophesy'],
4366 'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
4367 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
4368 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
a6213a49 4369 },
4370 'playlist_mincount': 2,
4371 }, {
4372 'note': 'Videos tab',
4373 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos',
4374 'info_dict': {
4375 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4376 'title': 'lex will - Videos',
4377 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
4378 'uploader': 'lex will',
4379 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
976ae3ea 4380 'tags': ['bible', 'history', 'prophesy'],
4381 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
4382 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4383 'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
4384 'channel': 'lex will',
a6213a49 4385 },
4386 'playlist_mincount': 975,
4387 }, {
4388 'note': 'Videos tab, sorted by popular',
4389 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos?view=0&sort=p&flow=grid',
4390 'info_dict': {
4391 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4392 'title': 'lex will - Videos',
4393 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
4394 'uploader': 'lex will',
4395 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
976ae3ea 4396 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4397 'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
4398 'channel': 'lex will',
4399 'tags': ['bible', 'history', 'prophesy'],
4400 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
a6213a49 4401 },
4402 'playlist_mincount': 199,
4403 }, {
4404 'note': 'Playlists tab',
4405 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/playlists',
4406 'info_dict': {
4407 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4408 'title': 'lex will - Playlists',
4409 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
4410 'uploader': 'lex will',
4411 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
976ae3ea 4412 'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
4413 'channel': 'lex will',
4414 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
4415 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4416 'tags': ['bible', 'history', 'prophesy'],
a6213a49 4417 },
4418 'playlist_mincount': 17,
4419 }, {
4420 'note': 'Community tab',
4421 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community',
4422 'info_dict': {
4423 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4424 'title': 'lex will - Community',
4425 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
4426 'uploader': 'lex will',
4427 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
976ae3ea 4428 'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
4429 'channel': 'lex will',
4430 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
4431 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4432 'tags': ['bible', 'history', 'prophesy'],
a6213a49 4433 },
4434 'playlist_mincount': 18,
4435 }, {
4436 'note': 'Channels tab',
4437 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels',
4438 'info_dict': {
4439 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4440 'title': 'lex will - Channels',
4441 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
4442 'uploader': 'lex will',
4443 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
976ae3ea 4444 'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
4445 'channel': 'lex will',
4446 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
4447 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4448 'tags': ['bible', 'history', 'prophesy'],
a6213a49 4449 },
4450 'playlist_mincount': 12,
4451 }, {
4452 'note': 'Search tab',
4453 'url': 'https://www.youtube.com/c/3blue1brown/search?query=linear%20algebra',
4454 'playlist_mincount': 40,
4455 'info_dict': {
4456 'id': 'UCYO_jab_esuFRV4b17AJtAw',
4457 'title': '3Blue1Brown - Search - linear algebra',
4458 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
4459 'uploader': '3Blue1Brown',
4460 'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
976ae3ea 4461 'channel_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',
4462 'uploader_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',
4463 'tags': ['Mathematics'],
4464 'channel': '3Blue1Brown',
4465 'channel_id': 'UCYO_jab_esuFRV4b17AJtAw',
a6213a49 4466 },
4467 }, {
4468 'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',
4469 'only_matching': True,
4470 }, {
4471 'url': 'https://www.youtubekids.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
4472 'only_matching': True,
4473 }, {
4474 'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
4475 'only_matching': True,
4476 }, {
4477 'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
4478 'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
4479 'info_dict': {
4480 'title': '29C3: Not my department',
4481 'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
4482 'uploader': 'Christiaan008',
4483 'uploader_id': 'UCEPzS1rYsrkqzSLNp76nrcg',
4484 'description': 'md5:a14dc1a8ef8307a9807fe136a0660268',
976ae3ea 4485 'tags': [],
4486 'uploader_url': 'https://www.youtube.com/c/ChRiStIaAn008',
4487 'view_count': int,
4488 'modified_date': '20150605',
4489 'channel_id': 'UCEPzS1rYsrkqzSLNp76nrcg',
4490 'channel_url': 'https://www.youtube.com/c/ChRiStIaAn008',
4491 'channel': 'Christiaan008',
a6213a49 4492 },
4493 'playlist_count': 96,
4494 }, {
4495 'note': 'Large playlist',
4496 'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
4497 'info_dict': {
4498 'title': 'Uploads from Cauchemar',
4499 'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
4500 'uploader': 'Cauchemar',
4501 'uploader_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',
976ae3ea 4502 'channel_url': 'https://www.youtube.com/c/Cauchemar89',
4503 'tags': [],
4504 'modified_date': r're:\d{8}',
4505 'channel': 'Cauchemar',
4506 'uploader_url': 'https://www.youtube.com/c/Cauchemar89',
4507 'view_count': int,
4508 'description': '',
4509 'channel_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',
a6213a49 4510 },
4511 'playlist_mincount': 1123,
976ae3ea 4512 'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
a6213a49 4513 }, {
4514 'note': 'even larger playlist, 8832 videos',
4515 'url': 'http://www.youtube.com/user/NASAgovVideo/videos',
4516 'only_matching': True,
4517 }, {
4518 'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
4519 'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
4520 'info_dict': {
4521 'title': 'Uploads from Interstellar Movie',
4522 'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
4523 'uploader': 'Interstellar Movie',
4524 'uploader_id': 'UCXw-G3eDE9trcvY2sBMM_aA',
976ae3ea 4525 'uploader_url': 'https://www.youtube.com/c/InterstellarMovie',
4526 'tags': [],
4527 'view_count': int,
4528 'channel_id': 'UCXw-G3eDE9trcvY2sBMM_aA',
4529 'channel_url': 'https://www.youtube.com/c/InterstellarMovie',
4530 'channel': 'Interstellar Movie',
4531 'description': '',
4532 'modified_date': r're:\d{8}',
a6213a49 4533 },
4534 'playlist_mincount': 21,
4535 }, {
4536 'note': 'Playlist with "show unavailable videos" button',
4537 'url': 'https://www.youtube.com/playlist?list=UUTYLiWFZy8xtPwxFwX9rV7Q',
4538 'info_dict': {
4539 'title': 'Uploads from Phim Siêu Nhân Nhật Bản',
4540 'id': 'UUTYLiWFZy8xtPwxFwX9rV7Q',
4541 'uploader': 'Phim Siêu Nhân Nhật Bản',
4542 'uploader_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',
976ae3ea 4543 'view_count': int,
4544 'channel': 'Phim Siêu Nhân Nhật Bản',
4545 'tags': [],
4546 'uploader_url': 'https://www.youtube.com/channel/UCTYLiWFZy8xtPwxFwX9rV7Q',
4547 'description': '',
4548 'channel_url': 'https://www.youtube.com/channel/UCTYLiWFZy8xtPwxFwX9rV7Q',
4549 'channel_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',
4550 'modified_date': r're:\d{8}',
a6213a49 4551 },
4552 'playlist_mincount': 200,
976ae3ea 4553 'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
a6213a49 4554 }, {
4555 'note': 'Playlist with unavailable videos in page 7',
4556 'url': 'https://www.youtube.com/playlist?list=UU8l9frL61Yl5KFOl87nIm2w',
4557 'info_dict': {
4558 'title': 'Uploads from BlankTV',
4559 'id': 'UU8l9frL61Yl5KFOl87nIm2w',
4560 'uploader': 'BlankTV',
4561 'uploader_id': 'UC8l9frL61Yl5KFOl87nIm2w',
976ae3ea 4562 'channel': 'BlankTV',
4563 'channel_url': 'https://www.youtube.com/c/blanktv',
4564 'channel_id': 'UC8l9frL61Yl5KFOl87nIm2w',
4565 'view_count': int,
4566 'tags': [],
4567 'uploader_url': 'https://www.youtube.com/c/blanktv',
4568 'modified_date': r're:\d{8}',
4569 'description': '',
a6213a49 4570 },
4571 'playlist_mincount': 1000,
976ae3ea 4572 'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
a6213a49 4573 }, {
4574 'note': 'https://github.com/ytdl-org/youtube-dl/issues/21844',
4575 'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
4576 'info_dict': {
4577 'title': 'Data Analysis with Dr Mike Pound',
4578 'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
4579 'uploader_id': 'UC9-y-6csu5WGm29I7JiwpnA',
4580 'uploader': 'Computerphile',
4581 'description': 'md5:7f567c574d13d3f8c0954d9ffee4e487',
976ae3ea 4582 'uploader_url': 'https://www.youtube.com/user/Computerphile',
4583 'tags': [],
4584 'view_count': int,
4585 'channel_id': 'UC9-y-6csu5WGm29I7JiwpnA',
4586 'channel_url': 'https://www.youtube.com/user/Computerphile',
4587 'channel': 'Computerphile',
a6213a49 4588 },
4589 'playlist_mincount': 11,
4590 }, {
4591 'url': 'https://invidio.us/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
4592 'only_matching': True,
4593 }, {
4594 'note': 'Playlist URL that does not actually serve a playlist',
4595 'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
4596 'info_dict': {
4597 'id': 'FqZTN594JQw',
4598 'ext': 'webm',
4599 'title': "Smiley's People 01 detective, Adventure Series, Action",
4600 'uploader': 'STREEM',
4601 'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
4602 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
4603 'upload_date': '20150526',
4604 'license': 'Standard YouTube License',
4605 'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
4606 'categories': ['People & Blogs'],
4607 'tags': list,
4608 'view_count': int,
4609 'like_count': int,
a6213a49 4610 },
4611 'params': {
4612 'skip_download': True,
4613 },
4614 'skip': 'This video is not available.',
4615 'add_ie': [YoutubeIE.ie_key()],
4616 }, {
4617 'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',
4618 'only_matching': True,
4619 }, {
4620 'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
4621 'only_matching': True,
4622 }, {
4623 'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',
4624 'info_dict': {
976ae3ea 4625 'id': 'zpsbVPFwsqk', # This will keep changing
a6213a49 4626 'ext': 'mp4',
976ae3ea 4627 'title': str,
a6213a49 4628 'uploader': 'Sky News',
4629 'uploader_id': 'skynews',
4630 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/skynews',
4631 'upload_date': r're:\d{8}',
976ae3ea 4632 'description': str,
a6213a49 4633 'categories': ['News & Politics'],
4634 'tags': list,
4635 'like_count': int,
976ae3ea 4636 'release_timestamp': 1640164857,
4637 'channel': 'Sky News',
4638 'channel_id': 'UCoMdktPbSTixAyNGwb-UYkQ',
4639 'age_limit': 0,
4640 'view_count': int,
4641 'thumbnail': 'https://i.ytimg.com/vi/zpsbVPFwsqk/maxresdefault_live.jpg',
4642 'playable_in_embed': True,
4643 'release_date': '20211222',
4644 'availability': 'public',
4645 'live_status': 'is_live',
4646 'channel_url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ',
a6213a49 4647 },
4648 'params': {
4649 'skip_download': True,
4650 },
976ae3ea 4651 'expected_warnings': ['Ignoring subtitle tracks found in '],
a6213a49 4652 }, {
4653 'url': 'https://www.youtube.com/user/TheYoungTurks/live',
4654 'info_dict': {
4655 'id': 'a48o2S1cPoo',
4656 'ext': 'mp4',
4657 'title': 'The Young Turks - Live Main Show',
4658 'uploader': 'The Young Turks',
4659 'uploader_id': 'TheYoungTurks',
4660 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
4661 'upload_date': '20150715',
4662 'license': 'Standard YouTube License',
4663 'description': 'md5:438179573adcdff3c97ebb1ee632b891',
4664 'categories': ['News & Politics'],
4665 'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
4666 'like_count': int,
a6213a49 4667 },
4668 'params': {
4669 'skip_download': True,
4670 },
4671 'only_matching': True,
4672 }, {
4673 'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
4674 'only_matching': True,
4675 }, {
4676 'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
4677 'only_matching': True,
4678 }, {
4679 'note': 'A channel that is not live. Should raise error',
4680 'url': 'https://www.youtube.com/user/numberphile/live',
4681 'only_matching': True,
4682 }, {
4683 'url': 'https://www.youtube.com/feed/trending',
4684 'only_matching': True,
4685 }, {
4686 'url': 'https://www.youtube.com/feed/library',
4687 'only_matching': True,
4688 }, {
4689 'url': 'https://www.youtube.com/feed/history',
4690 'only_matching': True,
4691 }, {
4692 'url': 'https://www.youtube.com/feed/subscriptions',
4693 'only_matching': True,
4694 }, {
4695 'url': 'https://www.youtube.com/feed/watch_later',
4696 'only_matching': True,
4697 }, {
4698 'note': 'Recommended - redirects to home page.',
4699 'url': 'https://www.youtube.com/feed/recommended',
4700 'only_matching': True,
4701 }, {
4702 'note': 'inline playlist with not always working continuations',
4703 'url': 'https://www.youtube.com/watch?v=UC6u0Tct-Fo&list=PL36D642111D65BE7C',
4704 'only_matching': True,
4705 }, {
4706 'url': 'https://www.youtube.com/course',
4707 'only_matching': True,
4708 }, {
4709 'url': 'https://www.youtube.com/zsecurity',
4710 'only_matching': True,
4711 }, {
4712 'url': 'http://www.youtube.com/NASAgovVideo/videos',
4713 'only_matching': True,
4714 }, {
4715 'url': 'https://www.youtube.com/TheYoungTurks/live',
4716 'only_matching': True,
4717 }, {
4718 'url': 'https://www.youtube.com/hashtag/cctv9',
4719 'info_dict': {
4720 'id': 'cctv9',
4721 'title': '#cctv9',
976ae3ea 4722 'tags': [],
a6213a49 4723 },
4724 'playlist_mincount': 350,
4725 }, {
4726 'url': 'https://www.youtube.com/watch?list=PLW4dVinRY435CBE_JD3t-0SRXKfnZHS1P&feature=youtu.be&v=M9cJMXmQ_ZU',
4727 'only_matching': True,
4728 }, {
4729 'note': 'Requires Premium: should request additional YTM-info webpage (and have format 141) for videos in playlist',
4730 'url': 'https://music.youtube.com/playlist?list=PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
4731 'only_matching': True
4732 }, {
4733 'note': '/browse/ should redirect to /channel/',
4734 'url': 'https://music.youtube.com/browse/UC1a8OFewdjuLq6KlF8M_8Ng',
4735 'only_matching': True
4736 }, {
4737 'note': 'VLPL, should redirect to playlist?list=PL...',
4738 'url': 'https://music.youtube.com/browse/VLPLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
4739 'info_dict': {
4740 'id': 'PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
4741 'uploader': 'NoCopyrightSounds',
4742 'description': 'Providing you with copyright free / safe music for gaming, live streaming, studying and more!',
4743 'uploader_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',
4744 'title': 'NCS Releases',
976ae3ea 4745 'uploader_url': 'https://www.youtube.com/c/NoCopyrightSounds',
4746 'channel_url': 'https://www.youtube.com/c/NoCopyrightSounds',
4747 'modified_date': r're:\d{8}',
4748 'view_count': int,
4749 'channel_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',
4750 'tags': [],
4751 'channel': 'NoCopyrightSounds',
a6213a49 4752 },
4753 'playlist_mincount': 166,
976ae3ea 4754 'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
a6213a49 4755 }, {
4756 'note': 'Topic, should redirect to playlist?list=UU...',
4757 'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',
4758 'info_dict': {
4759 'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',
4760 'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
4761 'title': 'Uploads from Royalty Free Music - Topic',
4762 'uploader': 'Royalty Free Music - Topic',
976ae3ea 4763 'tags': [],
4764 'channel_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
4765 'channel': 'Royalty Free Music - Topic',
4766 'view_count': int,
4767 'channel_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',
4768 'channel_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',
4769 'modified_date': r're:\d{8}',
4770 'uploader_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',
4771 'description': '',
a6213a49 4772 },
4773 'expected_warnings': [
a6213a49 4774 'The URL does not have a videos tab',
976ae3ea 4775 r'[Uu]navailable videos (are|will be) hidden',
a6213a49 4776 ],
4777 'playlist_mincount': 101,
4778 }, {
4779 'note': 'Topic without a UU playlist',
4780 'url': 'https://www.youtube.com/channel/UCtFRv9O2AHqOZjjynzrv-xg',
4781 'info_dict': {
4782 'id': 'UCtFRv9O2AHqOZjjynzrv-xg',
4783 'title': 'UCtFRv9O2AHqOZjjynzrv-xg',
976ae3ea 4784 'tags': [],
a6213a49 4785 },
4786 'expected_warnings': [
976ae3ea 4787 'the playlist redirect gave error',
a6213a49 4788 ],
4789 'playlist_mincount': 9,
4790 }, {
4791 'note': 'Youtube music Album',
4792 'url': 'https://music.youtube.com/browse/MPREb_gTAcphH99wE',
4793 'info_dict': {
4794 'id': 'OLAK5uy_l1m0thk3g31NmIIz_vMIbWtyv7eZixlH0',
4795 'title': 'Album - Royalty Free Music Library V2 (50 Songs)',
976ae3ea 4796 'tags': [],
4797 'view_count': int,
4798 'description': '',
4799 'availability': 'unlisted',
4800 'modified_date': r're:\d{8}',
a6213a49 4801 },
4802 'playlist_count': 50,
4803 }, {
4804 'note': 'unlisted single video playlist',
4805 'url': 'https://www.youtube.com/playlist?list=PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
4806 'info_dict': {
4807 'uploader_id': 'UC9zHu_mHU96r19o-wV5Qs1Q',
4808 'uploader': 'colethedj',
4809 'id': 'PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
4810 'title': 'yt-dlp unlisted playlist test',
976ae3ea 4811 'availability': 'unlisted',
4812 'tags': [],
4813 'modified_date': '20211208',
4814 'channel': 'colethedj',
4815 'view_count': int,
4816 'description': '',
4817 'uploader_url': 'https://www.youtube.com/channel/UC9zHu_mHU96r19o-wV5Qs1Q',
4818 'channel_id': 'UC9zHu_mHU96r19o-wV5Qs1Q',
4819 'channel_url': 'https://www.youtube.com/channel/UC9zHu_mHU96r19o-wV5Qs1Q',
a6213a49 4820 },
4821 'playlist_count': 1,
4822 }, {
4823 'note': 'API Fallback: Recommended - redirects to home page. Requires visitorData',
4824 'url': 'https://www.youtube.com/feed/recommended',
4825 'info_dict': {
4826 'id': 'recommended',
4827 'title': 'recommended',
4828 },
4829 'playlist_mincount': 50,
4830 'params': {
4831 'skip_download': True,
4832 'extractor_args': {'youtubetab': {'skip': ['webpage']}}
4833 },
4834 }, {
4835 'note': 'API Fallback: /videos tab, sorted by oldest first',
4836 'url': 'https://www.youtube.com/user/theCodyReeder/videos?view=0&sort=da&flow=grid',
4837 'info_dict': {
4838 'id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',
4839 'title': 'Cody\'sLab - Videos',
4840 'description': 'md5:d083b7c2f0c67ee7a6c74c3e9b4243fa',
4841 'uploader': 'Cody\'sLab',
4842 'uploader_id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',
976ae3ea 4843 'channel': 'Cody\'sLab',
4844 'channel_id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',
4845 'tags': [],
4846 'channel_url': 'https://www.youtube.com/channel/UCu6mSoMNzHQiBIOCkHUa2Aw',
4847 'uploader_url': 'https://www.youtube.com/channel/UCu6mSoMNzHQiBIOCkHUa2Aw',
a6213a49 4848 },
4849 'playlist_mincount': 650,
4850 'params': {
4851 'skip_download': True,
4852 'extractor_args': {'youtubetab': {'skip': ['webpage']}}
4853 },
4854 }, {
4855 'note': 'API Fallback: Topic, should redirect to playlist?list=UU...',
4856 'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',
4857 'info_dict': {
4858 'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',
4859 'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
4860 'title': 'Uploads from Royalty Free Music - Topic',
4861 'uploader': 'Royalty Free Music - Topic',
976ae3ea 4862 'modified_date': r're:\d{8}',
4863 'channel_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
4864 'description': '',
4865 'channel_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',
4866 'tags': [],
4867 'channel': 'Royalty Free Music - Topic',
4868 'view_count': int,
4869 'uploader_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',
a6213a49 4870 },
4871 'expected_warnings': [
976ae3ea 4872 'does not have a videos tab',
4873 r'[Uu]navailable videos (are|will be) hidden',
a6213a49 4874 ],
4875 'playlist_mincount': 101,
4876 'params': {
4877 'skip_download': True,
4878 'extractor_args': {'youtubetab': {'skip': ['webpage']}}
4879 },
4880 }]
4881
@classmethod
def suitable(cls, url):
    """Decline any URL that YoutubeIE accepts; otherwise use the inherited check."""
    if YoutubeIE.suitable(url):
        return False
    return super(YoutubeTabIE, cls).suitable(url)
9297939e 4886
def _real_extract(self, url):
    """Unsmuggle the URL, run the tab extraction and re-smuggle the data onto the entries.

    The smuggled data is additionally flagged with 'is_music_url' when
    is_music_url() reports the URL as such.
    """
    url, smuggled = unsmuggle_url(url, {})
    if self.is_music_url(url):
        smuggled['is_music_url'] = True

    result = self.__real_extract(url, smuggled)
    entries = result.get('entries')
    if entries:
        # Pass the smuggled data on to every entry of a non-empty playlist
        result['entries'] = self._smuggle_data(entries, smuggled)
    return result
4895
# Splits a URL into three named parts: 'pre' (whatever _VALID_URL matches),
# an optional 'tab' ('/name', only attempted when the 'not_channel' group of
# _VALID_URL did not participate in the match) and 'post' (everything else).
_URL_RE = re.compile(
    rf'(?P<pre>{_VALID_URL})(?(not_channel)|(?P<tab>/\w+))?(?P<post>.*)$')
fe03a6cd 4897
4898 def __real_extract(self, url, smuggled_data):
cd7c66cf 4899 item_id = self._match_id(url)
4900 url = compat_urlparse.urlunparse(
4901 compat_urlparse.urlparse(url)._replace(netloc='www.youtube.com'))
a06916d9 4902 compat_opts = self.get_param('compat_opts', [])
cd7c66cf 4903
fe03a6cd 4904 def get_mobj(url):
37e57a9f 4905 mobj = self._URL_RE.match(url).groupdict()
07cce701 4906 mobj.update((k, '') for k, v in mobj.items() if v is None)
fe03a6cd 4907 return mobj
4908
37e57a9f 4909 mobj, redirect_warning = get_mobj(url), None
fe03a6cd 4910 # Youtube returns incomplete data if tabname is not lower case
4911 pre, tab, post, is_channel = mobj['pre'], mobj['tab'].lower(), mobj['post'], not mobj['not_channel']
fe03a6cd 4912 if is_channel:
4913 if smuggled_data.get('is_music_url'):
37e57a9f 4914 if item_id[:2] == 'VL': # Youtube music VL channels have an equivalent playlist
fe03a6cd 4915 item_id = item_id[2:]
37e57a9f 4916 pre, tab, post, is_channel = f'https://www.youtube.com/playlist?list={item_id}', '', '', False
4917 elif item_id[:2] == 'MP': # Resolve albums (/[channel/browse]/MP...) to their equivalent playlist
ac56cf38 4918 mdata = self._extract_tab_endpoint(
37e57a9f 4919 f'https://music.youtube.com/channel/{item_id}', item_id, default_client='web_music')
4920 murl = traverse_obj(mdata, ('microformat', 'microformatDataRenderer', 'urlCanonical'),
4921 get_all=False, expected_type=compat_str)
ac56cf38 4922 if not murl:
37e57a9f 4923 raise ExtractorError('Failed to resolve album to playlist')
ac56cf38 4924 return self.url_result(murl, ie=YoutubeTabIE.ie_key())
37e57a9f 4925 elif mobj['channel_type'] == 'browse': # Youtube music /browse/ should be changed to /channel/
4926 pre = f'https://www.youtube.com/channel/{item_id}'
4927
64f36541 4928 original_tab_name = tab
fe03a6cd 4929 if is_channel and not tab and 'no-youtube-channel-redirect' not in compat_opts:
4930 # Home URLs should redirect to /videos/
37e57a9f 4931 redirect_warning = ('A channel/user page was given. All the channel\'s videos will be downloaded. '
4932 'To download only the videos in the home page, add a "/featured" to the URL')
fe03a6cd 4933 tab = '/videos'
4934
4935 url = ''.join((pre, tab, post))
4936 mobj = get_mobj(url)
cd7c66cf 4937
4938 # Handle both video/playlist URLs
201c1459 4939 qs = parse_qs(url)
37e57a9f 4940 video_id, playlist_id = [qs.get(key, [None])[0] for key in ('v', 'list')]
cd7c66cf 4941
fe03a6cd 4942 if not video_id and mobj['not_channel'].startswith('watch'):
cd7c66cf 4943 if not playlist_id:
fe03a6cd 4944 # If there is neither video or playlist ids, youtube redirects to home page, which is undesirable
cd7c66cf 4945 raise ExtractorError('Unable to recognize tab page')
fe03a6cd 4946 # Common mistake: https://www.youtube.com/watch?list=playlist_id
37e57a9f 4947 self.report_warning(f'A video URL was given without video ID. Trying to download playlist {playlist_id}')
4948 url = f'https://www.youtube.com/playlist?list={playlist_id}'
18db7548 4949 mobj = get_mobj(url)
cd7c66cf 4950
4951 if video_id and playlist_id:
a06916d9 4952 if self.get_param('noplaylist'):
37e57a9f 4953 self.to_screen(f'Downloading just video {video_id} because of --no-playlist')
4954 return self.url_result(f'https://www.youtube.com/watch?v={video_id}',
4955 ie=YoutubeIE.ie_key(), video_id=video_id)
4956 self.to_screen(f'Downloading playlist {playlist_id}; add --no-playlist to just download video {video_id}')
cd7c66cf 4957
ac56cf38 4958 data, ytcfg = self._extract_data(url, item_id)
14fdfea9 4959
37e57a9f 4960 tabs = traverse_obj(data, ('contents', 'twoColumnBrowseResultsRenderer', 'tabs'), expected_type=list)
18db7548 4961 if tabs:
4962 selected_tab = self._extract_selected_tab(tabs)
64f36541 4963 selected_tab_name = selected_tab.get('title', '').lower()
4964 if selected_tab_name == 'home':
4965 selected_tab_name = 'featured'
4966 requested_tab_name = mobj['tab'][1:]
09f1580e 4967 if 'no-youtube-channel-redirect' not in compat_opts:
64f36541 4968 if requested_tab_name == 'live':
09f1580e 4969 # Live tab should have redirected to the video
4970 raise ExtractorError('The channel is not currently live', expected=True)
64f36541 4971 if requested_tab_name not in ('', selected_tab_name):
4972 redirect_warning = f'The channel does not have a {requested_tab_name} tab'
4973 if not original_tab_name:
4974 if item_id[:2] == 'UC':
4975 # Topic channels don't have /videos. Use the equivalent playlist instead
4976 pl_id = f'UU{item_id[2:]}'
4977 pl_url = f'https://www.youtube.com/playlist?list={pl_id}'
4978 try:
4979 data, ytcfg = self._extract_data(pl_url, pl_id, ytcfg=ytcfg, fatal=True, webpage_fatal=True)
4980 except ExtractorError:
4981 redirect_warning += ' and the playlist redirect gave error'
4982 else:
4983 item_id, url, selected_tab_name = pl_id, pl_url, requested_tab_name
4984 redirect_warning += f'. Redirecting to playlist {pl_id} instead'
4985 if selected_tab_name and selected_tab_name != requested_tab_name:
4986 redirect_warning += f'. {selected_tab_name} tab is being downloaded instead'
4987 else:
4988 raise ExtractorError(redirect_warning, expected=True)
18db7548 4989
37e57a9f 4990 if redirect_warning:
64f36541 4991 self.to_screen(redirect_warning)
37e57a9f 4992 self.write_debug(f'Final URL: {url}')
18db7548 4993
358de58c 4994 # YouTube sometimes provides a button to reload playlist with unavailable videos.
53ed7066 4995 if 'no-youtube-unavailable-videos' not in compat_opts:
ac56cf38 4996 data = self._reload_with_unavailable_videos(item_id, data, ytcfg) or data
c0ac49bc 4997 self._extract_and_report_alerts(data, only_once=True)
37e57a9f 4998 tabs = traverse_obj(data, ('contents', 'twoColumnBrowseResultsRenderer', 'tabs'), expected_type=list)
8bdd16b4 4999 if tabs:
ac56cf38 5000 return self._extract_from_tabs(item_id, ytcfg, data, tabs)
cd7c66cf 5001
37e57a9f 5002 playlist = traverse_obj(
5003 data, ('contents', 'twoColumnWatchNextResults', 'playlist', 'playlist'), expected_type=dict)
8bdd16b4 5004 if playlist:
ac56cf38 5005 return self._extract_from_playlist(item_id, url, data, playlist, ytcfg)
cd7c66cf 5006
37e57a9f 5007 video_id = traverse_obj(
5008 data, ('currentVideoEndpoint', 'watchEndpoint', 'videoId'), expected_type=str) or video_id
8bdd16b4 5009 if video_id:
09f1580e 5010 if mobj['tab'] != '/live': # live tab is expected to redirect to video
37e57a9f 5011 self.report_warning(f'Unable to recognize playlist. Downloading just video {video_id}')
5012 return self.url_result(f'https://www.youtube.com/watch?v={video_id}',
5013 ie=YoutubeIE.ie_key(), video_id=video_id)
cd7c66cf 5014
8bdd16b4 5015 raise ExtractorError('Unable to recognize tab page')
c5e8d7af 5016
c5e8d7af 5017
class YoutubePlaylistIE(InfoExtractor):
    # Accepts playlist URLs as well as bare playlist ids and forwards them to
    # YoutubeTabIE as a canonical /playlist URL.
    IE_NAME = 'youtube:playlist'
    IE_DESC = 'YouTube playlists'
    _VALID_URL = r'''(?x)(?:
        (?:https?://)?
        (?:\w+\.)?
        (?:
            (?:
                youtube(?:kids)?\.com|
                %(invidious)s
            )
            /.*?\?.*?\blist=
        )?
        (?P<id>%(playlist_id)s)
    )''' % {
        'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE,
        'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
    }
    _TESTS = [{
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
            'uploader': 'Wickman',
            'uploader_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
            'description': 'md5:8fa6f52abb47a9552002fa3ddfc57fc2',
            'view_count': int,
            'uploader_url': 'https://www.youtube.com/user/Wickydoo',
            'modified_date': r're:\d{8}',
            'channel_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
            'channel': 'Wickman',
            'tags': [],
            'channel_url': 'https://www.youtube.com/user/Wickydoo',
        },
        'playlist_mincount': 29,
    }, {
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        },
        'playlist_count': 2,
        'skip': 'This playlist is private',
    }, {
        'note': 'embedded',
        'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA15',
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
            'uploader': 'milan',
            'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
            'description': '',
            'channel_url': 'https://www.youtube.com/channel/UCEI1-PVPcYXjB73Hfelbmaw',
            'tags': [],
            'modified_date': '20140919',
            'view_count': int,
            'channel': 'milan',
            'channel_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
            'uploader_url': 'https://www.youtube.com/channel/UCEI1-PVPcYXjB73Hfelbmaw',
        },
        'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
    }, {
        'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
        'playlist_mincount': 654,
        'info_dict': {
            'title': '2018 Chinese New Singles (11/6 updated)',
            'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
            'uploader': 'LBK',
            'uploader_id': 'UC21nz3_MesPLqtDqwdvnoxA',
            'description': 'md5:da521864744d60a198e3a88af4db0d9d',
            'channel': 'LBK',
            'view_count': int,
            'channel_url': 'https://www.youtube.com/c/愛低音的國王',
            'tags': [],
            'uploader_url': 'https://www.youtube.com/c/愛低音的國王',
            'channel_id': 'UC21nz3_MesPLqtDqwdvnoxA',
            'modified_date': r're:\d{8}',
        },
        'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
    }, {
        'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
        'only_matching': True,
    }, {
        # music album playlist
        'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Reject URLs handled by YoutubeTabIE and URLs that carry a video id ('v')."""
        if YoutubeTabIE.suitable(url):
            return False
        # NOTE(review): function-local import kept exactly as in the original;
        # presumably required when this method is used outside this module - confirm
        # before removing.
        from ..utils import parse_qs
        has_video_id = bool(parse_qs(url).get('v', [None])[0])
        return not has_video_id and super(YoutubePlaylistIE, cls).suitable(url)

    def _real_extract(self, url):
        """Rewrite the input as a /playlist URL and delegate to YoutubeTabIE."""
        playlist_id = self._match_id(url)
        is_music_url = YoutubeBaseInfoExtractor.is_music_url(url)
        # Keep the original query if there is one, else build it from the id
        query = parse_qs(url) or {'list': playlist_id}
        playlist_url = update_url_query('https://www.youtube.com/playlist', query)
        if is_music_url:
            playlist_url = smuggle_url(playlist_url, {'is_music_url': True})
        return self.url_result(playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
29f7c58a 5127
5128
class YoutubeYtBeIE(InfoExtractor):
    # youtu.be short links that also carry a playlist id ("list=").
    IE_DESC = 'youtu.be'
    _VALID_URL = r'https?://youtu\.be/(?P<id>[0-9A-Za-z_-]{11})/*?.*?\blist=(?P<playlist_id>%(playlist_id)s)' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TESTS = [{
        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
        'info_dict': {
            'id': 'yeWKywCrFtk',
            'ext': 'mp4',
            'title': 'Small Scale Baler and Braiding Rugs',
            'uploader': 'Backus-Page House Museum',
            'uploader_id': 'backuspagemuseum',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
            'upload_date': '20161008',
            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
            'categories': ['Nonprofits & Activism'],
            'tags': list,
            'like_count': int,
            'age_limit': 0,
            'playable_in_embed': True,
            'thumbnail': 'https://i.ytimg.com/vi_webp/yeWKywCrFtk/maxresdefault.webp',
            'channel': 'Backus-Page House Museum',
            'channel_id': 'UCEfMCQ9bs3tjvjy1s451zaw',
            'live_status': 'not_live',
            'view_count': int,
            'channel_url': 'https://www.youtube.com/channel/UCEfMCQ9bs3tjvjy1s451zaw',
            'availability': 'public',
            'duration': 59,
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
    }, {
        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Expand the short link into a full watch URL and delegate to YoutubeTabIE."""
        video_id, playlist_id = self._match_valid_url(url).group('id', 'playlist_id')
        watch_url = update_url_query('https://www.youtube.com/watch', {
            'v': video_id,
            'list': playlist_id,
            'feature': 'youtu.be',
        })
        return self.url_result(watch_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
8bdd16b4 5176
5177
class YoutubeLivestreamEmbedIE(InfoExtractor):
    # /embed/live_stream?channel=<id> URLs; forwarded to the channel's /live page.
    IE_DESC = 'YouTube livestream embeds'
    _VALID_URL = r'https?://(?:\w+\.)?youtube\.com/embed/live_stream/?\?(?:[^#]+&)?channel=(?P<id>[^&#]+)'
    _TESTS = [{
        'url': 'https://www.youtube.com/embed/live_stream?channel=UC2_KI6RB__jGdlnK6dvFEZA',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE using the /live URL of the embedded channel."""
        channel_id = self._match_id(url)
        live_url = f'https://www.youtube.com/channel/{channel_id}/live'
        return self.url_result(live_url, ie=YoutubeTabIE.ie_key(), video_id=channel_id)
5191
5192
class YoutubeYtUserIE(InfoExtractor):
    # "ytuser:<name>" pseudo-URLs; forwarded to the /videos page of that user.
    IE_NAME = 'youtube:user'
    IE_DESC = 'YouTube user videos; "ytuser:" prefix'
    _VALID_URL = r'ytuser:(?P<id>.+)'
    _TESTS = [{
        'url': 'ytuser:phihag',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE using the /videos URL of the named user."""
        user_id = self._match_id(url)
        videos_url = f'https://www.youtube.com/user/{user_id}/videos'
        return self.url_result(videos_url, ie=YoutubeTabIE.ie_key(), video_id=user_id)
9558dcec 5207
b05654f0 5208
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    # ":ytfav" style keywords; forwarded to the "LL" playlist.
    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube liked videos; ":ytfav" keyword (requires cookies)'
    _LOGIN_REQUIRED = True
    _VALID_URL = r':ytfav(?:ou?rite)?s?'
    _TESTS = [{
        'url': ':ytfav',
        'only_matching': True,
    }, {
        'url': ':ytfavorites',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the fixed "LL" playlist URL."""
        liked_url = 'https://www.youtube.com/playlist?list=LL'
        return self.url_result(liked_url, ie=YoutubeTabIE.ie_key())
5226
5227
class YoutubeSearchIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor):
    # Search extractor configuration for the "ytsearch" key.
    IE_NAME = 'youtube:search'
    IE_DESC = 'YouTube search'
    _SEARCH_KEY = 'ytsearch'
    # Videos only
    _SEARCH_PARAMS = 'EgIQAQ%3D%3D'
    _TESTS = []
b05654f0 5234
a61fd4cf 5235
class YoutubeSearchDateIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor):
    # Search extractor configuration for the "ytsearchdate" key.
    IE_NAME = f'{YoutubeSearchIE.IE_NAME}:date'
    IE_DESC = 'YouTube search, newest videos first'
    _SEARCH_KEY = 'ytsearchdate'
    # Videos only, sorted by date
    _SEARCH_PARAMS = 'CAISAhAB'
75dff0ee 5241
c9ae7b95 5242
class YoutubeSearchURLIE(YoutubeTabBaseInfoExtractor):
    # /results URLs; the query and the optional "sp" parameter are read from
    # the URL's query string.
    IE_NAME = f'{YoutubeSearchIE.IE_NAME}_url'
    IE_DESC = 'YouTube search URLs with sorting and filter support'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?:[^&]+)(?:[&]|$)'
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
        }
    }, {
        'url': 'https://www.youtube.com/results?search_query=python&sp=EgIQAg%253D%253D',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'python',
            'title': 'python',
        }
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return a playlist of the search results, using the query as both id and title."""
        params = parse_qs(url)
        # 'search_query' takes precedence over 'q'
        query = (params.get('search_query') or params.get('q'))[0]
        search_params = params.get('sp', (None,))[0]
        results = self._search_results(query, search_params)
        return self.playlist_result(results, query, query)
3462ffa8 5271
5272
class YoutubeFeedsInfoExtractor(YoutubeTabIE):
    """
    Common base for the feed extractors.
    Every subclass has to provide a _FEED_NAME attribute.
    """
    _TESTS = []
    _LOGIN_REQUIRED = True

    @property
    def IE_NAME(self):
        # Derived from the subclass' feed name
        return f'youtube:{self._FEED_NAME}'

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the /feed/<name> URL of this feed."""
        feed_url = f'https://www.youtube.com/feed/{self._FEED_NAME}'
        return self.url_result(feed_url, ie=YoutubeTabIE.ie_key())
25f14e9f
S
5289
5290
class YoutubeWatchLaterIE(InfoExtractor):
    # ":ytwatchlater" keyword; forwarded to the "WL" playlist.
    IE_NAME = 'youtube:watchlater'
    IE_DESC = 'Youtube watch later list; ":ytwatchlater" keyword (requires cookies)'
    _VALID_URL = r':ytwatchlater'
    _TESTS = [{
        'url': ':ytwatchlater',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the fixed "WL" playlist URL."""
        watch_later_url = 'https://www.youtube.com/playlist?list=WL'
        return self.url_result(watch_later_url, ie=YoutubeTabIE.ie_key())
3462ffa8 5303
5304
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    # Matches the bare youtube.com home page and the ":ytrec" keywords.
    IE_DESC = 'YouTube recommended videos; ":ytrec" keyword'
    _FEED_NAME = 'recommended'
    # Overrides the login requirement set on the feeds base class
    _LOGIN_REQUIRED = False
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/?(?:[?#]|$)|:ytrec(?:ommended)?'
    _TESTS = [{
        'url': ':ytrec',
        'only_matching': True,
    }, {
        'url': ':ytrecommended',
        'only_matching': True,
    }, {
        'url': 'https://youtube.com',
        'only_matching': True,
    }]
1ed5b5c9 5320
1ed5b5c9 5321
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the "subscriptions" feed (":ytsubs" keyword family)."""
    _FEED_NAME = 'subscriptions'
    IE_DESC = 'YouTube subscriptions feed; ":ytsubs" keyword (requires cookies)'
    _VALID_URL = r':ytsub(?:scription)?s?'
    # All cases only check URL matching.
    _TESTS = [
        {'url': test_url, 'only_matching': True}
        for test_url in (':ytsubs', ':ytsubscriptions')
    ]
1ed5b5c9 5333
1ed5b5c9 5334
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the "history" feed (":ythis" / ":ythistory" keywords)."""
    _FEED_NAME = 'history'
    IE_DESC = 'Youtube watch history; ":ythis" keyword (requires cookies)'
    _VALID_URL = r':ythis(?:tory)?'
    _TESTS = [{'url': ':ythistory', 'only_matching': True}]
1ed5b5c9
JMF
5343
5344
15870e90
PH
class YoutubeTruncatedURLIE(InfoExtractor):
    """Catch watch/attribution URLs that end before any video id appears.

    The pattern only matches when the URL stops right after ``watch?``
    (optionally followed by a single feature/annotation_id/x-yt-cl/hl/t
    parameter) or after an ``attribution_link?a=...`` parameter. Extraction
    always fails with an explanatory, expected error.
    """
    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError explaining shell quoting.

        Raises:
            ExtractorError: unconditionally, with ``expected=True``.
        """
        # The suggested command names this program (yt-dlp); the message
        # previously advertised the upstream tool's executable instead.
        raise ExtractorError(
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like yt-dlp '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply yt-dlp BaW_jenozKc .',
            expected=True)
772fd5cc
PH
5392
5393
class YoutubeClipIE(InfoExtractor):
    """Match youtube.com/clip/ URLs, warn, and pass them to the 'Generic' extractor."""
    IE_NAME = 'youtube:clip'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/clip/'

    def _real_extract(self, url):
        """Emit the not-supported warning, then return a result for the same URL."""
        warning = 'YouTube clips are not currently supported. The entire video will be downloaded instead'
        self.report_warning(warning)
        return self.url_result(url, ie='Generic')
5402
5403
772fd5cc
PH
class YoutubeTruncatedIDIE(InfoExtractor):
    """Catch watch URLs that end in a ``v=`` value of only 1 to 10 id characters.

    Extraction always fails with an expected error naming the partial id.
    """
    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [{'url': 'https://www.youtube.com/watch?v=N_708QY7Ob', 'only_matching': True}]

    def _real_extract(self, url):
        """Raise an expected ExtractorError reporting the incomplete id and URL."""
        partial_id = self._match_id(url)
        message = 'Incomplete YouTube ID %s. URL %s looks truncated.' % (partial_id, url)
        raise ExtractorError(message, expected=True)