]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/youtube.py
[downloader/external] Smarter detection of executable
[yt-dlp.git] / yt_dlp / extractor / youtube.py
CommitLineData
6e634cbe 1import base64
d92f5d5a 2import calendar
109dd3b2 3import copy
fe93e2c4 4import datetime
a5c56234 5import hashlib
0ca96d48 6import itertools
c5e8d7af 7import json
720c3099 8import math
c4417ddb 9import os.path
d77ab8e2 10import random
c5e8d7af 11import re
46383212 12import sys
f8271158 13import threading
8a784c74 14import time
e0df6211 15import traceback
14f25df2 16import urllib.error
ac668111 17import urllib.parse
c5e8d7af 18
b05654f0 19from .common import InfoExtractor, SearchInfoExtractor
25836db6 20from .openload import PhantomJSwrapper
14f25df2 21from ..compat import functools
545cc85d 22from ..jsinterp import JSInterpreter
4bb4a188 23from ..utils import (
f8271158 24 NO_DEFAULT,
25 ExtractorError,
693f0600 26 UserNotLive,
720c3099 27 bug_reports_message,
82d02080 28 classproperty,
c5e8d7af 29 clean_html,
d92f5d5a 30 datetime_from_str,
11f9be09 31 dict_get,
2d30521a 32 float_or_none,
11f9be09 33 format_field,
ff91cf74 34 get_first,
dd27fd17 35 int_or_none,
641ad5d8 36 is_html,
34921b43 37 join_nonempty,
48416bc4 38 js_to_json,
94278f72 39 mimetype2ext,
9c0d7f49 40 network_exceptions,
11f9be09 41 orderedSet,
6310acf5 42 parse_codecs,
49bd8c66 43 parse_count,
7c80519c 44 parse_duration,
7ea65411 45 parse_iso8601,
4dfbf869 46 parse_qs,
dca3ff4a 47 qualities,
3995d37d 48 remove_start,
cf7e015f 49 smuggle_url,
dbdaaa23 50 str_or_none,
c93d53f5 51 str_to_int,
f3aa3c3f 52 strftime_or_none,
7c365c21 53 traverse_obj,
556dbe7f 54 try_get,
c5e8d7af
PH
55 unescapeHTML,
56 unified_strdate,
f0d785d3 57 unified_timestamp,
cf7e015f 58 unsmuggle_url,
8bdd16b4 59 update_url_query,
21c340b8 60 url_or_none,
fe93e2c4 61 urljoin,
7c365c21 62 variadic,
c5e8d7af
PH
63)
64
# Built-in Innertube client configurations, keyed by client name.
# any clients starting with _ cannot be explicitly requested by the user
INNERTUBE_CLIENTS = {
    # ---- web clients ----
    'web': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB',
                'clientVersion': '2.20220801.00.00',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 1,
    },
    'web_embedded': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB_EMBEDDED_PLAYER',
                'clientVersion': '1.20220731.00.00',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 56,
    },
    'web_music': {
        'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
        'INNERTUBE_HOST': 'music.youtube.com',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB_REMIX',
                'clientVersion': '1.20220727.01.00',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 67,
    },
    'web_creator': {
        'INNERTUBE_API_KEY': 'AIzaSyBUPetSUmoZL-OhlxA7wSac5XinrygCqMo',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB_CREATOR',
                'clientVersion': '1.20220726.00.00',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 62,
    },

    # ---- android clients ----
    'android': {
        'INNERTUBE_API_KEY': 'AIzaSyA8eiZmM1FaDVjRy-df2KTyQ_vz_yYM39w',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID',
                'clientVersion': '17.31.35',
                'androidSdkVersion': 30,
                'userAgent': 'com.google.android.youtube/17.31.35 (Linux; U; Android 11) gzip',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 3,
        'REQUIRE_JS_PLAYER': False,
    },
    'android_embedded': {
        'INNERTUBE_API_KEY': 'AIzaSyCjc_pVEDi4qsv5MtC2dMXzpIaDoRFLsxw',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID_EMBEDDED_PLAYER',
                'clientVersion': '17.31.35',
                'androidSdkVersion': 30,
                'userAgent': 'com.google.android.youtube/17.31.35 (Linux; U; Android 11) gzip',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 55,
        'REQUIRE_JS_PLAYER': False,
    },
    'android_music': {
        'INNERTUBE_API_KEY': 'AIzaSyAOghZGza2MQSZkY_zfZ370N-PUdXEo8AI',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID_MUSIC',
                'clientVersion': '5.16.51',
                'androidSdkVersion': 30,
                'userAgent': 'com.google.android.apps.youtube.music/5.16.51 (Linux; U; Android 11) gzip',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 21,
        'REQUIRE_JS_PLAYER': False,
    },
    'android_creator': {
        'INNERTUBE_API_KEY': 'AIzaSyD_qjV8zaaUMehtLkrKFgVeSX_Iqbtyws8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID_CREATOR',
                'clientVersion': '22.30.100',
                'androidSdkVersion': 30,
                'userAgent': 'com.google.android.apps.youtube.creator/22.30.100 (Linux; U; Android 11) gzip',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 14,
        'REQUIRE_JS_PLAYER': False,
    },

    # ---- iOS clients ----
    # iOS clients have HLS live streams. Setting device model to get 60fps formats.
    # See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/680#issuecomment-1002724558
    'ios': {
        'INNERTUBE_API_KEY': 'AIzaSyB-63vPrdThhKuerbB2N_l7Kwwcxj6yUAc',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS',
                'clientVersion': '17.33.2',
                'deviceModel': 'iPhone14,3',
                'userAgent': 'com.google.ios.youtube/17.33.2 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 5,
        'REQUIRE_JS_PLAYER': False,
    },
    'ios_embedded': {
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS_MESSAGES_EXTENSION',
                'clientVersion': '17.33.2',
                'deviceModel': 'iPhone14,3',
                'userAgent': 'com.google.ios.youtube/17.33.2 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 66,
        'REQUIRE_JS_PLAYER': False,
    },
    'ios_music': {
        'INNERTUBE_API_KEY': 'AIzaSyBAETezhkwP0ZWA02RsqT1zu78Fpt0bC_s',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS_MUSIC',
                'clientVersion': '5.21',
                'deviceModel': 'iPhone14,3',
                'userAgent': 'com.google.ios.youtubemusic/5.21 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 26,
        'REQUIRE_JS_PLAYER': False,
    },
    'ios_creator': {
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS_CREATOR',
                'clientVersion': '22.33.101',
                'deviceModel': 'iPhone14,3',
                'userAgent': 'com.google.ios.ytcreator/22.33.101 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 15,
        'REQUIRE_JS_PLAYER': False,
    },

    # ---- other clients ----
    # mweb has 'ultralow' formats
    # See: https://github.com/yt-dlp/yt-dlp/pull/557
    'mweb': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'MWEB',
                'clientVersion': '2.20220801.00.00',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 2,
    },
    # This client can access age restricted videos (unless the uploader has disabled the 'allow embedding' option)
    # See: https://github.com/zerodytrash/YouTube-Internal-Clients
    'tv_embedded': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'TVHTML5_SIMPLY_EMBEDDED_PLAYER',
                'clientVersion': '2.0',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 85,
    },
}
237
238
e7870111
D
239def _split_innertube_client(client_name):
240 variant, *base = client_name.rsplit('.', 1)
241 if base:
242 return variant, base[0], variant
243 base, *variant = client_name.split('_', 1)
244 return client_name, base, variant[0] if variant else None
245
246
def build_innertube_clients():
    """
    Fill in defaults and priorities for every entry of INNERTUBE_CLIENTS (in place).

    Each client gets a default API key, host, REQUIRE_JS_PLAYER flag and 'hl', plus a
    'priority' derived from its base client. For every client without a variant an
    additional '<client>_embedscreen' entry is registered.
    """
    third_party = {
        'embedUrl': 'https://www.youtube.com/',  # Can be any valid URL
    }
    base_clients = ('android', 'web', 'tv', 'ios', 'mweb')
    priority = qualities(base_clients[::-1])

    # Iterate over a snapshot, since new entries are added to the dict inside the loop
    for client, ytcfg in list(INNERTUBE_CLIENTS.items()):
        ytcfg.setdefault('INNERTUBE_API_KEY', 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8')
        ytcfg.setdefault('INNERTUBE_HOST', 'www.youtube.com')
        ytcfg.setdefault('REQUIRE_JS_PLAYER', True)
        ytcfg['INNERTUBE_CONTEXT']['client'].setdefault('hl', 'en')

        _, base_client, variant = _split_innertube_client(client)
        ytcfg['priority'] = 10 * priority(base_client)

        if variant == 'embedded':
            ytcfg['INNERTUBE_CONTEXT']['thirdParty'] = third_party
            ytcfg['priority'] -= 2
        elif variant:
            ytcfg['priority'] -= 3
        else:
            # No variant: derive an extra client that uses the EMBED client screen
            embedscreen = copy.deepcopy(ytcfg)
            embedscreen['INNERTUBE_CONTEXT']['client']['clientScreen'] = 'EMBED'
            embedscreen['INNERTUBE_CONTEXT']['thirdParty'] = third_party
            embedscreen['priority'] -= 3
            INNERTUBE_CLIENTS[f'{client}_embedscreen'] = embedscreen


build_innertube_clients()
276
277
class YoutubeBaseInfoExtractor(InfoExtractor):
    """Provide base functions for Youtube extractors"""

    # Alternation of path names that are reserved (regex fragment)
    _RESERVED_NAMES = (
        r'channel|c|user|playlist|watch|w|v|embed|e|watch_popup|clip|'
        r'shorts|movies|results|search|shared|hashtag|trending|explore|feed|feeds|'
        r'browse|oembed|get_video_info|iframe_api|s/player|'
        r'storefront|oops|index|account|reporthistory|t/terms|about|upload|signin|logout')

    # Regex fragment matching a playlist ID
    _PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)'

    # _NETRC_MACHINE = 'youtube'

    # If True it will raise an error if no login info is provided
    _LOGIN_REQUIRED = False

    # Host name patterns (regex fragments) of alternative front-end sites
    _INVIDIOUS_SITES = (
        # invidious-redirect websites
        r'(?:www\.)?redirect\.invidious\.io',
        r'(?:(?:www|dev)\.)?invidio\.us',
        # Invidious instances taken from https://github.com/iv-org/documentation/blob/master/docs/instances.md
        r'(?:www\.)?invidious\.pussthecat\.org',
        r'(?:www\.)?invidious\.zee\.li',
        r'(?:www\.)?invidious\.ethibox\.fr',
        r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',
        r'(?:www\.)?osbivz6guyeahrwp2lnwyjk2xos342h4ocsxyqrlaopqjuhwn2djiiyd\.onion',
        r'(?:www\.)?u2cvlit75owumwpy4dj2hsmvkq7nvrclkpht7xgyye2pyoxhpmclkrad\.onion',
        # youtube-dl invidious instances list
        r'(?:(?:www|no)\.)?invidiou\.sh',
        r'(?:(?:www|fi)\.)?invidious\.snopyta\.org',
        r'(?:www\.)?invidious\.kabi\.tk',
        r'(?:www\.)?invidious\.mastodon\.host',
        r'(?:www\.)?invidious\.zapashcanon\.fr',
        r'(?:www\.)?(?:invidious(?:-us)?|piped)\.kavin\.rocks',
        r'(?:www\.)?invidious\.tinfoil-hat\.net',
        r'(?:www\.)?invidious\.himiko\.cloud',
        r'(?:www\.)?invidious\.reallyancient\.tech',
        r'(?:www\.)?invidious\.tube',
        r'(?:www\.)?invidiou\.site',
        r'(?:www\.)?invidious\.site',
        r'(?:www\.)?invidious\.xyz',
        r'(?:www\.)?invidious\.nixnet\.xyz',
        r'(?:www\.)?invidious\.048596\.xyz',
        r'(?:www\.)?invidious\.drycat\.fr',
        r'(?:www\.)?inv\.skyn3t\.in',
        r'(?:www\.)?tube\.poal\.co',
        r'(?:www\.)?tube\.connect\.cafe',
        r'(?:www\.)?vid\.wxzm\.sx',
        r'(?:www\.)?vid\.mint\.lgbt',
        r'(?:www\.)?vid\.puffyan\.us',
        r'(?:www\.)?yewtu\.be',
        r'(?:www\.)?yt\.elukerio\.org',
        r'(?:www\.)?yt\.lelux\.fi',
        r'(?:www\.)?invidious\.ggc-project\.de',
        r'(?:www\.)?yt\.maisputain\.ovh',
        r'(?:www\.)?ytprivate\.com',
        r'(?:www\.)?invidious\.13ad\.de',
        r'(?:www\.)?invidious\.toot\.koeln',
        r'(?:www\.)?invidious\.fdn\.fr',
        r'(?:www\.)?watch\.nettohikari\.com',
        r'(?:www\.)?invidious\.namazso\.eu',
        r'(?:www\.)?invidious\.silkky\.cloud',
        r'(?:www\.)?invidious\.exonip\.de',
        r'(?:www\.)?invidious\.riverside\.rocks',
        r'(?:www\.)?invidious\.blamefran\.net',
        r'(?:www\.)?invidious\.moomoo\.de',
        r'(?:www\.)?ytb\.trom\.tf',
        r'(?:www\.)?yt\.cyberhost\.uk',
        r'(?:www\.)?kgg2m7yk5aybusll\.onion',
        r'(?:www\.)?qklhadlycap4cnod\.onion',
        r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',
        r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',
        r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',
        r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',
        r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',
        r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',
        r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',
        r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',
        r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',
        r'(?:www\.)?hpniueoejy4opn7bc4ftgazyqjoeqwlvh2uiku2xqku6zpoa4bf5ruid\.onion',
        # piped instances from https://github.com/TeamPiped/Piped/wiki/Instances
        r'(?:www\.)?piped\.kavin\.rocks',
        r'(?:www\.)?piped\.silkky\.cloud',
        r'(?:www\.)?piped\.tokhmi\.xyz',
        r'(?:www\.)?piped\.moomoo\.me',
        r'(?:www\.)?il\.ax',
        r'(?:www\.)?piped\.syncpundit\.com',
        r'(?:www\.)?piped\.mha\.fi',
        r'(?:www\.)?piped\.mint\.lgbt',
        r'(?:www\.)?piped\.privacy\.com\.de',
    )
369
cce889b9 370 def _initialize_consent(self):
371 cookies = self._get_cookies('https://www.youtube.com/')
372 if cookies.get('__Secure-3PSID'):
373 return
374 consent_id = None
375 consent = cookies.get('CONSENT')
376 if consent:
377 if 'YES' in consent.value:
378 return
379 consent_id = self._search_regex(
380 r'PENDING\+(\d+)', consent.value, 'consent', default=None)
381 if not consent_id:
382 consent_id = random.randint(100, 999)
383 self._set_cookie('.youtube.com', 'CONSENT', 'YES+cb.20210328-17-p0.en+FX+%s' % consent_id)
8d81f3e3 384
f3aa3c3f 385 def _initialize_pref(self):
386 cookies = self._get_cookies('https://www.youtube.com/')
387 pref_cookie = cookies.get('PREF')
388 pref = {}
389 if pref_cookie:
390 try:
14f25df2 391 pref = dict(urllib.parse.parse_qsl(pref_cookie.value))
f3aa3c3f 392 except ValueError:
393 self.report_warning('Failed to parse user PREF cookie' + bug_reports_message())
396a76f7 394 pref.update({'hl': 'en', 'tz': 'UTC'})
14f25df2 395 self._set_cookie('.youtube.com', name='PREF', value=urllib.parse.urlencode(pref))
f3aa3c3f 396
b2e8bc1b 397 def _real_initialize(self):
f3aa3c3f 398 self._initialize_pref()
cce889b9 399 self._initialize_consent()
a25bca9f 400 self._check_login_required()
401
402 def _check_login_required(self):
24146491 403 if self._LOGIN_REQUIRED and not self._cookies_passed:
52efa4b3 404 self.raise_login_required('Login details are needed to download this content', method='cookies')
c5e8d7af 405
b7c47b74 406 _YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*='
407 _YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*='
a0566bbf 408
000c15a4 409 def _get_default_ytcfg(self, client='web'):
410 return copy.deepcopy(INNERTUBE_CLIENTS[client])
109dd3b2 411
000c15a4 412 def _get_innertube_host(self, client='web'):
413 return INNERTUBE_CLIENTS[client]['INNERTUBE_HOST']
109dd3b2 414
000c15a4 415 def _ytcfg_get_safe(self, ytcfg, getter, expected_type=None, default_client='web'):
109dd3b2 416 # try_get but with fallback to default ytcfg client values when present
417 _func = lambda y: try_get(y, getter, expected_type)
418 return _func(ytcfg) or _func(self._get_default_ytcfg(default_client))
419
000c15a4 420 def _extract_client_name(self, ytcfg, default_client='web'):
3619f78d 421 return self._ytcfg_get_safe(
422 ytcfg, (lambda x: x['INNERTUBE_CLIENT_NAME'],
14f25df2 423 lambda x: x['INNERTUBE_CONTEXT']['client']['clientName']), str, default_client)
109dd3b2 424
000c15a4 425 def _extract_client_version(self, ytcfg, default_client='web'):
3619f78d 426 return self._ytcfg_get_safe(
427 ytcfg, (lambda x: x['INNERTUBE_CLIENT_VERSION'],
14f25df2 428 lambda x: x['INNERTUBE_CONTEXT']['client']['clientVersion']), str, default_client)
109dd3b2 429
2ae778b8 430 def _select_api_hostname(self, req_api_hostname, default_client=None):
431 return (self._configuration_arg('innertube_host', [''], ie_key=YoutubeIE.ie_key())[0]
432 or req_api_hostname or self._get_innertube_host(default_client or 'web'))
433
000c15a4 434 def _extract_api_key(self, ytcfg=None, default_client='web'):
14f25df2 435 return self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_API_KEY'], str, default_client)
109dd3b2 436
000c15a4 437 def _extract_context(self, ytcfg=None, default_client='web'):
f3aa3c3f 438 context = get_first(
439 (ytcfg, self._get_default_ytcfg(default_client)), 'INNERTUBE_CONTEXT', expected_type=dict)
396a76f7 440 # Enforce language and tz for extraction
441 client_context = traverse_obj(context, 'client', expected_type=dict, default={})
442 client_context.update({'hl': 'en', 'timeZone': 'UTC', 'utcOffsetMinutes': 0})
109dd3b2 443 return context
444
cf87314d 445 _SAPISID = None
446
109dd3b2 447 def _generate_sapisidhash_header(self, origin='https://www.youtube.com'):
a5c56234 448 time_now = round(time.time())
cf87314d 449 if self._SAPISID is None:
450 yt_cookies = self._get_cookies('https://www.youtube.com')
451 # Sometimes SAPISID cookie isn't present but __Secure-3PAPISID is.
452 # See: https://github.com/yt-dlp/yt-dlp/issues/393
453 sapisid_cookie = dict_get(
454 yt_cookies, ('__Secure-3PAPISID', 'SAPISID'))
455 if sapisid_cookie and sapisid_cookie.value:
456 self._SAPISID = sapisid_cookie.value
457 self.write_debug('Extracted SAPISID cookie')
458 # SAPISID cookie is required if not already present
459 if not yt_cookies.get('SAPISID'):
460 self.write_debug('Copying __Secure-3PAPISID cookie to SAPISID cookie')
461 self._set_cookie(
462 '.youtube.com', 'SAPISID', self._SAPISID, secure=True, expire_time=time_now + 3600)
463 else:
464 self._SAPISID = False
465 if not self._SAPISID:
466 return None
1974e99f 467 # SAPISIDHASH algorithm from https://stackoverflow.com/a/32065323
468 sapisidhash = hashlib.sha1(
86e5f3ed 469 f'{time_now} {self._SAPISID} {origin}'.encode()).hexdigest()
1974e99f 470 return f'SAPISIDHASH {time_now}_{sapisidhash}'
a5c56234
M
471
472 def _call_api(self, ep, query, video_id, fatal=True, headers=None,
f4f751af 473 note='Downloading API JSON', errnote='Unable to download API page',
000c15a4 474 context=None, api_key=None, api_hostname=None, default_client='web'):
f4f751af 475
109dd3b2 476 data = {'context': context} if context else {'context': self._extract_context(default_client=default_client)}
8bdd16b4 477 data.update(query)
11f9be09 478 real_headers = self.generate_api_headers(default_client=default_client)
f4f751af 479 real_headers.update({'content-type': 'application/json'})
480 if headers:
481 real_headers.update(headers)
2ae778b8 482 api_key = (self._configuration_arg('innertube_key', [''], ie_key=YoutubeIE.ie_key(), casesense=True)[0]
483 or api_key or self._extract_api_key(default_client=default_client))
545cc85d 484 return self._download_json(
2ae778b8 485 f'https://{self._select_api_hostname(api_hostname, default_client)}/youtubei/v1/{ep}',
a5c56234 486 video_id=video_id, fatal=fatal, note=note, errnote=errnote,
f4f751af 487 data=json.dumps(data).encode('utf8'), headers=real_headers,
2ae778b8 488 query={'key': api_key, 'prettyPrint': 'false'})
f4f751af 489
65141660 490 def extract_yt_initial_data(self, item_id, webpage, fatal=True):
491 return self._search_json(self._YT_INITIAL_DATA_RE, webpage, 'yt initial data', item_id, fatal=fatal)
1890fc63 492
99e9e001 493 @staticmethod
494 def _extract_session_index(*data):
495 """
496 Index of current account in account list.
497 See: https://github.com/yt-dlp/yt-dlp/pull/519
498 """
499 for ytcfg in data:
500 session_index = int_or_none(try_get(ytcfg, lambda x: x['SESSION_INDEX']))
501 if session_index is not None:
502 return session_index
503
504 # Deprecated?
505 def _extract_identity_token(self, ytcfg=None, webpage=None):
a1c5d2ca 506 if ytcfg:
14f25df2 507 token = try_get(ytcfg, lambda x: x['ID_TOKEN'], str)
a1c5d2ca
M
508 if token:
509 return token
99e9e001 510 if webpage:
511 return self._search_regex(
512 r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
513 'identity token', default=None, fatal=False)
a1c5d2ca
M
514
515 @staticmethod
fe93e2c4 516 def _extract_account_syncid(*args):
8ea3f7b9 517 """
518 Extract syncId required to download private playlists of secondary channels
fe93e2c4 519 @params response and/or ytcfg
8ea3f7b9 520 """
fe93e2c4 521 for data in args:
522 # ytcfg includes channel_syncid if on secondary channel
14f25df2 523 delegated_sid = try_get(data, lambda x: x['DELEGATED_SESSION_ID'], str)
fe93e2c4 524 if delegated_sid:
525 return delegated_sid
526 sync_ids = (try_get(
527 data, (lambda x: x['responseContext']['mainAppWebResponseContext']['datasyncId'],
14f25df2 528 lambda x: x['DATASYNC_ID']), str) or '').split('||')
fe93e2c4 529 if len(sync_ids) >= 2 and sync_ids[1]:
530 # datasyncid is of the form "channel_syncid||user_syncid" for secondary channel
531 # and just "user_syncid||" for primary channel. We only want the channel_syncid
532 return sync_ids[0]
a1c5d2ca 533
ac56cf38 534 @staticmethod
535 def _extract_visitor_data(*args):
536 """
537 Extracts visitorData from an API response or ytcfg
538 Appears to be used to track session state
539 """
9222c381 540 return get_first(
6c73052c 541 args, [('VISITOR_DATA', ('INNERTUBE_CONTEXT', 'client', 'visitorData'), ('responseContext', 'visitorData'))],
9222c381 542 expected_type=str)
ac56cf38 543
2762dbb1 544 @functools.cached_property
99e9e001 545 def is_authenticated(self):
546 return bool(self._generate_sapisidhash_header())
547
11f9be09 548 def extract_ytcfg(self, video_id, webpage):
8c54a305 549 if not webpage:
550 return {}
29f7c58a 551 return self._parse_json(
552 self._search_regex(
553 r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg',
f4f751af 554 default='{}'), video_id, fatal=False) or {}
555
11f9be09 556 def generate_api_headers(
99e9e001 557 self, *, ytcfg=None, account_syncid=None, session_index=None,
558 visitor_data=None, identity_token=None, api_hostname=None, default_client='web'):
559
2ae778b8 560 origin = 'https://' + (self._select_api_hostname(api_hostname, default_client))
f4f751af 561 headers = {
14f25df2 562 'X-YouTube-Client-Name': str(
11f9be09 563 self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_CONTEXT_CLIENT_NAME'], default_client=default_client)),
564 'X-YouTube-Client-Version': self._extract_client_version(ytcfg, default_client),
99e9e001 565 'Origin': origin,
566 'X-Youtube-Identity-Token': identity_token or self._extract_identity_token(ytcfg),
567 'X-Goog-PageId': account_syncid or self._extract_account_syncid(ytcfg),
50ac0e54 568 'X-Goog-Visitor-Id': visitor_data or self._extract_visitor_data(ytcfg),
569 'User-Agent': self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_CONTEXT']['client']['userAgent'], default_client=default_client)
99e9e001 570 }
571 if session_index is None:
314ee305 572 session_index = self._extract_session_index(ytcfg)
573 if account_syncid or session_index is not None:
574 headers['X-Goog-AuthUser'] = session_index if session_index is not None else 0
99e9e001 575
109dd3b2 576 auth = self._generate_sapisidhash_header(origin)
f4f751af 577 if auth is not None:
578 headers['Authorization'] = auth
109dd3b2 579 headers['X-Origin'] = origin
99e9e001 580 return {h: v for h, v in headers.items() if v is not None}
29f7c58a 581
a25bca9f 582 def _download_ytcfg(self, client, video_id):
583 url = {
584 'web': 'https://www.youtube.com',
585 'web_music': 'https://music.youtube.com',
586 'web_embedded': f'https://www.youtube.com/embed/{video_id}?html5=1'
587 }.get(client)
588 if not url:
589 return {}
590 webpage = self._download_webpage(
591 url, video_id, fatal=False, note=f'Downloading {client.replace("_", " ").strip()} client config')
592 return self.extract_ytcfg(video_id, webpage) or {}
593
2d6659b9 594 @staticmethod
595 def _build_api_continuation_query(continuation, ctp=None):
596 query = {
597 'continuation': continuation
598 }
599 # TODO: Inconsistency with clickTrackingParams.
600 # Currently we have a fixed ctp contained within context (from ytcfg)
601 # and a ctp in root query for continuation.
602 if ctp:
603 query['clickTracking'] = {'clickTrackingParams': ctp}
604 return query
605
2d6659b9 606 @classmethod
607 def _extract_next_continuation_data(cls, renderer):
608 next_continuation = try_get(
609 renderer, (lambda x: x['continuations'][0]['nextContinuationData'],
610 lambda x: x['continuation']['reloadContinuationData']), dict)
611 if not next_continuation:
612 return
613 continuation = next_continuation.get('continuation')
614 if not continuation:
615 return
616 ctp = next_continuation.get('clickTrackingParams')
fe93e2c4 617 return cls._build_api_continuation_query(continuation, ctp)
2d6659b9 618
619 @classmethod
620 def _extract_continuation_ep_data(cls, continuation_ep: dict):
621 if isinstance(continuation_ep, dict):
622 continuation = try_get(
14f25df2 623 continuation_ep, lambda x: x['continuationCommand']['token'], str)
2d6659b9 624 if not continuation:
625 return
626 ctp = continuation_ep.get('clickTrackingParams')
fe93e2c4 627 return cls._build_api_continuation_query(continuation, ctp)
2d6659b9 628
629 @classmethod
630 def _extract_continuation(cls, renderer):
631 next_continuation = cls._extract_next_continuation_data(renderer)
632 if next_continuation:
633 return next_continuation
fe93e2c4 634
2d6659b9 635 contents = []
636 for key in ('contents', 'items'):
637 contents.extend(try_get(renderer, lambda x: x[key], list) or [])
fe93e2c4 638
2d6659b9 639 for content in contents:
640 if not isinstance(content, dict):
641 continue
642 continuation_ep = try_get(
643 content, (lambda x: x['continuationItemRenderer']['continuationEndpoint'],
644 lambda x: x['continuationItemRenderer']['button']['buttonRenderer']['command']),
645 dict)
646 continuation = cls._extract_continuation_ep_data(continuation_ep)
647 if continuation:
648 return continuation
649
fe93e2c4 650 @classmethod
651 def _extract_alerts(cls, data):
109dd3b2 652 for alert_dict in try_get(data, lambda x: x['alerts'], list) or []:
653 if not isinstance(alert_dict, dict):
654 continue
655 for alert in alert_dict.values():
656 alert_type = alert.get('type')
657 if not alert_type:
658 continue
052e1350 659 message = cls._get_text(alert, 'text')
109dd3b2 660 if message:
661 yield alert_type, message
662
c0ac49bc 663 def _report_alerts(self, alerts, expected=True, fatal=True, only_once=False):
109dd3b2 664 errors = []
665 warnings = []
666 for alert_type, alert_message in alerts:
641ad5d8 667 if alert_type.lower() == 'error' and fatal:
109dd3b2 668 errors.append([alert_type, alert_message])
669 else:
670 warnings.append([alert_type, alert_message])
671
672 for alert_type, alert_message in (warnings + errors[:-1]):
86e5f3ed 673 self.report_warning(f'YouTube said: {alert_type} - {alert_message}', only_once=only_once)
109dd3b2 674 if errors:
675 raise ExtractorError('YouTube said: %s' % errors[-1][1], expected=expected)
676
677 def _extract_and_report_alerts(self, data, *args, **kwargs):
678 return self._report_alerts(self._extract_alerts(data), *args, **kwargs)
679
47193e02 680 def _extract_badges(self, renderer: dict):
681 badges = set()
682 for badge in try_get(renderer, lambda x: x['badges'], list) or []:
14f25df2 683 label = try_get(badge, lambda x: x['metadataBadgeRenderer']['label'], str)
47193e02 684 if label:
685 badges.add(label.lower())
686 return badges
687
688 @staticmethod
052e1350 689 def _get_text(data, *path_list, max_runs=None):
690 for path in path_list or [None]:
691 if path is None:
692 obj = [data]
693 else:
694 obj = traverse_obj(data, path, default=[])
695 if not any(key is ... or isinstance(key, (list, tuple)) for key in variadic(path)):
696 obj = [obj]
697 for item in obj:
14f25df2 698 text = try_get(item, lambda x: x['simpleText'], str)
052e1350 699 if text:
700 return text
701 runs = try_get(item, lambda x: x['runs'], list) or []
702 if not runs and isinstance(item, list):
703 runs = item
704
705 runs = runs[:min(len(runs), max_runs or len(runs))]
706 text = ''.join(traverse_obj(runs, (..., 'text'), expected_type=str, default=[]))
707 if text:
708 return text
47193e02 709
f0d785d3 710 def _get_count(self, data, *path_list):
711 count_text = self._get_text(data, *path_list) or ''
712 count = parse_count(count_text)
713 if count is None:
714 count = str_to_int(
715 self._search_regex(r'^([\d,]+)', re.sub(r'\s', '', count_text), 'count', default=None))
716 return count
717
a709d873 718 @staticmethod
719 def _extract_thumbnails(data, *path_list):
720 """
721 Extract thumbnails from thumbnails dict
722 @param path_list: path list to level that contains 'thumbnails' key
723 """
724 thumbnails = []
725 for path in path_list or [()]:
726 for thumbnail in traverse_obj(data, (*variadic(path), 'thumbnails', ...), default=[]):
727 thumbnail_url = url_or_none(thumbnail.get('url'))
728 if not thumbnail_url:
729 continue
730 # Sometimes youtube gives a wrong thumbnail URL. See:
731 # https://github.com/yt-dlp/yt-dlp/issues/233
732 # https://github.com/ytdl-org/youtube-dl/issues/28023
733 if 'maxresdefault' in thumbnail_url:
734 thumbnail_url = thumbnail_url.split('?')[0]
735 thumbnails.append({
736 'url': thumbnail_url,
737 'height': int_or_none(thumbnail.get('height')),
738 'width': int_or_none(thumbnail.get('width')),
739 })
740 return thumbnails
741
f3aa3c3f 742 @staticmethod
743 def extract_relative_time(relative_time_text):
744 """
745 Extracts a relative time from string and converts to dt object
f0d785d3 746 e.g. 'streamed 6 days ago', '5 seconds ago (edited)', 'updated today'
f3aa3c3f 747 """
f0d785d3 748 mobj = re.search(r'(?P<start>today|yesterday|now)|(?P<time>\d+)\s*(?P<unit>microsecond|second|minute|hour|day|week|month|year)s?\s*ago', relative_time_text)
f3aa3c3f 749 if mobj:
f0d785d3 750 start = mobj.group('start')
751 if start:
752 return datetime_from_str(start)
f3aa3c3f 753 try:
f0d785d3 754 return datetime_from_str('now-%s%s' % (mobj.group('time'), mobj.group('unit')))
f3aa3c3f 755 except ValueError:
756 return None
757
758 def _extract_time_text(self, renderer, *path_list):
a25bca9f 759 """@returns (timestamp, time_text)"""
f3aa3c3f 760 text = self._get_text(renderer, *path_list) or ''
761 dt = self.extract_relative_time(text)
762 timestamp = None
763 if isinstance(dt, datetime.datetime):
764 timestamp = calendar.timegm(dt.timetuple())
f0d785d3 765
766 if timestamp is None:
767 timestamp = (
768 unified_timestamp(text) or unified_timestamp(
769 self._search_regex(
17322130 770 (r'([a-z]+\s*\d{1,2},?\s*20\d{2})', r'(?:.+|^)(?:live|premieres|ed|ing)(?:\s*(?:on|for))?\s*(.+\d)'),
396a76f7 771 text.lower(), 'time text', default=None)))
f0d785d3 772
f3aa3c3f 773 if text and timestamp is None:
17322130 774 self.report_warning(f"Cannot parse localized time text '{text}'" + bug_reports_message(), only_once=True)
f3aa3c3f 775 return timestamp, text
776
def _extract_response(self, item_id, query, note='Downloading API JSON', headers=None,
                      ytcfg=None, check_get_keys=None, ep='browse', fatal=True, api_hostname=None,
                      default_client='web'):
    """
    Call the API endpoint `ep` through _call_api, retrying via self.RetryManager()

    An attempt is retried when:
      - the request failed with a network error that is not an HTTPError
      - the request failed with an HTTPError whose code is neither 403 nor 429
      - the alerts in the response raise an error containing 'unknown error'
      - nothing is found in the response at check_get_keys
    Every other failure is passed to self._error_or_warning together with
    `fatal`, and its result is returned.

    @returns the response once it passed all checks
    """
    for retry in self.RetryManager():
        try:
            # The call itself is always fatal; `fatal` is only applied when handling the error below
            response = self._call_api(
                ep=ep, fatal=True, headers=headers,
                video_id=item_id, query=query, note=note,
                context=self._extract_context(ytcfg, default_client),
                api_key=self._extract_api_key(ytcfg, default_client),
                api_hostname=api_hostname, default_client=default_client)
        except ExtractorError as err:
            cause = err.cause
            if not isinstance(cause, network_exceptions):
                return self._error_or_warning(err, fatal=fatal)
            if not isinstance(cause, urllib.error.HTTPError):
                retry.error = err
                continue

            # Peek at the start of the error body; if it is not HTML, look for an error message in it
            head = cause.read(512)
            if not is_html(head):
                error_body = self._webpage_read_content(cause, None, item_id, prefix=head) or '{}'
                error_json = self._parse_json(error_body, item_id, fatal=False)
                api_message = try_get(error_json, lambda x: x['error']['message'], str)
                if api_message:
                    self._report_alerts([('ERROR', api_message)], fatal=False)

            # Downloading page may result in intermittent 5xx HTTP error
            # Sometimes a 404 is also received. See: https://github.com/ytdl-org/youtube-dl/issues/28289
            # We also want to catch all other network exceptions since errors in later pages can be troublesome
            # See https://github.com/yt-dlp/yt-dlp/issues/507#issuecomment-880188210
            if cause.code in (403, 429):
                return self._error_or_warning(err, fatal=fatal)
            retry.error = err
            continue

        try:
            self._extract_and_report_alerts(response, only_once=True)
        except ExtractorError as err:
            # YouTube servers may return errors we want to retry on in a 200 OK response
            # See: https://github.com/yt-dlp/yt-dlp/issues/839
            if 'unknown error' not in err.msg.lower():
                return self._error_or_warning(err, fatal=fatal)
            retry.error = err
            continue

        # Youtube sometimes sends incomplete data
        # See: https://github.com/ytdl-org/youtube-dl/issues/28194
        if not traverse_obj(response, *variadic(check_get_keys)):
            retry.error = ExtractorError('Incomplete data received', expected=True)
            continue

        return response
109dd3b2 828
@staticmethod
def is_music_url(url):
    """@returns True if url starts with http(s)://music.youtube.com/, else False"""
    matched = re.match(r'https?://music\.youtube\.com/', url)
    return matched is not None
832
def _extract_video(self, renderer):
    """
    Build a 'url' type result pointing at YoutubeIE from a video renderer

    Metadata is taken from the renderer only. The resulting URL is a /shorts/
    URL if the thumbnail overlay style is 'SHORTS' or the renderer's navigation
    URL contains '/shorts/', otherwise a regular watch URL.
    'upload_date' is only filled in (from the published time text) when the
    'approximate_date' extractor argument of 'youtubetab' is set.
    """
    video_id = renderer.get('videoId')
    title = self._get_text(renderer, 'title')
    description = self._get_text(renderer, 'descriptionSnippet')

    duration = parse_duration(self._get_text(
        renderer, 'lengthText', ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'text')))
    if duration is None:
        # No usable length text; try to pick the duration out of the title's accessibility label
        accessibility_label = traverse_obj(
            renderer, ('title', 'accessibility', 'accessibilityData', 'label'), default='', expected_type=str)
        duration = parse_duration(self._search_regex(
            r'(?i)(ago)(?!.*\1)\s+(?P<duration>[a-z0-9 ,]+?)(?:\s+[\d,]+\s+views)?(?:\s+-\s+play\s+short)?$',
            accessibility_label, video_id, default=None, group='duration'))

    view_count = self._get_count(renderer, 'viewCountText')

    uploader = self._get_text(renderer, 'ownerText', 'shortBylineText')
    channel_id = traverse_obj(
        renderer, ('shortBylineText', 'runs', ..., 'navigationEndpoint', 'browseEndpoint', 'browseId'),
        expected_type=str, get_all=False)
    timestamp, time_text = self._extract_time_text(renderer, 'publishedTimeText')
    scheduled_timestamp = str_to_int(traverse_obj(renderer, ('upcomingEventData', 'startTime'), get_all=False))
    overlay_style = traverse_obj(
        renderer, ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'style'),
        get_all=False, expected_type=str)
    badges = self._extract_badges(renderer)
    thumbnails = self._extract_thumbnails(renderer, 'thumbnail')

    navigation_path = traverse_obj(
        renderer, ('navigationEndpoint', 'commandMetadata', 'webCommandMetadata', 'url'),
        expected_type=str)
    navigation_url = urljoin('https://www.youtube.com/', navigation_path) or ''

    if overlay_style == 'SHORTS' or '/shorts/' in navigation_url:
        url = f'https://www.youtube.com/shorts/{video_id}'
    else:
        url = f'https://www.youtube.com/watch?v={video_id}'

    upload_date = None
    if self._configuration_arg('approximate_date', ie_key='youtubetab'):
        upload_date = strftime_or_none(timestamp, '%Y%m%d')

    # Checked in this order: scheduled start time, 'streamed' in the time text, LIVE overlay or badge
    if scheduled_timestamp is not None:
        live_status = 'is_upcoming'
    elif 'streamed' in time_text.lower():
        live_status = 'was_live'
    elif overlay_style == 'LIVE' or 'live now' in badges:
        live_status = 'is_live'
    else:
        live_status = None

    availability = self._availability(
        needs_premium='premium' in badges, needs_subscription='members only' in badges)

    return {
        '_type': 'url',
        'ie_key': YoutubeIE.ie_key(),
        'id': video_id,
        'url': url,
        'title': title,
        'description': description,
        'duration': duration,
        'view_count': view_count,
        'uploader': uploader,
        'channel_id': channel_id,
        'thumbnails': thumbnails,
        'upload_date': upload_date,
        'live_status': live_status,
        'release_timestamp': scheduled_timestamp,
        'availability': availability,
    }
887
0c148415 888
360e1ca5 889class YoutubeIE(YoutubeBaseInfoExtractor):
96565c7e 890 IE_DESC = 'YouTube'
cb7dfeea 891 _VALID_URL = r"""(?x)^
c5e8d7af 892 (
edb53e2d 893 (?:https?://|//) # http(s):// or protocol-independent URL
bc2ca1bb 894 (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com|
895 (?:www\.)?deturl\.com/www\.youtube\.com|
896 (?:www\.)?pwnyoutube\.com|
897 (?:www\.)?hooktube\.com|
898 (?:www\.)?yourepeat\.com|
899 tube\.majestyc\.net|
900 %(invidious)s|
901 youtube\.googleapis\.com)/ # the various hostnames, with wildcard subdomains
c5e8d7af
PH
902 (?:.*?\#/)? # handle anchor (#/) redirect urls
903 (?: # the various things that can precede the ID:
b6ce9bb0 904 (?:(?:v|embed|e|shorts)/(?!videoseries|live_stream)) # v/ or embed/ or e/ or shorts/
c5e8d7af 905 |(?: # or the v= param in all its forms
f7000f3a 906 (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
c5e8d7af 907 (?:\?|\#!?) # the params delimiter ? or # or #!
040ac686 908 (?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
c5e8d7af
PH
909 v=
910 )
f4b05232 911 ))
cbaed4bb
S
912 |(?:
913 youtu\.be| # just youtu.be/xxxx
6d4fc66b
S
914 vid\.plus| # or vid.plus/xxxx
915 zwearz\.com/watch| # or zwearz.com/watch/xxxx
bc2ca1bb 916 %(invidious)s
cbaed4bb 917 )/
edb53e2d 918 |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
f4b05232 919 )
c5e8d7af 920 )? # all until now is optional -> you can pass the naked ID
201c1459 921 (?P<id>[0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
c5e8d7af 922 (?(1).+)? # if we found the ID, everything can follow
9297939e 923 (?:\#|$)""" % {
d9190e44 924 'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
bc2ca1bb 925 }
bfd973ec 926 _EMBED_REGEX = [r'''(?x)
927 (?:
928 <iframe[^>]+?src=|
929 data-video-url=|
930 <embed[^>]+?src=|
931 embedSWF\(?:\s*|
932 <object[^>]+data=|
933 new\s+SWFObject\(
934 )
935 (["\'])
936 (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
937 (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
938 \1''']
e40c758c 939 _PLAYER_INFO_RE = (
cc2db878 940 r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player',
941 r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',
545cc85d 942 r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$',
e40c758c 943 )
2c62dc26 944 _formats = {
c2d3cb4c 945 '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
946 '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
947 '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
948 '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
949 '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
950 '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
951 '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
952 '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
3834d3e3 953 # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
c2d3cb4c 954 '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
955 '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
956 '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
957 '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
958 '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
959 '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
e1a0bfdf 960 '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
c2d3cb4c 961 '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
962 '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
e1a0bfdf 963
964
965 # 3D videos
c2d3cb4c 966 '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
967 '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
968 '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
969 '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
e1a0bfdf 970 '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
971 '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
972 '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
836a086c 973
96fb5605 974 # Apple HTTP Live Streaming
11f12195 975 '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
c2d3cb4c 976 '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
977 '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
978 '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
979 '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
980 '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
e1a0bfdf 981 '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
982 '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
2c62dc26
PH
983
984 # DASH mp4 video
d23028a8
S
985 '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
986 '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
987 '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
988 '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
989 '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
067aa17e 990 '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
d23028a8
S
991 '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
992 '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
993 '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
994 '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
995 '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
996 '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},
836a086c 997
f6f1fc92 998 # Dash mp4 audio
d23028a8
S
999 '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
1000 '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
1001 '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
1002 '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
1003 '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
1004 '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
1005 '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},
836a086c
AZ
1006
1007 # Dash webm
d23028a8
S
1008 '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1009 '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1010 '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1011 '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1012 '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1013 '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1014 '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
1015 '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1016 '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1017 '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1018 '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1019 '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1020 '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1021 '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1022 '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
4c6b4764 1023 # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
d23028a8
S
1024 '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1025 '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
1026 '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
1027 '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
1028 '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1029 '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
2c62dc26
PH
1030
1031 # Dash webm audio
d23028a8
S
1032 '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
1033 '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},
ce6b9a2d 1034
0857baad 1035 # Dash webm audio with opus inside
d23028a8
S
1036 '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
1037 '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
1038 '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},
0857baad 1039
ce6b9a2d
PH
1040 # RTMP (unnamed)
1041 '_rtmp': {'protocol': 'rtmp'},
b85eae0f
S
1042
1043 # av01 video only formats sometimes served with "unknown" codecs
9b5fa9ee
TOH
1044 '394': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},
1045 '395': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},
1046 '396': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'av01.0.01M.08'},
1047 '397': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'av01.0.04M.08'},
1048 '398': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'av01.0.05M.08'},
1049 '399': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'av01.0.08M.08'},
1050 '400': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},
1051 '401': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},
c5e8d7af 1052 }
29f7c58a 1053 _SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')
836a086c 1054
fd5c4aab
S
1055 _GEO_BYPASS = False
1056
78caa52a 1057 IE_NAME = 'youtube'
2eb88d95
PH
1058 _TESTS = [
1059 {
2d3d2997 1060 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
4bc3a23e
PH
1061 'info_dict': {
1062 'id': 'BaW_jenozKc',
1063 'ext': 'mp4',
3867038a 1064 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
4bc3a23e
PH
1065 'uploader': 'Philipp Hagemeister',
1066 'uploader_id': 'phihag',
ec85ded8 1067 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
ff9f925b 1068 'channel': 'Philipp Hagemeister',
dd4c4492
S
1069 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
1070 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
4bc3a23e 1071 'upload_date': '20121002',
ff9f925b 1072 'description': 'md5:8fb536f4877b8a7455c2ec23794dbc22',
4bc3a23e 1073 'categories': ['Science & Technology'],
3867038a 1074 'tags': ['youtube-dl'],
556dbe7f 1075 'duration': 10,
dbdaaa23 1076 'view_count': int,
3e7c1224 1077 'like_count': int,
ff9f925b 1078 'availability': 'public',
1079 'playable_in_embed': True,
1080 'thumbnail': 'https://i.ytimg.com/vi/BaW_jenozKc/maxresdefault.jpg',
1081 'live_status': 'not_live',
1082 'age_limit': 0,
7c80519c 1083 'start_time': 1,
297a564b 1084 'end_time': 9,
12a1b225 1085 'comment_count': int,
6c73052c 1086 'channel_follower_count': int
2eb88d95 1087 }
0e853ca4 1088 },
fccd3771 1089 {
4bc3a23e
PH
1090 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
1091 'note': 'Embed-only video (#1746)',
1092 'info_dict': {
1093 'id': 'yZIXLfi8CZQ',
1094 'ext': 'mp4',
1095 'upload_date': '20120608',
1096 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
1097 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
1098 'uploader': 'SET India',
94bfcd23 1099 'uploader_id': 'setindia',
ec85ded8 1100 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
94bfcd23 1101 'age_limit': 18,
545cc85d 1102 },
1103 'skip': 'Private video',
fccd3771 1104 },
11b56058 1105 {
8bdd16b4 1106 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=yZIXLfi8CZQ',
11b56058
PM
1107 'note': 'Use the first video ID in the URL',
1108 'info_dict': {
1109 'id': 'BaW_jenozKc',
1110 'ext': 'mp4',
3867038a 1111 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
11b56058
PM
1112 'uploader': 'Philipp Hagemeister',
1113 'uploader_id': 'phihag',
ec85ded8 1114 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
976ae3ea 1115 'channel': 'Philipp Hagemeister',
1116 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
1117 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
11b56058 1118 'upload_date': '20121002',
976ae3ea 1119 'description': 'md5:8fb536f4877b8a7455c2ec23794dbc22',
11b56058 1120 'categories': ['Science & Technology'],
3867038a 1121 'tags': ['youtube-dl'],
556dbe7f 1122 'duration': 10,
dbdaaa23 1123 'view_count': int,
11b56058 1124 'like_count': int,
976ae3ea 1125 'availability': 'public',
1126 'playable_in_embed': True,
1127 'thumbnail': 'https://i.ytimg.com/vi/BaW_jenozKc/maxresdefault.jpg',
1128 'live_status': 'not_live',
1129 'age_limit': 0,
12a1b225 1130 'comment_count': int,
6c73052c 1131 'channel_follower_count': int
34a7de29
S
1132 },
1133 'params': {
1134 'skip_download': True,
1135 },
11b56058 1136 },
dd27fd17 1137 {
2d3d2997 1138 'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
4bc3a23e
PH
1139 'note': '256k DASH audio (format 141) via DASH manifest',
1140 'info_dict': {
1141 'id': 'a9LDPn-MO4I',
1142 'ext': 'm4a',
1143 'upload_date': '20121002',
1144 'uploader_id': '8KVIDEO',
ec85ded8 1145 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
4bc3a23e
PH
1146 'description': '',
1147 'uploader': '8KVIDEO',
1148 'title': 'UHDTV TEST 8K VIDEO.mp4'
4919603f 1149 },
4bc3a23e
PH
1150 'params': {
1151 'youtube_include_dash_manifest': True,
1152 'format': '141',
4919603f 1153 },
de3c7fe0 1154 'skip': 'format 141 not served anymore',
dd27fd17 1155 },
8bdd16b4 1156 # DASH manifest with encrypted signature
1157 {
1158 'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
1159 'info_dict': {
1160 'id': 'IB3lcPjvWLA',
1161 'ext': 'm4a',
1162 'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
1163 'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
1164 'duration': 244,
1165 'uploader': 'AfrojackVEVO',
1166 'uploader_id': 'AfrojackVEVO',
1167 'upload_date': '20131011',
cc2db878 1168 'abr': 129.495,
976ae3ea 1169 'like_count': int,
1170 'channel_id': 'UChuZAo1RKL85gev3Eal9_zg',
1171 'playable_in_embed': True,
1172 'channel_url': 'https://www.youtube.com/channel/UChuZAo1RKL85gev3Eal9_zg',
1173 'view_count': int,
1174 'track': 'The Spark',
1175 'live_status': 'not_live',
1176 'thumbnail': 'https://i.ytimg.com/vi_webp/IB3lcPjvWLA/maxresdefault.webp',
1177 'channel': 'Afrojack',
1178 'uploader_url': 'http://www.youtube.com/user/AfrojackVEVO',
1179 'tags': 'count:19',
1180 'availability': 'public',
1181 'categories': ['Music'],
1182 'age_limit': 0,
1183 'alt_title': 'The Spark',
6c73052c 1184 'channel_follower_count': int
8bdd16b4 1185 },
1186 'params': {
1187 'youtube_include_dash_manifest': True,
1188 'format': '141/bestaudio[ext=m4a]',
1189 },
1190 },
65c2fde2 1191 # Age-gate videos. See https://github.com/yt-dlp/yt-dlp/pull/575#issuecomment-888837000
c522adb1 1192 {
65c2fde2 1193 'note': 'Embed allowed age-gate video',
2d3d2997 1194 'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
c522adb1
JMF
1195 'info_dict': {
1196 'id': 'HtVdAasjOgU',
1197 'ext': 'mp4',
1198 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
ec85ded8 1199 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
556dbe7f 1200 'duration': 142,
c522adb1
JMF
1201 'uploader': 'The Witcher',
1202 'uploader_id': 'WitcherGame',
ec85ded8 1203 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
c522adb1 1204 'upload_date': '20140605',
34952f09 1205 'age_limit': 18,
976ae3ea 1206 'categories': ['Gaming'],
1207 'thumbnail': 'https://i.ytimg.com/vi_webp/HtVdAasjOgU/maxresdefault.webp',
1208 'availability': 'needs_auth',
1209 'channel_url': 'https://www.youtube.com/channel/UCzybXLxv08IApdjdN0mJhEg',
1210 'like_count': int,
1211 'channel': 'The Witcher',
1212 'live_status': 'not_live',
1213 'tags': 'count:17',
1214 'channel_id': 'UCzybXLxv08IApdjdN0mJhEg',
1215 'playable_in_embed': True,
1216 'view_count': int,
6c73052c 1217 'channel_follower_count': int
c522adb1
JMF
1218 },
1219 },
65c2fde2 1220 {
1221 'note': 'Age-gate video with embed allowed in public site',
1222 'url': 'https://youtube.com/watch?v=HsUATh_Nc2U',
1223 'info_dict': {
1224 'id': 'HsUATh_Nc2U',
1225 'ext': 'mp4',
1226 'title': 'Godzilla 2 (Official Video)',
1227 'description': 'md5:bf77e03fcae5529475e500129b05668a',
1228 'upload_date': '20200408',
1229 'uploader_id': 'FlyingKitty900',
1230 'uploader': 'FlyingKitty',
1231 'age_limit': 18,
976ae3ea 1232 'availability': 'needs_auth',
1233 'channel_id': 'UCYQT13AtrJC0gsM1far_zJg',
1234 'uploader_url': 'http://www.youtube.com/user/FlyingKitty900',
1235 'channel': 'FlyingKitty',
1236 'channel_url': 'https://www.youtube.com/channel/UCYQT13AtrJC0gsM1far_zJg',
1237 'view_count': int,
1238 'categories': ['Entertainment'],
1239 'live_status': 'not_live',
1240 'tags': ['Flyingkitty', 'godzilla 2'],
1241 'thumbnail': 'https://i.ytimg.com/vi/HsUATh_Nc2U/maxresdefault.jpg',
1242 'like_count': int,
1243 'duration': 177,
1244 'playable_in_embed': True,
6c73052c 1245 'channel_follower_count': int
65c2fde2 1246 },
1247 },
1248 {
1249 'note': 'Age-gate video embedable only with clientScreen=EMBED',
1250 'url': 'https://youtube.com/watch?v=Tq92D6wQ1mg',
1251 'info_dict': {
1252 'id': 'Tq92D6wQ1mg',
1253 'title': '[MMD] Adios - EVERGLOW [+Motion DL]',
3619f78d 1254 'ext': 'mp4',
17322130 1255 'upload_date': '20191228',
65c2fde2 1256 'uploader_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',
1257 'uploader': 'Projekt Melody',
1258 'description': 'md5:17eccca93a786d51bc67646756894066',
1259 'age_limit': 18,
976ae3ea 1260 'like_count': int,
1261 'availability': 'needs_auth',
1262 'uploader_url': 'http://www.youtube.com/channel/UC1yoRdFoFJaCY-AGfD9W0wQ',
1263 'channel_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',
1264 'view_count': int,
1265 'thumbnail': 'https://i.ytimg.com/vi_webp/Tq92D6wQ1mg/sddefault.webp',
1266 'channel': 'Projekt Melody',
1267 'live_status': 'not_live',
1268 'tags': ['mmd', 'dance', 'mikumikudance', 'kpop', 'vtuber'],
1269 'playable_in_embed': True,
1270 'categories': ['Entertainment'],
1271 'duration': 106,
1272 'channel_url': 'https://www.youtube.com/channel/UC1yoRdFoFJaCY-AGfD9W0wQ',
12a1b225 1273 'comment_count': int,
6c73052c 1274 'channel_follower_count': int
65c2fde2 1275 },
1276 },
1277 {
1278 'note': 'Non-Agegated non-embeddable video',
1279 'url': 'https://youtube.com/watch?v=MeJVWBSsPAY',
1280 'info_dict': {
1281 'id': 'MeJVWBSsPAY',
1282 'ext': 'mp4',
1283 'title': 'OOMPH! - Such Mich Find Mich (Lyrics)',
1284 'uploader': 'Herr Lurik',
1285 'uploader_id': 'st3in234',
1286 'description': 'Fan Video. Music & Lyrics by OOMPH!.',
1287 'upload_date': '20130730',
976ae3ea 1288 'track': 'Such mich find mich',
1289 'age_limit': 0,
1290 'tags': ['oomph', 'such mich find mich', 'lyrics', 'german industrial', 'musica industrial'],
1291 'like_count': int,
1292 'playable_in_embed': False,
1293 'creator': 'OOMPH!',
1294 'thumbnail': 'https://i.ytimg.com/vi/MeJVWBSsPAY/sddefault.jpg',
1295 'view_count': int,
1296 'alt_title': 'Such mich find mich',
1297 'duration': 210,
1298 'channel': 'Herr Lurik',
1299 'channel_id': 'UCdR3RSDPqub28LjZx0v9-aA',
1300 'categories': ['Music'],
1301 'availability': 'public',
1302 'uploader_url': 'http://www.youtube.com/user/st3in234',
1303 'channel_url': 'https://www.youtube.com/channel/UCdR3RSDPqub28LjZx0v9-aA',
1304 'live_status': 'not_live',
1305 'artist': 'OOMPH!',
6c73052c 1306 'channel_follower_count': int
65c2fde2 1307 },
1308 },
1309 {
1310 'note': 'Non-bypassable age-gated video',
1311 'url': 'https://youtube.com/watch?v=Cr381pDsSsA',
1312 'only_matching': True,
1313 },
8bdd16b4 1314 # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
1315 # YouTube Red ad is not captured for creator
1316 {
1317 'url': '__2ABJjxzNo',
1318 'info_dict': {
1319 'id': '__2ABJjxzNo',
1320 'ext': 'mp4',
1321 'duration': 266,
1322 'upload_date': '20100430',
1323 'uploader_id': 'deadmau5',
1324 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
545cc85d 1325 'creator': 'deadmau5',
1326 'description': 'md5:6cbcd3a92ce1bc676fc4d6ab4ace2336',
8bdd16b4 1327 'uploader': 'deadmau5',
1328 'title': 'Deadmau5 - Some Chords (HD)',
545cc85d 1329 'alt_title': 'Some Chords',
976ae3ea 1330 'availability': 'public',
1331 'tags': 'count:14',
1332 'channel_id': 'UCYEK6xds6eo-3tr4xRdflmQ',
1333 'view_count': int,
1334 'live_status': 'not_live',
1335 'channel': 'deadmau5',
1336 'thumbnail': 'https://i.ytimg.com/vi_webp/__2ABJjxzNo/maxresdefault.webp',
1337 'like_count': int,
1338 'track': 'Some Chords',
1339 'artist': 'deadmau5',
1340 'playable_in_embed': True,
1341 'age_limit': 0,
1342 'channel_url': 'https://www.youtube.com/channel/UCYEK6xds6eo-3tr4xRdflmQ',
1343 'categories': ['Music'],
1344 'album': 'Some Chords',
6c73052c 1345 'channel_follower_count': int
8bdd16b4 1346 },
1347 'expected_warnings': [
1348 'DASH manifest missing',
1349 ]
1350 },
067aa17e 1351 # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
e52a40ab
PH
1352 {
1353 'url': 'lqQg6PlCWgI',
1354 'info_dict': {
1355 'id': 'lqQg6PlCWgI',
1356 'ext': 'mp4',
556dbe7f 1357 'duration': 6085,
90227264 1358 'upload_date': '20150827',
cbe2bd91 1359 'uploader_id': 'olympic',
ec85ded8 1360 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
12a1b225 1361 'description': 'md5:04bbbf3ccceb6795947572ca36f45904',
11f9be09 1362 'uploader': 'Olympics',
cbe2bd91 1363 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
976ae3ea 1364 'like_count': int,
1365 'release_timestamp': 1343767800,
1366 'playable_in_embed': True,
1367 'categories': ['Sports'],
1368 'release_date': '20120731',
1369 'channel': 'Olympics',
1370 'tags': ['Hockey', '2012-07-31', '31 July 2012', 'Riverbank Arena', 'Session', 'Olympics', 'Olympic Games', 'London 2012', '2012 Summer Olympics', 'Summer Games'],
1371 'channel_id': 'UCTl3QQTvqHFjurroKxexy2Q',
1372 'thumbnail': 'https://i.ytimg.com/vi/lqQg6PlCWgI/maxresdefault.jpg',
1373 'age_limit': 0,
1374 'availability': 'public',
1375 'live_status': 'was_live',
1376 'view_count': int,
1377 'channel_url': 'https://www.youtube.com/channel/UCTl3QQTvqHFjurroKxexy2Q',
6c73052c 1378 'channel_follower_count': int
cbe2bd91
PH
1379 },
1380 'params': {
1381 'skip_download': 'requires avconv',
e52a40ab 1382 }
cbe2bd91 1383 },
6271f1ca
PH
1384 # Non-square pixels
1385 {
1386 'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
1387 'info_dict': {
1388 'id': '_b-2C3KPAM0',
1389 'ext': 'mp4',
1390 'stretched_ratio': 16 / 9.,
556dbe7f 1391 'duration': 85,
6271f1ca
PH
1392 'upload_date': '20110310',
1393 'uploader_id': 'AllenMeow',
ec85ded8 1394 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
6271f1ca 1395 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
eb6793ba 1396 'uploader': '孫ᄋᄅ',
6271f1ca 1397 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
976ae3ea 1398 'playable_in_embed': True,
1399 'channel': '孫ᄋᄅ',
1400 'age_limit': 0,
1401 'tags': 'count:11',
1402 'channel_url': 'https://www.youtube.com/channel/UCS-xxCmRaA6BFdmgDPA_BIw',
1403 'channel_id': 'UCS-xxCmRaA6BFdmgDPA_BIw',
1404 'thumbnail': 'https://i.ytimg.com/vi/_b-2C3KPAM0/maxresdefault.jpg',
1405 'view_count': int,
1406 'categories': ['People & Blogs'],
1407 'like_count': int,
1408 'live_status': 'not_live',
1409 'availability': 'unlisted',
12a1b225 1410 'comment_count': int,
6c73052c 1411 'channel_follower_count': int
6271f1ca 1412 },
06b491eb
S
1413 },
1414 # url_encoded_fmt_stream_map is empty string
1415 {
1416 'url': 'qEJwOuvDf7I',
1417 'info_dict': {
1418 'id': 'qEJwOuvDf7I',
f57b7835 1419 'ext': 'webm',
06b491eb
S
1420 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
1421 'description': '',
1422 'upload_date': '20150404',
1423 'uploader_id': 'spbelect',
1424 'uploader': 'Наблюдатели Петербурга',
1425 },
1426 'params': {
1427 'skip_download': 'requires avconv',
e323cf3f
S
1428 },
1429 'skip': 'This live event has ended.',
06b491eb 1430 },
067aa17e 1431 # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
da77d856
S
1432 {
1433 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
1434 'info_dict': {
1435 'id': 'FIl7x6_3R5Y',
eb6793ba 1436 'ext': 'webm',
da77d856
S
1437 'title': 'md5:7b81415841e02ecd4313668cde88737a',
1438 'description': 'md5:116377fd2963b81ec4ce64b542173306',
556dbe7f 1439 'duration': 220,
da77d856
S
1440 'upload_date': '20150625',
1441 'uploader_id': 'dorappi2000',
ec85ded8 1442 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
da77d856 1443 'uploader': 'dorappi2000',
eb6793ba 1444 'formats': 'mincount:31',
da77d856 1445 },
eb6793ba 1446 'skip': 'not actual anymore',
2ee8f5d8 1447 },
8a1a26ce
YCH
1448 # DASH manifest with segment_list
1449 {
1450 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
1451 'md5': '8ce563a1d667b599d21064e982ab9e31',
1452 'info_dict': {
1453 'id': 'CsmdDsKjzN8',
1454 'ext': 'mp4',
17ee98e1 1455 'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510
8a1a26ce
YCH
1456 'uploader': 'Airtek',
1457 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
1458 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
1459 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
1460 },
1461 'params': {
1462 'youtube_include_dash_manifest': True,
1463 'format': '135', # bestvideo
be49068d
S
1464 },
1465 'skip': 'This live event has ended.',
2ee8f5d8 1466 },
cf7e015f
S
1467 {
1468 # Multifeed videos (multiple cameras), URL is for Main Camera
545cc85d 1469 'url': 'https://www.youtube.com/watch?v=jvGDaLqkpTg',
cf7e015f 1470 'info_dict': {
545cc85d 1471 'id': 'jvGDaLqkpTg',
1472 'title': 'Tom Clancy Free Weekend Rainbow Whatever',
1473 'description': 'md5:e03b909557865076822aa169218d6a5d',
cf7e015f
S
1474 },
1475 'playlist': [{
1476 'info_dict': {
545cc85d 1477 'id': 'jvGDaLqkpTg',
cf7e015f 1478 'ext': 'mp4',
545cc85d 1479 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Main Camera)',
1480 'description': 'md5:e03b909557865076822aa169218d6a5d',
1481 'duration': 10643,
1482 'upload_date': '20161111',
1483 'uploader': 'Team PGP',
1484 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1485 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
1486 },
1487 }, {
1488 'info_dict': {
545cc85d 1489 'id': '3AKt1R1aDnw',
cf7e015f 1490 'ext': 'mp4',
545cc85d 1491 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 2)',
1492 'description': 'md5:e03b909557865076822aa169218d6a5d',
1493 'duration': 10991,
1494 'upload_date': '20161111',
1495 'uploader': 'Team PGP',
1496 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1497 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
1498 },
1499 }, {
1500 'info_dict': {
545cc85d 1501 'id': 'RtAMM00gpVc',
cf7e015f 1502 'ext': 'mp4',
545cc85d 1503 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 3)',
1504 'description': 'md5:e03b909557865076822aa169218d6a5d',
1505 'duration': 10995,
1506 'upload_date': '20161111',
1507 'uploader': 'Team PGP',
1508 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1509 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
1510 },
1511 }, {
1512 'info_dict': {
545cc85d 1513 'id': '6N2fdlP3C5U',
cf7e015f 1514 'ext': 'mp4',
545cc85d 1515 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 4)',
1516 'description': 'md5:e03b909557865076822aa169218d6a5d',
1517 'duration': 10990,
1518 'upload_date': '20161111',
1519 'uploader': 'Team PGP',
1520 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1521 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
1522 },
1523 }],
1524 'params': {
1525 'skip_download': True,
1526 },
65c2fde2 1527 'skip': 'Not multifeed anymore',
cbaed4bb 1528 },
f9f49d87 1529 {
067aa17e 1530 # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
f9f49d87
S
1531 'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
1532 'info_dict': {
1533 'id': 'gVfLd0zydlo',
1534 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
1535 },
1536 'playlist_count': 2,
be49068d 1537 'skip': 'Not multifeed anymore',
f9f49d87 1538 },
cbaed4bb 1539 {
2d3d2997 1540 'url': 'https://vid.plus/FlRa-iH7PGw',
cbaed4bb 1541 'only_matching': True,
0e49d9a6 1542 },
6d4fc66b 1543 {
2d3d2997 1544 'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
6d4fc66b
S
1545 'only_matching': True,
1546 },
0e49d9a6 1547 {
067aa17e 1548 # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
a8776b10 1549 # Also tests cut-off URL expansion in video description (see
067aa17e
S
1550 # https://github.com/ytdl-org/youtube-dl/issues/1892,
1551 # https://github.com/ytdl-org/youtube-dl/issues/8164)
0e49d9a6
LL
1552 'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
1553 'info_dict': {
1554 'id': 'lsguqyKfVQg',
1555 'ext': 'mp4',
1556 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
11f9be09 1557 'alt_title': 'Dark Walk',
0e49d9a6 1558 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
556dbe7f 1559 'duration': 133,
0e49d9a6
LL
1560 'upload_date': '20151119',
1561 'uploader_id': 'IronSoulElf',
ec85ded8 1562 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
0e49d9a6 1563 'uploader': 'IronSoulElf',
11f9be09 1564 'creator': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',
1565 'track': 'Dark Walk',
1566 'artist': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',
92bc97d3 1567 'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
976ae3ea 1568 'thumbnail': 'https://i.ytimg.com/vi_webp/lsguqyKfVQg/maxresdefault.webp',
1569 'categories': ['Film & Animation'],
1570 'view_count': int,
1571 'live_status': 'not_live',
1572 'channel_url': 'https://www.youtube.com/channel/UCTSRgz5jylBvFt_S7wnsqLQ',
1573 'channel_id': 'UCTSRgz5jylBvFt_S7wnsqLQ',
1574 'tags': 'count:13',
1575 'availability': 'public',
1576 'channel': 'IronSoulElf',
1577 'playable_in_embed': True,
1578 'like_count': int,
1579 'age_limit': 0,
6c73052c 1580 'channel_follower_count': int
0e49d9a6
LL
1581 },
1582 'params': {
1583 'skip_download': True,
1584 },
1585 },
61f92af1 1586 {
067aa17e 1587 # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
61f92af1
S
1588 'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
1589 'only_matching': True,
1590 },
313dfc45
LL
1591 {
1592 # Video with yt:stretch=17:0
1593 'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
1594 'info_dict': {
1595 'id': 'Q39EVAstoRM',
1596 'ext': 'mp4',
1597 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
1598 'description': 'md5:ee18a25c350637c8faff806845bddee9',
1599 'upload_date': '20151107',
1600 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
1601 'uploader': 'CH GAMER DROID',
1602 },
1603 'params': {
1604 'skip_download': True,
1605 },
be49068d 1606 'skip': 'This video does not exist.',
313dfc45 1607 },
201c1459 1608 {
1609 # Video with incomplete 'yt:stretch=16:'
1610 'url': 'https://www.youtube.com/watch?v=FRhJzUSJbGI',
1611 'only_matching': True,
1612 },
7caf9830
S
1613 {
1614 # Video licensed under Creative Commons
1615 'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
1616 'info_dict': {
1617 'id': 'M4gD1WSo5mA',
1618 'ext': 'mp4',
1619 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
1620 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
556dbe7f 1621 'duration': 721,
17322130 1622 'upload_date': '20150128',
7caf9830 1623 'uploader_id': 'BerkmanCenter',
ec85ded8 1624 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
556dbe7f 1625 'uploader': 'The Berkman Klein Center for Internet & Society',
7caf9830 1626 'license': 'Creative Commons Attribution license (reuse allowed)',
976ae3ea 1627 'channel_id': 'UCuLGmD72gJDBwmLw06X58SA',
1628 'channel_url': 'https://www.youtube.com/channel/UCuLGmD72gJDBwmLw06X58SA',
1629 'like_count': int,
1630 'age_limit': 0,
1631 'tags': ['Copyright (Legal Subject)', 'Law (Industry)', 'William W. Fisher (Author)'],
1632 'channel': 'The Berkman Klein Center for Internet & Society',
1633 'availability': 'public',
1634 'view_count': int,
1635 'categories': ['Education'],
1636 'thumbnail': 'https://i.ytimg.com/vi_webp/M4gD1WSo5mA/maxresdefault.webp',
1637 'live_status': 'not_live',
1638 'playable_in_embed': True,
12a1b225 1639 'comment_count': int,
6c73052c 1640 'channel_follower_count': int
7caf9830
S
1641 },
1642 'params': {
1643 'skip_download': True,
1644 },
1645 },
fd050249
S
1646 {
1647 # Channel-like uploader_url
1648 'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
1649 'info_dict': {
1650 'id': 'eQcmzGIKrzg',
1651 'ext': 'mp4',
1652 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
545cc85d 1653 'description': 'md5:13a2503d7b5904ef4b223aa101628f39',
556dbe7f 1654 'duration': 4060,
17322130 1655 'upload_date': '20151120',
eb6793ba 1656 'uploader': 'Bernie Sanders',
fd050249 1657 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
ec85ded8 1658 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
fd050249 1659 'license': 'Creative Commons Attribution license (reuse allowed)',
976ae3ea 1660 'playable_in_embed': True,
1661 'tags': 'count:12',
1662 'like_count': int,
1663 'channel_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
1664 'age_limit': 0,
1665 'availability': 'public',
1666 'categories': ['News & Politics'],
1667 'channel': 'Bernie Sanders',
1668 'thumbnail': 'https://i.ytimg.com/vi_webp/eQcmzGIKrzg/maxresdefault.webp',
1669 'view_count': int,
1670 'live_status': 'not_live',
1671 'channel_url': 'https://www.youtube.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
12a1b225 1672 'comment_count': int,
6c73052c 1673 'channel_follower_count': int
fd050249
S
1674 },
1675 'params': {
1676 'skip_download': True,
1677 },
1678 },
040ac686
S
1679 {
1680 'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
1681 'only_matching': True,
7f29cf54
S
1682 },
1683 {
067aa17e 1684 # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
7f29cf54
S
1685 'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
1686 'only_matching': True,
6496ccb4
S
1687 },
1688 {
1689 # Rental video preview
1690 'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
1691 'info_dict': {
1692 'id': 'uGpuVWrhIzE',
1693 'ext': 'mp4',
1694 'title': 'Piku - Trailer',
1695 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
1696 'upload_date': '20150811',
1697 'uploader': 'FlixMatrix',
1698 'uploader_id': 'FlixMatrixKaravan',
ec85ded8 1699 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
6496ccb4
S
1700 'license': 'Standard YouTube License',
1701 },
1702 'params': {
1703 'skip_download': True,
1704 },
eb6793ba 1705 'skip': 'This video is not available.',
022a5d66 1706 },
12afdc2a
S
1707 {
1708 # YouTube Red video with episode data
1709 'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
1710 'info_dict': {
1711 'id': 'iqKdEhx-dD4',
1712 'ext': 'mp4',
1713 'title': 'Isolation - Mind Field (Ep 1)',
545cc85d 1714 'description': 'md5:f540112edec5d09fc8cc752d3d4ba3cd',
556dbe7f 1715 'duration': 2085,
12afdc2a
S
1716 'upload_date': '20170118',
1717 'uploader': 'Vsauce',
1718 'uploader_id': 'Vsauce',
1719 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
12afdc2a
S
1720 'series': 'Mind Field',
1721 'season_number': 1,
1722 'episode_number': 1,
976ae3ea 1723 'thumbnail': 'https://i.ytimg.com/vi_webp/iqKdEhx-dD4/maxresdefault.webp',
1724 'tags': 'count:12',
1725 'view_count': int,
1726 'availability': 'public',
1727 'age_limit': 0,
1728 'channel': 'Vsauce',
1729 'episode': 'Episode 1',
1730 'categories': ['Entertainment'],
1731 'season': 'Season 1',
1732 'channel_id': 'UC6nSFpj9HTCZ5t-N3Rm3-HA',
1733 'channel_url': 'https://www.youtube.com/channel/UC6nSFpj9HTCZ5t-N3Rm3-HA',
1734 'like_count': int,
1735 'playable_in_embed': True,
1736 'live_status': 'not_live',
6c73052c 1737 'channel_follower_count': int
12afdc2a
S
1738 },
1739 'params': {
1740 'skip_download': True,
1741 },
1742 'expected_warnings': [
1743 'Skipping DASH manifest',
1744 ],
1745 },
c7121fa7
S
1746 {
1747 # The following content has been identified by the YouTube community
1748 # as inappropriate or offensive to some audiences.
1749 'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
1750 'info_dict': {
1751 'id': '6SJNVb0GnPI',
1752 'ext': 'mp4',
1753 'title': 'Race Differences in Intelligence',
1754 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
1755 'duration': 965,
1756 'upload_date': '20140124',
1757 'uploader': 'New Century Foundation',
1758 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
1759 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
c7121fa7
S
1760 },
1761 'params': {
1762 'skip_download': True,
1763 },
545cc85d 1764 'skip': 'This video has been removed for violating YouTube\'s policy on hate speech.',
c7121fa7 1765 },
022a5d66
S
1766 {
1767 # itag 212
1768 'url': '1t24XAntNCY',
1769 'only_matching': True,
fd5c4aab
S
1770 },
1771 {
1772 # geo restricted to JP
1773 'url': 'sJL6WA-aGkQ',
1774 'only_matching': True,
1775 },
cd5a74a2
S
1776 {
1777 'url': 'https://invidio.us/watch?v=BaW_jenozKc',
1778 'only_matching': True,
1779 },
bc2ca1bb 1780 {
1781 'url': 'https://redirect.invidious.io/watch?v=BaW_jenozKc',
1782 'only_matching': True,
1783 },
1784 {
1785 # from https://nitter.pussthecat.org/YouTube/status/1360363141947944964#m
1786 'url': 'https://redirect.invidious.io/Yh0AhrY9GjA',
1787 'only_matching': True,
1788 },
825cd268
RA
1789 {
1790 # DRM protected
1791 'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
1792 'only_matching': True,
4fe54c12
S
1793 },
1794 {
1795 # Video with unsupported adaptive stream type formats
1796 'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
1797 'info_dict': {
1798 'id': 'Z4Vy8R84T1U',
1799 'ext': 'mp4',
1800 'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
1801 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
1802 'duration': 433,
1803 'upload_date': '20130923',
1804 'uploader': 'Amelia Putri Harwita',
1805 'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',
1806 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',
1807 'formats': 'maxcount:10',
1808 },
1809 'params': {
1810 'skip_download': True,
1811 'youtube_include_dash_manifest': False,
1812 },
5429d6a9 1813 'skip': 'not actual anymore',
5caabd3c 1814 },
1815 {
822b9d9c 1816 # Youtube Music Auto-generated description
5caabd3c 1817 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1818 'info_dict': {
1819 'id': 'MgNrAu2pzNs',
1820 'ext': 'mp4',
1821 'title': 'Voyeur Girl',
1822 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
1823 'upload_date': '20190312',
5429d6a9
S
1824 'uploader': 'Stephen - Topic',
1825 'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',
5caabd3c 1826 'artist': 'Stephen',
1827 'track': 'Voyeur Girl',
1828 'album': 'it\'s too much love to know my dear',
1829 'release_date': '20190313',
1830 'release_year': 2019,
976ae3ea 1831 'alt_title': 'Voyeur Girl',
1832 'view_count': int,
1833 'uploader_url': 'http://www.youtube.com/channel/UC-pWHpBjdGG69N9mM2auIAA',
1834 'playable_in_embed': True,
1835 'like_count': int,
1836 'categories': ['Music'],
1837 'channel_url': 'https://www.youtube.com/channel/UC-pWHpBjdGG69N9mM2auIAA',
1838 'channel': 'Stephen',
1839 'availability': 'public',
1840 'creator': 'Stephen',
1841 'duration': 169,
1842 'thumbnail': 'https://i.ytimg.com/vi_webp/MgNrAu2pzNs/maxresdefault.webp',
1843 'age_limit': 0,
1844 'channel_id': 'UC-pWHpBjdGG69N9mM2auIAA',
1845 'tags': 'count:11',
1846 'live_status': 'not_live',
6c73052c 1847 'channel_follower_count': int
5caabd3c 1848 },
1849 'params': {
1850 'skip_download': True,
1851 },
1852 },
66b48727
RA
1853 {
1854 'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',
1855 'only_matching': True,
1856 },
011e75e6
S
1857 {
1858 # invalid -> valid video id redirection
1859 'url': 'DJztXj2GPfl',
1860 'info_dict': {
1861 'id': 'DJztXj2GPfk',
1862 'ext': 'mp4',
1863 'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',
1864 'description': 'md5:bf577a41da97918e94fa9798d9228825',
1865 'upload_date': '20090125',
1866 'uploader': 'Prochorowka',
1867 'uploader_id': 'Prochorowka',
1868 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',
1869 'artist': 'Panjabi MC',
1870 'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',
1871 'album': 'Beware of the Boys (Mundian To Bach Ke)',
1872 },
1873 'params': {
1874 'skip_download': True,
1875 },
545cc85d 1876 'skip': 'Video unavailable',
ea74e00b
DP
1877 },
1878 {
1879 # empty description results in an empty string
1880 'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',
1881 'info_dict': {
1882 'id': 'x41yOUIvK2k',
1883 'ext': 'mp4',
1884 'title': 'IMG 3456',
1885 'description': '',
1886 'upload_date': '20170613',
1887 'uploader_id': 'ElevageOrVert',
1888 'uploader': 'ElevageOrVert',
976ae3ea 1889 'view_count': int,
1890 'thumbnail': 'https://i.ytimg.com/vi_webp/x41yOUIvK2k/maxresdefault.webp',
1891 'uploader_url': 'http://www.youtube.com/user/ElevageOrVert',
1892 'like_count': int,
1893 'channel_id': 'UCo03ZQPBW5U4UC3regpt1nw',
1894 'tags': [],
1895 'channel_url': 'https://www.youtube.com/channel/UCo03ZQPBW5U4UC3regpt1nw',
1896 'availability': 'public',
1897 'age_limit': 0,
1898 'categories': ['Pets & Animals'],
1899 'duration': 7,
1900 'playable_in_embed': True,
1901 'live_status': 'not_live',
1902 'channel': 'ElevageOrVert',
6c73052c 1903 'channel_follower_count': int
ea74e00b
DP
1904 },
1905 'params': {
1906 'skip_download': True,
1907 },
1908 },
a0566bbf 1909 {
29f7c58a 1910 # with '};' inside yt initial data (see [1])
1911 # see [2] for an example with '};' inside ytInitialPlayerResponse
1912 # 1. https://github.com/ytdl-org/youtube-dl/issues/27093
1913 # 2. https://github.com/ytdl-org/youtube-dl/issues/27216
a0566bbf 1914 'url': 'https://www.youtube.com/watch?v=CHqg6qOn4no',
1915 'info_dict': {
1916 'id': 'CHqg6qOn4no',
1917 'ext': 'mp4',
1918 'title': 'Part 77 Sort a list of simple types in c#',
1919 'description': 'md5:b8746fa52e10cdbf47997903f13b20dc',
1920 'upload_date': '20130831',
1921 'uploader_id': 'kudvenkat',
1922 'uploader': 'kudvenkat',
976ae3ea 1923 'channel_id': 'UCCTVrRB5KpIiK6V2GGVsR1Q',
1924 'like_count': int,
1925 'uploader_url': 'http://www.youtube.com/user/kudvenkat',
1926 'channel_url': 'https://www.youtube.com/channel/UCCTVrRB5KpIiK6V2GGVsR1Q',
1927 'live_status': 'not_live',
1928 'categories': ['Education'],
1929 'availability': 'public',
1930 'thumbnail': 'https://i.ytimg.com/vi/CHqg6qOn4no/sddefault.jpg',
1931 'tags': 'count:12',
1932 'playable_in_embed': True,
1933 'age_limit': 0,
1934 'view_count': int,
1935 'duration': 522,
1936 'channel': 'kudvenkat',
12a1b225 1937 'comment_count': int,
6c73052c 1938 'channel_follower_count': int
a0566bbf 1939 },
1940 'params': {
1941 'skip_download': True,
1942 },
1943 },
29f7c58a 1944 {
1945 # another example of '};' in ytInitialData
1946 'url': 'https://www.youtube.com/watch?v=gVfgbahppCY',
1947 'only_matching': True,
1948 },
1949 {
1950 'url': 'https://www.youtube.com/watch_popup?v=63RmMXCd_bQ',
1951 'only_matching': True,
1952 },
545cc85d 1953 {
cc2db878 1954 # https://github.com/ytdl-org/youtube-dl/pull/28094
1955 'url': 'OtqTfy26tG0',
1956 'info_dict': {
1957 'id': 'OtqTfy26tG0',
1958 'ext': 'mp4',
1959 'title': 'Burn Out',
1960 'description': 'md5:8d07b84dcbcbfb34bc12a56d968b6131',
1961 'upload_date': '20141120',
1962 'uploader': 'The Cinematic Orchestra - Topic',
1963 'uploader_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',
1964 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',
1965 'artist': 'The Cinematic Orchestra',
1966 'track': 'Burn Out',
1967 'album': 'Every Day',
976ae3ea 1968 'like_count': int,
1969 'live_status': 'not_live',
1970 'alt_title': 'Burn Out',
1971 'duration': 614,
1972 'age_limit': 0,
1973 'view_count': int,
1974 'channel_url': 'https://www.youtube.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',
1975 'creator': 'The Cinematic Orchestra',
1976 'channel': 'The Cinematic Orchestra',
1977 'tags': ['The Cinematic Orchestra', 'Every Day', 'Burn Out'],
1978 'channel_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',
1979 'availability': 'public',
1980 'thumbnail': 'https://i.ytimg.com/vi/OtqTfy26tG0/maxresdefault.jpg',
1981 'categories': ['Music'],
1982 'playable_in_embed': True,
6c73052c 1983 'channel_follower_count': int
cc2db878 1984 },
1985 'params': {
1986 'skip_download': True,
1987 },
545cc85d 1988 },
bc2ca1bb 1989 {
1990 # controversial video, only works with bpctr when authenticated with cookies
1991 'url': 'https://www.youtube.com/watch?v=nGC3D_FkCmg',
1992 'only_matching': True,
1993 },
a1a7907b 1994 {
1995 # controversial video, requires bpctr/contentCheckOk
1996 'url': 'https://www.youtube.com/watch?v=SZJvDhaSDnc',
1997 'info_dict': {
1998 'id': 'SZJvDhaSDnc',
1999 'ext': 'mp4',
2000 'title': 'San Diego teen commits suicide after bullying over embarrassing video',
2001 'channel_id': 'UC-SJ6nODDmufqBzPBwCvYvQ',
976ae3ea 2002 'uploader': 'CBS Mornings',
11f9be09 2003 'uploader_id': 'CBSThisMorning',
a1a7907b 2004 'upload_date': '20140716',
976ae3ea 2005 'description': 'md5:acde3a73d3f133fc97e837a9f76b53b7',
2006 'duration': 170,
2007 'categories': ['News & Politics'],
2008 'uploader_url': 'http://www.youtube.com/user/CBSThisMorning',
2009 'view_count': int,
2010 'channel': 'CBS Mornings',
2011 'tags': ['suicide', 'bullying', 'video', 'cbs', 'news'],
2012 'thumbnail': 'https://i.ytimg.com/vi/SZJvDhaSDnc/hqdefault.jpg',
2013 'age_limit': 18,
2014 'availability': 'needs_auth',
2015 'channel_url': 'https://www.youtube.com/channel/UC-SJ6nODDmufqBzPBwCvYvQ',
2016 'like_count': int,
2017 'live_status': 'not_live',
2018 'playable_in_embed': True,
6c73052c 2019 'channel_follower_count': int
a1a7907b 2020 }
2021 },
f7ad7160 2022 {
2023 # restricted location, https://github.com/ytdl-org/youtube-dl/issues/28685
2024 'url': 'cBvYw8_A0vQ',
2025 'info_dict': {
2026 'id': 'cBvYw8_A0vQ',
2027 'ext': 'mp4',
2028 'title': '4K Ueno Okachimachi Street Scenes 上野御徒町歩き',
2029 'description': 'md5:ea770e474b7cd6722b4c95b833c03630',
2030 'upload_date': '20201120',
2031 'uploader': 'Walk around Japan',
2032 'uploader_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',
2033 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',
976ae3ea 2034 'duration': 1456,
2035 'categories': ['Travel & Events'],
2036 'channel_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',
2037 'view_count': int,
2038 'channel': 'Walk around Japan',
2039 'tags': ['Ueno Tokyo', 'Okachimachi Tokyo', 'Ameyoko Street', 'Tokyo attraction', 'Travel in Tokyo'],
2040 'thumbnail': 'https://i.ytimg.com/vi_webp/cBvYw8_A0vQ/hqdefault.webp',
2041 'age_limit': 0,
2042 'availability': 'public',
2043 'channel_url': 'https://www.youtube.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',
2044 'live_status': 'not_live',
2045 'playable_in_embed': True,
6c73052c 2046 'channel_follower_count': int
f7ad7160 2047 },
2048 'params': {
2049 'skip_download': True,
2050 },
0fb983f6 2051 }, {
2052 # Has multiple audio streams
2053 'url': 'WaOKSUlf4TM',
2054 'only_matching': True
9297939e 2055 }, {
2056 # Requires Premium: has format 141 when requested using YTM url
2057 'url': 'https://music.youtube.com/watch?v=XclachpHxis',
2058 'only_matching': True
2059 }, {
120916da 2060 # multiple subtitles with same lang_code
2061 'url': 'https://www.youtube.com/watch?v=wsQiKKfKxug',
2062 'only_matching': True,
109dd3b2 2063 }, {
2064 # Force use android client fallback
2065 'url': 'https://www.youtube.com/watch?v=YOelRv7fMxY',
2066 'info_dict': {
2067 'id': 'YOelRv7fMxY',
11f9be09 2068 'title': 'DIGGING A SECRET TUNNEL Part 1',
109dd3b2 2069 'ext': '3gp',
2070 'upload_date': '20210624',
2071 'channel_id': 'UCp68_FLety0O-n9QU6phsgw',
2072 'uploader': 'colinfurze',
11f9be09 2073 'uploader_id': 'colinfurze',
109dd3b2 2074 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCp68_FLety0O-n9QU6phsgw',
976ae3ea 2075 'description': 'md5:5d5991195d599b56cd0c4148907eec50',
2076 'duration': 596,
2077 'categories': ['Entertainment'],
2078 'uploader_url': 'http://www.youtube.com/user/colinfurze',
2079 'view_count': int,
2080 'channel': 'colinfurze',
2081 'tags': ['Colin', 'furze', 'Terry', 'tunnel', 'underground', 'bunker'],
2082 'thumbnail': 'https://i.ytimg.com/vi/YOelRv7fMxY/maxresdefault.jpg',
2083 'age_limit': 0,
2084 'availability': 'public',
2085 'like_count': int,
2086 'live_status': 'not_live',
2087 'playable_in_embed': True,
6c73052c 2088 'channel_follower_count': int
109dd3b2 2089 },
2090 'params': {
2091 'format': '17', # 3gp format available on android
2092 'extractor_args': {'youtube': {'player_client': ['android']}},
2093 },
120916da 2094 },
109dd3b2 2095 {
2096 # Skip download of additional client configs (remix client config in this case)
2097 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
2098 'only_matching': True,
2099 'params': {
2100 'extractor_args': {'youtube': {'player_skip': ['configs']}},
2101 },
8fc54b12 2102 }, {
2103 # shorts
2104 'url': 'https://www.youtube.com/shorts/BGQWPY4IigY',
2105 'only_matching': True,
9222c381 2106 }, {
2107 'note': 'Storyboards',
2108 'url': 'https://www.youtube.com/watch?v=5KLPxDtMqe8',
2109 'info_dict': {
2110 'id': '5KLPxDtMqe8',
2111 'ext': 'mhtml',
2112 'format_id': 'sb0',
2113 'title': 'Your Brain is Plastic',
2114 'uploader_id': 'scishow',
2115 'description': 'md5:89cd86034bdb5466cd87c6ba206cd2bc',
2116 'upload_date': '20140324',
2117 'uploader': 'SciShow',
976ae3ea 2118 'like_count': int,
2119 'channel_id': 'UCZYTClx2T1of7BRZ86-8fow',
2120 'channel_url': 'https://www.youtube.com/channel/UCZYTClx2T1of7BRZ86-8fow',
2121 'view_count': int,
2122 'thumbnail': 'https://i.ytimg.com/vi/5KLPxDtMqe8/maxresdefault.jpg',
2123 'playable_in_embed': True,
2124 'tags': 'count:12',
2125 'uploader_url': 'http://www.youtube.com/user/scishow',
2126 'availability': 'public',
2127 'channel': 'SciShow',
2128 'live_status': 'not_live',
2129 'duration': 248,
2130 'categories': ['Education'],
2131 'age_limit': 0,
6c73052c 2132 'channel_follower_count': int
9222c381 2133 }, 'params': {'format': 'mhtml', 'skip_download': True}
992f9a73 2134 }, {
2135 # Ensure video upload_date is in UTC timezone (video was uploaded 1641170939)
2136 'url': 'https://www.youtube.com/watch?v=2NUZ8W2llS4',
2137 'info_dict': {
2138 'id': '2NUZ8W2llS4',
2139 'ext': 'mp4',
2140 'title': 'The NP that test your phone performance 🙂',
2141 'description': 'md5:144494b24d4f9dfacb97c1bbef5de84d',
2142 'uploader': 'Leon Nguyen',
2143 'uploader_id': 'VNSXIII',
2144 'uploader_url': 'http://www.youtube.com/user/VNSXIII',
2145 'channel_id': 'UCRqNBSOHgilHfAczlUmlWHA',
2146 'channel_url': 'https://www.youtube.com/channel/UCRqNBSOHgilHfAczlUmlWHA',
2147 'duration': 21,
2148 'view_count': int,
2149 'age_limit': 0,
2150 'categories': ['Gaming'],
2151 'tags': 'count:23',
2152 'playable_in_embed': True,
2153 'live_status': 'not_live',
2154 'upload_date': '20220103',
2155 'like_count': int,
2156 'availability': 'public',
2157 'channel': 'Leon Nguyen',
2158 'thumbnail': 'https://i.ytimg.com/vi_webp/2NUZ8W2llS4/maxresdefault.webp',
12a1b225 2159 'comment_count': int,
992f9a73 2160 'channel_follower_count': int
2161 }
2162 }, {
2163 # date text is premiered video, ensure upload date in UTC (published 1641172509)
2164 'url': 'https://www.youtube.com/watch?v=mzZzzBU6lrM',
2165 'info_dict': {
2166 'id': 'mzZzzBU6lrM',
2167 'ext': 'mp4',
2168 'title': 'I Met GeorgeNotFound In Real Life...',
2169 'description': 'md5:cca98a355c7184e750f711f3a1b22c84',
2170 'uploader': 'Quackity',
2171 'uploader_id': 'QuackityHQ',
2172 'uploader_url': 'http://www.youtube.com/user/QuackityHQ',
2173 'channel_id': 'UC_8NknAFiyhOUaZqHR3lq3Q',
2174 'channel_url': 'https://www.youtube.com/channel/UC_8NknAFiyhOUaZqHR3lq3Q',
2175 'duration': 955,
2176 'view_count': int,
2177 'age_limit': 0,
2178 'categories': ['Entertainment'],
2179 'tags': 'count:26',
2180 'playable_in_embed': True,
2181 'live_status': 'not_live',
2182 'release_timestamp': 1641172509,
2183 'release_date': '20220103',
2184 'upload_date': '20220103',
2185 'like_count': int,
2186 'availability': 'public',
2187 'channel': 'Quackity',
2188 'thumbnail': 'https://i.ytimg.com/vi/mzZzzBU6lrM/maxresdefault.jpg',
2189 'channel_follower_count': int
2190 }
2191 },
2192 { # continuous livestream. Microformat upload date should be preferred.
2193 # Upload date was 2021-06-19 (not UTC), while stream start is 2021-11-27
2194 'url': 'https://www.youtube.com/watch?v=kgx4WGK0oNU',
2195 'info_dict': {
2196 'id': 'kgx4WGK0oNU',
2197 'title': r're:jazz\/lofi hip hop radio🌱chill beats to relax\/study to \[LIVE 24\/7\] \d{4}-\d{2}-\d{2} \d{2}:\d{2}',
2198 'ext': 'mp4',
2199 'channel_id': 'UC84whx2xxsiA1gXHXXqKGOA',
2200 'availability': 'public',
2201 'age_limit': 0,
2202 'release_timestamp': 1637975704,
2203 'upload_date': '20210619',
2204 'channel_url': 'https://www.youtube.com/channel/UC84whx2xxsiA1gXHXXqKGOA',
2205 'live_status': 'is_live',
2206 'thumbnail': 'https://i.ytimg.com/vi/kgx4WGK0oNU/maxresdefault.jpg',
2207 'uploader': '阿鲍Abao',
2208 'uploader_url': 'http://www.youtube.com/channel/UC84whx2xxsiA1gXHXXqKGOA',
2209 'channel': 'Abao in Tokyo',
2210 'channel_follower_count': int,
2211 'release_date': '20211127',
2212 'tags': 'count:39',
2213 'categories': ['People & Blogs'],
2214 'like_count': int,
2215 'uploader_id': 'UC84whx2xxsiA1gXHXXqKGOA',
2216 'view_count': int,
2217 'playable_in_embed': True,
2218 'description': 'md5:2ef1d002cad520f65825346e2084e49d',
2219 },
2220 'params': {'skip_download': True}
6e634cbe 2221 }, {
2222 # Story. Requires specific player params to work.
ee27297f 2223 'url': 'https://www.youtube.com/watch?v=vv8qTUWmulI',
6e634cbe 2224 'info_dict': {
ee27297f 2225 'id': 'vv8qTUWmulI',
6e634cbe 2226 'ext': 'mp4',
ee27297f 2227 'availability': 'unlisted',
2228 'view_count': int,
2229 'channel_id': 'UCzIZ8HrzDgc-pNQDUG6avBA',
2230 'upload_date': '20220526',
2231 'categories': ['Education'],
2232 'title': 'Story',
2233 'channel': 'IT\'S HISTORY',
2234 'description': '',
2235 'uploader_id': 'BlastfromthePast',
2236 'duration': 12,
2237 'uploader': 'IT\'S HISTORY',
6e634cbe 2238 'playable_in_embed': True,
6e634cbe 2239 'age_limit': 0,
6e634cbe 2240 'live_status': 'not_live',
ee27297f 2241 'tags': [],
2242 'thumbnail': 'https://i.ytimg.com/vi_webp/vv8qTUWmulI/maxresdefault.webp',
2243 'uploader_url': 'http://www.youtube.com/user/BlastfromthePast',
2244 'channel_url': 'https://www.youtube.com/channel/UCzIZ8HrzDgc-pNQDUG6avBA',
12a1b225
A
2245 },
2246 'skip': 'stories get removed after some period of time',
ee27297f 2247 }, {
2248 'url': 'https://www.youtube.com/watch?v=tjjjtzRLHvA',
2249 'info_dict': {
2250 'id': 'tjjjtzRLHvA',
2251 'ext': 'mp4',
2252 'title': 'ハッシュタグ無し };if window.ytcsi',
2253 'upload_date': '20220323',
2254 'like_count': int,
2255 'availability': 'unlisted',
2256 'channel': 'nao20010128nao',
2257 'thumbnail': 'https://i.ytimg.com/vi_webp/tjjjtzRLHvA/maxresdefault.webp',
2258 'age_limit': 0,
2259 'uploader': 'nao20010128nao',
2260 'uploader_id': 'nao20010128nao',
2261 'categories': ['Music'],
6e634cbe 2262 'view_count': int,
2263 'description': '',
ee27297f 2264 'channel_url': 'https://www.youtube.com/channel/UCdqltm_7iv1Vs6kp6Syke5A',
2265 'channel_id': 'UCdqltm_7iv1Vs6kp6Syke5A',
2266 'live_status': 'not_live',
2267 'playable_in_embed': True,
2268 'channel_follower_count': int,
2269 'duration': 6,
2270 'tags': [],
2271 'uploader_url': 'http://www.youtube.com/user/nao20010128nao',
6e634cbe 2272 }
a4166234 2273 }, {
2274 'note': '6 channel audio',
2275 'url': 'https://www.youtube.com/watch?v=zgdo7-RRjgo',
2276 'only_matching': True,
6e634cbe 2277 }
2eb88d95
PH
2278 ]
2279
# Test cases keyed on a third-party page URL rather than a YouTube URL.
# The expected `info_dict` describes the YouTube video found on that page.
_WEBPAGE_TESTS = [
    {
        # YouTube <object> embed
        'url': 'http://www.improbable.com/2017/04/03/untrained-modern-youths-and-ancient-masters-in-selfie-portraits/',
        # NOTE(review): this value is identical to the description hash below,
        # which looks like a copy-paste -- confirm whether a file md5 is intended.
        'md5': '873c81d308b979f0e23ee7e620b312a3',
        'info_dict': {
            'id': 'msN87y-iEx0',
            'ext': 'mp4',
            'title': 'Feynman: Mirrors FUN TO IMAGINE 6',
            'upload_date': '20080526',
            'description': 'md5:873c81d308b979f0e23ee7e620b312a3',
            'uploader': 'Christopher Sykes',
            'uploader_id': 'ChristopherJSykes',
            'age_limit': 0,
            'tags': [
                'feynman', 'mirror', 'science', 'physics',
                'imagination', 'fun', 'cool', 'puzzle',
            ],
            'channel_id': 'UCCeo--lls1vna5YJABWAcVA',
            'playable_in_embed': True,
            'thumbnail': 'https://i.ytimg.com/vi/msN87y-iEx0/hqdefault.jpg',
            # A bare `int` is given where only the type, not a value, is pinned.
            'like_count': int,
            'comment_count': int,
            'channel': 'Christopher Sykes',
            'live_status': 'not_live',
            'channel_url': 'https://www.youtube.com/channel/UCCeo--lls1vna5YJABWAcVA',
            'availability': 'public',
            'duration': 195,
            'view_count': int,
            'categories': ['Science & Technology'],
            'channel_follower_count': int,
            'uploader_url': 'http://www.youtube.com/user/ChristopherJSykes',
        },
        'params': {'skip_download': True},
    },
]
2315
@classmethod
def suitable(cls, url):
    """Return whether this extractor should handle ``url``.

    A URL carrying a non-empty ``list`` query parameter is rejected outright;
    every other URL is decided by the parent class.
    """
    # NOTE(review): parse_qs is also imported at module level. The function-scope
    # import is presumably deliberate (e.g. this method being reused outside this
    # module) - confirm before removing it.
    from ..utils import parse_qs

    playlist_id = parse_qs(url).get('list', [None])[0]
    return False if playlist_id else super().suitable(url)
201c1459 2324
def __init__(self, *args, **kwargs):
    """Initialise the parent extractor and create the empty per-instance caches."""
    super().__init__(*args, **kwargs)
    # _code_cache: downloaded player JS keyed by player id (filled by _load_player)
    # _player_cache: memoised results / errors keyed by tuple (filled by _cached)
    self._code_cache, self._player_cache = {}, {}
e0df6211 2329
def _prepare_live_from_start_formats(self, formats, video_id, live_start_time, url, webpage_url, smuggled_data):
    """Turn the "from start" formats into lazily generated DASH fragment lists.

    Only the entries of ``formats`` whose ``is_from_start`` value is truthy are
    touched. Each of those dicts is modified in place: ``is_live`` is set to
    True, ``protocol`` to ``'http_dash_segments_generator'`` and ``fragments``
    to a partial of ``_live_dash_fragments`` bound to the format id,
    ``live_start_time`` and the ``mpd_feed`` callback defined below.

    Returns nothing.
    """
    # Serialises manifest refreshes requested through mpd_feed
    lock = threading.Lock()

    is_live = True
    # Time of the last manifest (re)fetch; compared against the caller's delay
    start_time = time.time()
    # Rebinds the local name only; the dicts inside are shared with the caller's list
    formats = [f for f in formats if f.get('is_from_start')]

    def refetch_manifest(format_id, delay):
        """Re-download player responses and formats if ``delay`` seconds have passed since the last fetch."""
        nonlocal formats, start_time, is_live
        if time.time() <= start_time + delay:
            return

        _, _, prs, player_url = self._download_player_responses(url, smuggled_data, video_id, webpage_url)
        video_details = traverse_obj(
            prs, (..., 'videoDetails'), expected_type=dict, default=[])
        microformats = traverse_obj(
            prs, (..., 'microformat', 'playerMicroformatRenderer'),
            expected_type=dict, default=[])
        # Replaces the closure's format list and live flag with the fresh values
        _, is_live, _, formats, _ = self._list_formats(video_id, microformats, video_details, prs, player_url)
        start_time = time.time()

    def mpd_feed(format_id, delay):
        """
        Refresh the manifest if it is older than ``delay`` seconds, then look up ``format_id``

        @returns (manifest_url, manifest_stream_number, is_live) or None
        """
        with lock:
            refetch_manifest(format_id, delay)

        f = next((f for f in formats if f['format_id'] == format_id), None)
        if not f:
            if not is_live:
                self.to_screen(f'{video_id}: Video is no longer live')
            else:
                self.report_warning(
                    f'Cannot find refreshed manifest for format {format_id}{bug_reports_message()}')
            return None
        return f['manifest_url'], f['manifest_stream_number'], is_live

    for f in formats:
        f['is_live'] = True
        f['protocol'] = 'http_dash_segments_generator'
        f['fragments'] = functools.partial(
            self._live_dash_fragments, f['format_id'], live_start_time, mpd_feed)
2373
def _live_dash_fragments(self, format_id, live_start_time, mpd_feed, ctx):
    """Generate fragment descriptors for a live DASH stream, from its start.

    @param format_id        format id passed through to ``mpd_feed``
    @param live_start_time  start time of the stream in seconds, or a falsy value if unknown
    @param mpd_feed         callable ``(format_id, delay)`` returning
                            ``(manifest_url, manifest_stream_number, is_live)`` or None
    @param ctx              dict; ``ctx.get('start')`` is read once and
                            ``ctx.pop('last_error', None)`` on every manifest check
    @yields dicts with the keys ``url`` and ``fragment_count``

    The generator ends when the stream is reported as no longer live or when
    too many consecutive polls produced nothing (``no_fragment_score > 30``).
    """
    # Seconds between polls / oldest span (in seconds) of data that is requested
    FETCH_SPAN, MAX_DURATION = 5, 432000

    mpd_url, stream_number, is_live = None, None, True

    begin_index = 0
    download_start_time = ctx.get('start') or time.time()

    lack_early_segments = download_start_time - (live_start_time or download_start_time) > MAX_DURATION
    if lack_early_segments:
        self.report_warning(bug_reports_message(
            'Starting download from the last 120 hours of the live stream since '
            'YouTube does not have data before that. If you think this is wrong,'), only_once=True)

    known_idx, no_fragment_score, last_segment_url = begin_index, 0, None
    fragments, fragment_base_url = None, None
    # The nested helper returns this name on its failure paths. It has to be bound
    # before the first call, otherwise a failed initial manifest fetch raises NameError.
    last_seq = None

    def _extract_sequence_from_mpd(refresh_sequence, immediate):
        """Refresh the manifest data; return ``(should_continue, last_sequence_number)``."""
        nonlocal mpd_url, stream_number, is_live, no_fragment_score, fragments, fragment_base_url
        # Obtain from MPD's maximum seq value
        old_mpd_url = mpd_url
        last_error = ctx.pop('last_error', None)
        expire_fast = immediate or last_error and isinstance(last_error, urllib.error.HTTPError) and last_error.code == 403
        mpd_url, stream_number, is_live = (mpd_feed(format_id, 5 if expire_fast else 18000)
                                           or (mpd_url, stream_number, False))
        if not refresh_sequence:
            if expire_fast and not is_live:
                return False, last_seq
            elif old_mpd_url == mpd_url:
                return True, last_seq
        try:
            fmts, _ = self._extract_mpd_formats_and_subtitles(
                mpd_url, None, note=False, errnote=False, fatal=False)
        except ExtractorError:
            fmts = None
        if not fmts:
            no_fragment_score += 2
            return False, last_seq
        fmt_info = next(x for x in fmts if x['manifest_stream_number'] == stream_number)
        fragments = fmt_info['fragments']
        fragment_base_url = fmt_info['fragment_base_url']
        assert fragment_base_url

        _last_seq = int(re.search(r'(?:/|^)sq/(\d+)', fragments[-1]['path']).group(1))
        return True, _last_seq

    while is_live:
        fetch_time = time.time()
        if no_fragment_score > 30:
            return
        if last_segment_url:
            # Obtain from "X-Head-Seqnum" header value from each segment
            try:
                urlh = self._request_webpage(
                    last_segment_url, None, note=False, errnote=False, fatal=False)
            except ExtractorError:
                urlh = None
            last_seq = try_get(urlh, lambda x: int_or_none(x.headers['X-Head-Seqnum']))
            if last_seq is None:
                no_fragment_score += 2
                last_segment_url = None
                continue
        else:
            should_continue, last_seq = _extract_sequence_from_mpd(True, no_fragment_score > 15)
            no_fragment_score += 2
            if not should_continue:
                continue

        if known_idx > last_seq:
            last_segment_url = None
            continue

        last_seq += 1

        if begin_index < 0 and known_idx < 0:
            # skip from the start when it's negative value
            known_idx = last_seq + begin_index
        if lack_early_segments:
            known_idx = max(known_idx, last_seq - int(MAX_DURATION // fragments[-1]['duration']))
        try:
            for idx in range(known_idx, last_seq):
                # do not update sequence here or you'll get skipped some part of it
                should_continue, _ = _extract_sequence_from_mpd(False, False)
                if not should_continue:
                    known_idx = idx - 1
                    # Used purely as control flow; caught by the handler below
                    raise ExtractorError('breaking out of outer loop')
                last_segment_url = urljoin(fragment_base_url, 'sq/%d' % idx)
                yield {
                    'url': last_segment_url,
                    'fragment_count': last_seq,
                }
            if known_idx == last_seq:
                no_fragment_score += 5
            else:
                no_fragment_score = 0
            known_idx = last_seq
        except ExtractorError:
            continue

        # Wait out the remainder of the polling interval
        time.sleep(max(0, FETCH_SPAN + fetch_time - time.time()))
2475
def _extract_player_url(self, *ytcfgs, webpage=None):
    """Return the player JS URL found in the given ytcfg dicts, or None.

    Looks for ``PLAYER_JS_URL`` first and ``WEB_PLAYER_CONTEXT_CONFIGS/*/jsUrl``
    second; the hit is resolved against ``https://www.youtube.com``.
    ``webpage`` is accepted but not used here.
    """
    js_url = traverse_obj(
        ytcfgs, (..., 'PLAYER_JS_URL'), (..., 'WEB_PLAYER_CONTEXT_CONFIGS', ..., 'jsUrl'),
        get_all=False, expected_type=str)
    if js_url:
        return urljoin('https://www.youtube.com', js_url)
    return None
109dd3b2 2483
b6de707d 2484 def _download_player_url(self, video_id, fatal=False):
2485 res = self._download_webpage(
2486 'https://www.youtube.com/iframe_api',
2487 note='Downloading iframe API JS', video_id=video_id, fatal=fatal)
2488 if res:
2489 player_version = self._search_regex(
2490 r'player\\?/([0-9a-fA-F]{8})\\?/', res, 'player version', fatal=fatal)
2491 if player_version:
2492 return f'https://www.youtube.com/s/player/{player_version}/player_ias.vflset/en_US/base.js'
2493
60064c53
PH
2494 def _signature_cache_id(self, example_sig):
2495 """ Return a string representation of a signature """
14f25df2 2496 return '.'.join(str(len(part)) for part in example_sig.split('.'))
60064c53 2497
e40c758c
S
2498 @classmethod
2499 def _extract_player_info(cls, player_url):
2500 for player_re in cls._PLAYER_INFO_RE:
2501 id_m = re.search(player_re, player_url)
2502 if id_m:
2503 break
2504 else:
c081b35c 2505 raise ExtractorError('Cannot identify player %r' % player_url)
545cc85d 2506 return id_m.group('id')
e40c758c 2507
404f611f 2508 def _load_player(self, video_id, player_url, fatal=True):
109dd3b2 2509 player_id = self._extract_player_info(player_url)
2510 if player_id not in self._code_cache:
1276a43a 2511 code = self._download_webpage(
109dd3b2 2512 player_url, video_id, fatal=fatal,
2513 note='Downloading player ' + player_id,
2514 errnote='Download of %s failed' % player_url)
1276a43a 2515 if code:
2516 self._code_cache[player_id] = code
404f611f 2517 return self._code_cache.get(player_id)
109dd3b2 2518
e40c758c 2519 def _extract_signature_function(self, video_id, player_url, example_sig):
545cc85d 2520 player_id = self._extract_player_info(player_url)
e0df6211 2521
c4417ddb 2522 # Read from filesystem cache
86e5f3ed 2523 func_id = f'js_{player_id}_{self._signature_cache_id(example_sig)}'
c4417ddb 2524 assert os.path.basename(func_id) == func_id
a0e07d31 2525
ae61d108 2526 self.write_debug(f'Extracting signature function {func_id}')
580ce007 2527 cache_spec, code = self.cache.load('youtube-sigfuncs', func_id), None
83799698 2528
580ce007 2529 if not cache_spec:
2530 code = self._load_player(video_id, player_url)
404f611f 2531 if code:
109dd3b2 2532 res = self._parse_sig_js(code)
ac668111 2533 test_string = ''.join(map(chr, range(len(example_sig))))
580ce007 2534 cache_spec = [ord(c) for c in res(test_string)]
9809740b 2535 self.cache.store('youtube-sigfuncs', func_id, cache_spec)
580ce007 2536
2537 return lambda s: ''.join(s[i] for i in cache_spec)
83799698 2538
60064c53 2539 def _print_sig_code(self, func, example_sig):
404f611f 2540 if not self.get_param('youtube_print_sig_code'):
2541 return
2542
edf3e38e
PH
2543 def gen_sig_code(idxs):
2544 def _genslice(start, end, step):
78caa52a 2545 starts = '' if start == 0 else str(start)
8bcc8756 2546 ends = (':%d' % (end + step)) if end + step >= 0 else ':'
69ea8ca4 2547 steps = '' if step == 1 else (':%d' % step)
86e5f3ed 2548 return f's[{starts}{ends}{steps}]'
edf3e38e
PH
2549
2550 step = None
7af808a5
PH
2551 # Quelch pyflakes warnings - start will be set when step is set
2552 start = '(Never used)'
edf3e38e
PH
2553 for i, prev in zip(idxs[1:], idxs[:-1]):
2554 if step is not None:
2555 if i - prev == step:
2556 continue
2557 yield _genslice(start, prev, step)
2558 step = None
2559 continue
2560 if i - prev in [-1, 1]:
2561 step = i - prev
2562 start = prev
2563 continue
2564 else:
78caa52a 2565 yield 's[%d]' % prev
edf3e38e 2566 if step is None:
78caa52a 2567 yield 's[%d]' % i
edf3e38e
PH
2568 else:
2569 yield _genslice(start, i, step)
2570
ac668111 2571 test_string = ''.join(map(chr, range(len(example_sig))))
c705320f 2572 cache_res = func(test_string)
edf3e38e 2573 cache_spec = [ord(c) for c in cache_res]
78caa52a 2574 expr_code = ' + '.join(gen_sig_code(cache_spec))
60064c53 2575 signature_id_tuple = '(%s)' % (
14f25df2 2576 ', '.join(str(len(p)) for p in example_sig.split('.')))
69ea8ca4 2577 code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
78caa52a 2578 ' return %s\n') % (signature_id_tuple, expr_code)
69ea8ca4 2579 self.to_screen('Extracted signature function:\n' + code)
edf3e38e 2580
e0df6211
PH
def _parse_sig_js(self, jscode):
    """Locate the signature function in the player JS and return a Python wrapper for it.

    The function name is found by trying the patterns below in order (each
    captures it in the ``sig`` group). The returned callable takes one string
    ``s`` and calls the extracted JS function with ``[s]``.
    """
    funcname = self._search_regex(
        (r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\bm=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(h\.s\)\)',
         r'\bc&&\(c=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(c\)\)',
         r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\);[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\)',
         r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
         r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
         # Obsolete patterns
         r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
         r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         # NOTE(review): the next two patterns are character-for-character identical;
         # the second can never match where the first did not - confirm and drop one.
         r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\('),
        jscode, 'Initial JS player signature function name', group='sig')

    jsi = JSInterpreter(jscode)
    initial_function = jsi.extract_function(funcname)
    # The extracted function takes its arguments as a list
    return lambda s: initial_function([s])
2604
580ce007 2605 def _cached(self, func, *cache_id):
2606 def inner(*args, **kwargs):
2607 if cache_id not in self._player_cache:
2608 try:
2609 self._player_cache[cache_id] = func(*args, **kwargs)
2610 except ExtractorError as e:
2611 self._player_cache[cache_id] = e
2612 except Exception as e:
2613 self._player_cache[cache_id] = ExtractorError(traceback.format_exc(), cause=e)
2614
2615 ret = self._player_cache[cache_id]
2616 if isinstance(ret, Exception):
2617 raise ret
2618 return ret
2619 return inner
2620
545cc85d 2621 def _decrypt_signature(self, s, video_id, player_url):
257a2501 2622 """Turn the encrypted s field into a working signature"""
580ce007 2623 extract_sig = self._cached(
2624 self._extract_signature_function, 'sig', player_url, self._signature_cache_id(s))
2625 func = extract_sig(video_id, player_url, s)
2626 self._print_sig_code(func, s)
2627 return func(s)
404f611f 2628
def _decrypt_nsig(self, s, video_id, player_url):
    """Turn the encrypted n field into a working signature"""
    if player_url is None:
        raise ExtractorError('Cannot decrypt nsig without player_url')
    player_url = urljoin('https://www.youtube.com', player_url)

    jsi, player_id, func_code = self._extract_n_function_code(video_id, player_url)
    if self.get_param('youtube_print_sig_code'):
        self.to_screen(f'Extracted nsig function from {player_id}:\n{func_code[1]}\n')

    try:
        native_nsig = self._cached(self._extract_n_function_from_code, 'nsig func', player_url)
        ret = native_nsig(jsi, func_code)(s)
    except JSInterpreter.Exception as e:
        # Fall back to PhantomJS; if it cannot be set up, surface the native error
        try:
            jsi = PhantomJSwrapper(self, timeout=5000)
        except ExtractorError:
            raise e
        self.report_warning(
            f'Native nsig extraction failed: Trying with PhantomJS\n'
            f' n = {s} ; player = {player_url}', video_id)
        self.write_debug(e)

        args, func_body = func_code
        script = f'console.log(function({", ".join(args)}) {{ {func_body} }}({s!r}));'
        ret = jsi.execute(script, video_id=video_id, note='Executing signature code').strip()

    self.write_debug(f'Decrypted nsig {s} => {ret}')
    return ret
2659
90a1df30 2660 def _extract_n_function_name(self, jscode):
2661 funcname, idx = self._search_regex(
2662 r'\.get\("n"\)\)&&\(b=(?P<nfunc>[a-zA-Z0-9$]+)(?:\[(?P<idx>\d+)\])?\([a-zA-Z0-9]\)',
2663 jscode, 'Initial JS player n function name', group=('nfunc', 'idx'))
2664 if not idx:
2665 return funcname
2666
2667 return json.loads(js_to_json(self._search_regex(
2668 rf'var {re.escape(funcname)}\s*=\s*(\[.+?\]);', jscode,
2669 f'Initial JS player n function list ({funcname}.{idx})')))[int(idx)]
2670
def _extract_n_function_code(self, video_id, player_url):
    """Return ``(jsi, player_id, func_code)`` for the player's n-parameter function.

    ``func_code`` comes from the ``youtube-nsig`` cache when present;
    otherwise it is extracted from the downloaded player JS and stored.
    """
    player_id = self._extract_player_info(player_url)
    cached_code = self.cache.load('youtube-nsig', player_id, min_ver='2022.08.19.2')
    if cached_code:
        # On a cache hit the interpreter is built from the cached value itself
        return JSInterpreter(cached_code), player_id, cached_code

    jscode = self._load_player(video_id, player_url)
    jsi = JSInterpreter(jscode)
    func_code = jsi.extract_function_code(self._extract_n_function_name(jscode))
    self.cache.store('youtube-nsig', player_id, func_code)
    return jsi, player_id, func_code
2683
2684 def _extract_n_function_from_code(self, jsi, func_code):
8f53dc44 2685 func = jsi.extract_function_from_code(*func_code)
f6ca640b 2686
580ce007 2687 def extract_nsig(s):
25836db6 2688 try:
2689 ret = func([s])
2690 except JSInterpreter.Exception:
2691 raise
2692 except Exception as e:
2693 raise JSInterpreter.Exception(traceback.format_exc(), cause=e)
2694
f6ca640b 2695 if ret.startswith('enhanced_except_'):
25836db6 2696 raise JSInterpreter.Exception('Signature function returned an exception')
f6ca640b 2697 return ret
580ce007 2698
2699 return extract_nsig
e0df6211 2700
109dd3b2 2701 def _extract_signature_timestamp(self, video_id, player_url, ytcfg=None, fatal=False):
2702 """
2703 Extract signatureTimestamp (sts)
2704 Required to tell API what sig/player version is in use.
2705 """
2706 sts = None
2707 if isinstance(ytcfg, dict):
2708 sts = int_or_none(ytcfg.get('STS'))
2709
2710 if not sts:
2711 # Attempt to extract from player
2712 if player_url is None:
2713 error_msg = 'Cannot extract signature timestamp without player_url.'
2714 if fatal:
2715 raise ExtractorError(error_msg)
2716 self.report_warning(error_msg)
2717 return
404f611f 2718 code = self._load_player(video_id, player_url, fatal=fatal)
2719 if code:
109dd3b2 2720 sts = int_or_none(self._search_regex(
2721 r'(?:signatureTimestamp|sts)\s*:\s*(?P<sts>[0-9]{5})', code,
2722 'JS player signature timestamp', group='sts', fatal=fatal))
2723 return sts
2724
def _mark_watched(self, video_id, player_responses):
    """Request the playback and watch-time tracking URLs for the video.

    Both URLs come from ``playbackTracking`` in the player responses. If one
    is missing, a warning is issued and the remaining requests are skipped.
    Download failures are non-fatal.
    """
    # cpn generation algorithm is reverse engineered from base.js.
    # In fact it works even with dummy cpn.
    CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'

    for is_full, key in enumerate(('videostatsPlaybackUrl', 'videostatsWatchtimeUrl')):
        label = 'fully ' if is_full else ''
        url = get_first(
            player_responses, ('playbackTracking', key, 'baseUrl'), expected_type=url_or_none)
        if not url:
            self.report_warning(f'Unable to mark {label}watched')
            return

        parsed_url = urllib.parse.urlparse(url)
        qs = urllib.parse.parse_qs(parsed_url.query)

        cpn = ''.join(CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(0, 16))

        # more consistent results setting it to right before the end
        reported_length = (qs.get('len') or ['1.5'])[0]
        video_length = [str(float(reported_length) - 1)]

        qs['ver'] = ['2']
        qs['cpn'] = [cpn]
        qs['cmt'] = video_length
        qs['el'] = 'detailpage'  # otherwise defaults to "shorts"

        if is_full:
            # these seem to mark watchtime "history" in the real world
            # they're required, so send in a single value
            qs['st'] = video_length
            qs['et'] = video_length

        new_query = urllib.parse.urlencode(qs, True)
        self._download_webpage(
            urllib.parse.urlunparse(parsed_url._replace(query=new_query)),
            video_id, f'Marking {label}watched',
            'Unable to mark watched', fatal=False)
d77ab8e2 2765
@classmethod
def _extract_from_webpage(cls, url, webpage):
    """Yield url results for YouTube videos referenced by ``webpage``.

    A ``<link rel="alternate">`` pointing at a YouTube watch URL is yielded
    on its own, after which ``cls.StopExtraction`` is raised. Otherwise the
    parent class's results come first, followed by lazyYT embeds and
    "YouTube Video Importer" players.
    """
    # Invidious Instances
    # https://github.com/yt-dlp/yt-dlp/issues/195
    # https://github.com/iv-org/invidious/pull/1730
    mobj = re.search(
        r'<link rel="alternate" href="(?P<url>https://www\.youtube\.com/watch\?v=[0-9A-Za-z_-]{11})"',
        webpage)
    if mobj:
        yield cls.url_result(mobj.group('url'), cls)
        # Nothing below runs once the alternate link has been yielded
        raise cls.StopExtraction()

    yield from super()._extract_from_webpage(url, webpage)

    # lazyYT YouTube embed
    for id_ in re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage):
        yield cls.url_result(unescapeHTML(id_), cls, id_)

    # Wordpress "YouTube Video Importer" plugin
    # findall returns one tuple per match (q1, q2, id); the id is the last element
    for m in re.findall(r'''(?x)<div[^>]+
            class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
            data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage):
        yield cls.url_result(m[-1], cls, m[-1])
66c9fa36 2789
97665381
PH
@classmethod
def extract_id(cls, url):
    """Return the video id that ``cls.get_temp_id`` finds in ``url``.

    Raises ExtractorError when no id is found.
    """
    video_id = cls.get_temp_id(url)
    if video_id:
        return video_id
    raise ExtractorError(f'Invalid URL: {url}')
c5e8d7af 2796
def _extract_chapters_from_json(self, data, duration):
    """Extract chapters from the chaptered player bar in ``data``.

    Start times are read from ``timeRangeStartMillis`` and scaled by 1000;
    titles are read from ``title.simpleText``.
    """
    chapters_path = (
        'playerOverlays', 'playerOverlayRenderer', 'decoratedPlayerBarRenderer',
        'decoratedPlayerBarRenderer', 'playerBar', 'chapteredPlayerBarRenderer', 'chapters'
    )
    chapter_list = traverse_obj(data, chapters_path, expected_type=list)

    def start_seconds(chapter):
        return float_or_none(
            traverse_obj(chapter, ('chapterRenderer', 'timeRangeStartMillis')), scale=1000)

    def title_text(chapter):
        return traverse_obj(
            chapter, ('chapterRenderer', 'title', 'simpleText'), expected_type=str)

    return self._extract_chapters(
        chapter_list,
        chapter_time=start_seconds,
        chapter_title=title_text,
        duration=duration)
2811
def _extract_chapters_from_engagement_panel(self, data, duration):
    """Extract chapters from the macro-marker lists of the engagement panels.

    Returns the first non-empty chapter list produced by any panel, or an
    empty list if there is none.
    """
    content_list = traverse_obj(
        data,
        ('engagementPanels', ..., 'engagementPanelSectionListRenderer', 'content', 'macroMarkersListRenderer', 'contents'),
        expected_type=list, default=[])

    def chapter_time(chapter):
        return parse_duration(self._get_text(chapter, 'timeDescription'))

    def chapter_title(chapter):
        return self._get_text(chapter, 'title')

    for contents in content_list:
        chapters = self._extract_chapters(
            traverse_obj(contents, (..., 'macroMarkersListItemRenderer')),
            chapter_time, chapter_title, duration)
        if chapters:
            return chapters
    return []
7c365c21 2824
1890fc63 2825 def _extract_chapters_from_description(self, description, duration):
2826 return self._extract_chapters(
2827 re.findall(r'(?m)^((?:\d+:)?\d{1,2}:\d{2})\b\W*\s(.+?)\s*$', description or ''),
2828 chapter_time=lambda x: parse_duration(x[0]), chapter_title=lambda x: x[1],
2829 duration=duration, strict=False)
84213ea8 2830
1890fc63 2831 def _extract_chapters(self, chapter_list, chapter_time, chapter_title, duration, strict=True):
2832 if not duration:
2833 return
2834 chapter_list = [{
2835 'start_time': chapter_time(chapter),
2836 'title': chapter_title(chapter),
2837 } for chapter in chapter_list or []]
2838 if not strict:
2839 chapter_list.sort(key=lambda c: c['start_time'] or 0)
2840
a3976e07 2841 chapters = [{'start_time': 0}]
1890fc63 2842 for idx, chapter in enumerate(chapter_list):
a3976e07 2843 if chapter['start_time'] is None:
1890fc63 2844 self.report_warning(f'Incomplete chapter {idx}')
2845 elif chapters[-1]['start_time'] <= chapter['start_time'] <= duration:
1890fc63 2846 chapters.append(chapter)
2847 else:
2848 self.report_warning(f'Invalid start time for chapter "{chapter["title"]}"')
a3976e07 2849 return chapters[1:]
84213ea8 2850
a1c5d2ca
M
2851 def _extract_comment(self, comment_renderer, parent=None):
2852 comment_id = comment_renderer.get('commentId')
2853 if not comment_id:
2854 return
fe93e2c4 2855
052e1350 2856 text = self._get_text(comment_renderer, 'contentText')
fe93e2c4 2857
49bd8c66 2858 # note: timestamp is an estimate calculated from the current time and time_text
f3aa3c3f 2859 timestamp, time_text = self._extract_time_text(comment_renderer, 'publishedTimeText')
052e1350 2860 author = self._get_text(comment_renderer, 'authorText')
a1c5d2ca 2861 author_id = try_get(comment_renderer,
14f25df2 2862 lambda x: x['authorEndpoint']['browseEndpoint']['browseId'], str)
fe93e2c4 2863
49bd8c66 2864 votes = parse_count(try_get(comment_renderer, (lambda x: x['voteCount']['simpleText'],
14f25df2 2865 lambda x: x['likeCount']), str)) or 0
a1c5d2ca 2866 author_thumbnail = try_get(comment_renderer,
14f25df2 2867 lambda x: x['authorThumbnail']['thumbnails'][-1]['url'], str)
a1c5d2ca
M
2868
2869 author_is_uploader = try_get(comment_renderer, lambda x: x['authorIsChannelOwner'], bool)
97524332 2870 is_favorited = 'creatorHeart' in (try_get(
2871 comment_renderer, lambda x: x['actionButtons']['commentActionButtonsRenderer'], dict) or {})
a1c5d2ca
M
2872 return {
2873 'id': comment_id,
2874 'text': text,
d92f5d5a 2875 'timestamp': timestamp,
a1c5d2ca
M
2876 'time_text': time_text,
2877 'like_count': votes,
97524332 2878 'is_favorited': is_favorited,
a1c5d2ca
M
2879 'author': author,
2880 'author_id': author_id,
2881 'author_thumbnail': author_thumbnail,
2882 'author_is_uploader': author_is_uploader,
2883 'parent': parent or 'root'
2884 }
2885
def _comment_entries(self, root_continuation_data, ytcfg, video_id, parent=None, tracker=None):
    """Generate comment dicts by following comment continuations.

    @param root_continuation_data  renderer from which the first continuation is extracted
    @param ytcfg                   passed to the API header and request helpers
    @param video_id                used to build a continuation when none is found, and in warnings
    @param parent                  id of the parent comment when fetching replies, else None
    @param tracker                 dict of counters shared across the recursive calls;
                                   created here when not given
    @yields comment dicts as returned by ``_extract_comment``

    Replies are fetched by calling this method recursively with ``parent`` set.
    """

    get_single_config_arg = lambda c: self._configuration_arg(c, [''])[0]

    def extract_header(contents):
        """Record the estimated comment count and return the continuation for the chosen sort order."""
        _continuation = None
        for content in contents:
            comments_header_renderer = traverse_obj(content, 'commentsHeaderRenderer')
            expected_comment_count = self._get_count(
                comments_header_renderer, 'countText', 'commentsCount')

            if expected_comment_count:
                tracker['est_total'] = expected_comment_count
                self.to_screen(f'Downloading ~{expected_comment_count} comments')
            comment_sort_index = int(get_single_config_arg('comment_sort') != 'top')  # 1 = new, 0 = top

            sort_menu_item = try_get(
                comments_header_renderer,
                lambda x: x['sortMenu']['sortFilterSubMenuRenderer']['subMenuItems'][comment_sort_index], dict) or {}
            sort_continuation_ep = sort_menu_item.get('serviceEndpoint') or {}

            _continuation = self._extract_continuation_ep_data(sort_continuation_ep) or self._extract_continuation(sort_menu_item)
            if not _continuation:
                continue

            sort_text = str_or_none(sort_menu_item.get('title'))
            if not sort_text:
                sort_text = 'top comments' if comment_sort_index == 0 else 'newest first'
            self.to_screen('Sorting comments by %s' % sort_text.lower())
            break
        return _continuation

    def extract_thread(contents):
        """Yield the comments in ``contents``, each followed by its replies.

        Yields None as a sentinel once the top-level comment limit is reached;
        the consuming loop below stops on it.
        """
        if not parent:
            tracker['current_page_thread'] = 0
        for content in contents:
            if not parent and tracker['total_parent_comments'] >= max_parents:
                yield
            comment_thread_renderer = try_get(content, lambda x: x['commentThreadRenderer'])
            comment_renderer = get_first(
                (comment_thread_renderer, content), [['commentRenderer', ('comment', 'commentRenderer')]],
                expected_type=dict, default={})

            comment = self._extract_comment(comment_renderer, parent)
            if not comment:
                continue

            tracker['running_total'] += 1
            tracker['total_reply_comments' if parent else 'total_parent_comments'] += 1
            yield comment

            # Attempt to get the replies
            comment_replies_renderer = try_get(
                comment_thread_renderer, lambda x: x['replies']['commentRepliesRenderer'], dict)

            if comment_replies_renderer:
                tracker['current_page_thread'] += 1
                comment_entries_iter = self._comment_entries(
                    comment_replies_renderer, ytcfg, video_id,
                    parent=comment.get('id'), tracker=tracker)
                # Capped by the per-thread limit and by what is left of the overall reply limit
                yield from itertools.islice(comment_entries_iter, min(
                    max_replies_per_thread, max(0, max_replies - tracker['total_reply_comments'])))

    # Keeps track of counts across recursive calls
    if not tracker:
        tracker = dict(
            running_total=0,
            est_total=0,
            current_page_thread=0,
            total_parent_comments=0,
            total_reply_comments=0)

    # TODO: Deprecated
    # YouTube comments have a max depth of 2
    max_depth = int_or_none(get_single_config_arg('max_comment_depth'))
    if max_depth:
        self._downloader.deprecation_warning(
            '[youtube] max_comment_depth extractor argument is deprecated. Set max replies in the max-comments extractor argument instead.')
    if max_depth == 1 and parent:
        return

    # Missing values are padded with '' and so default to sys.maxsize (no limit).
    # NOTE(review): max_comments is unpacked but not used anywhere in this method.
    max_comments, max_parents, max_replies, max_replies_per_thread, *_ = map(
        lambda p: int_or_none(p, default=sys.maxsize), self._configuration_arg('max_comments', ) + [''] * 4)

    continuation = self._extract_continuation(root_continuation_data)

    response = None
    is_forced_continuation = False
    is_first_continuation = parent is None
    if is_first_continuation and not continuation:
        # Sometimes you can get comments by generating the continuation yourself,
        # even if YouTube initially reports them being disabled - e.g. stories comments.
        # Note: if the comment section is actually disabled, YouTube may return a response with
        # required check_get_keys missing. So we will disable that check initially in this case.
        continuation = self._build_api_continuation_query(self._generate_comment_continuation(video_id))
        is_forced_continuation = True

    for page_num in itertools.count(0):
        if not continuation:
            break
        headers = self.generate_api_headers(ytcfg=ytcfg, visitor_data=self._extract_visitor_data(response))
        comment_prog_str = f"({tracker['running_total']}/{tracker['est_total']})"
        if page_num == 0:
            if is_first_continuation:
                note_prefix = 'Downloading comment section API JSON'
            else:
                note_prefix = ' Downloading comment API JSON reply thread %d %s' % (
                    tracker['current_page_thread'], comment_prog_str)
        else:
            note_prefix = '%sDownloading comment%s API JSON page %d %s' % (
                ' ' if parent else '', ' replies' if parent else '',
                page_num, comment_prog_str)

        response = self._extract_response(
            item_id=None, query=continuation,
            ep='next', ytcfg=ytcfg, headers=headers, note=note_prefix,
            check_get_keys='onResponseReceivedEndpoints' if not is_forced_continuation else None)
        # Only the very first, self-generated request skips the key check
        is_forced_continuation = False
        continuation_contents = traverse_obj(
            response, 'onResponseReceivedEndpoints', expected_type=list, default=[])

        continuation = None
        for continuation_section in continuation_contents:
            continuation_items = traverse_obj(
                continuation_section,
                (('reloadContinuationItemsCommand', 'appendContinuationItemsAction'), 'continuationItems'),
                get_all=False, expected_type=list) or []
            if is_first_continuation:
                # The first section is treated as the header: it only provides
                # the continuation for the selected sort order
                continuation = extract_header(continuation_items)
                is_first_continuation = False
                if continuation:
                    break
                continue

            for entry in extract_thread(continuation_items):
                if not entry:
                    # Sentinel from extract_thread: top-level comment limit reached
                    return
                yield entry
            continuation = self._extract_continuation({'contents': continuation_items})
            if continuation:
                break

    message = self._get_text(root_continuation_data, ('contents', ..., 'messageRenderer', 'text'), max_runs=1)
    if message and not parent and tracker['running_total'] == 0:
        self.report_warning(f'Youtube said: {message}', video_id=video_id, only_once=True)
3031
3032 @staticmethod
3033 def _generate_comment_continuation(video_id):
3034 """
3035 Generates initial comment section continuation token from given video id
3036 """
3037 token = f'\x12\r\x12\x0b{video_id}\x18\x062\'"\x11"\x0b{video_id}0\x00x\x020\x00B\x10comments-section'
3038 return base64.b64encode(token.encode()).decode()
3039
def _get_comments(self, ytcfg, video_id, contents, webpage):
    """
    Entry point for comment extraction.

    Returns a lazy iterator over the comment entries of the section identified
    as 'comment-item-section', truncated to the `max_comments` extractor
    argument when one is given (no limit otherwise).
    """
    limit = int_or_none(self._configuration_arg('max_comments', [''])[0])

    def _iter_comments(sections):
        # Pick the first item section that is flagged as the comment section
        comment_section = None
        for candidate in traverse_obj(sections, (..., 'itemSectionRenderer'), default={}):
            if candidate.get('sectionIdentifier') == 'comment-item-section':
                comment_section = candidate
                break
        yield from self._comment_entries(comment_section, ytcfg, video_id)

    return itertools.islice(_iter_comments(contents), 0, limit)
a1c5d2ca 3050
109dd3b2 3051 @staticmethod
99e9e001 3052 def _get_checkok_params():
3053 return {'contentCheckOk': True, 'racyCheckOk': True}
3054
3055 @classmethod
3056 def _generate_player_context(cls, sts=None):
109dd3b2 3057 context = {
3058 'html5Preference': 'HTML5_PREF_WANTS',
3059 }
3060 if sts is not None:
3061 context['signatureTimestamp'] = sts
3062 return {
3063 'playbackContext': {
3064 'contentPlaybackContext': context
a1a7907b 3065 },
99e9e001 3066 **cls._get_checkok_params()
109dd3b2 3067 }
3068
e7e94f2a
D
3069 @staticmethod
def _is_agegated(player_response):
    """
    Tell whether `player_response` describes an age-gated video.

    The response counts as age-gated when its playabilityStatus carries a
    truthy 'desktopLegacyAgeGateReason', or when its 'status' or 'reason'
    text contains one of the known age-gate markers.

    @param player_response  Player response (anything accepted by traverse_obj)
    @returns                True if an age-gate marker was found
    """
    if traverse_obj(player_response, ('playabilityStatus', 'desktopLegacyAgeGateReason')):
        return True

    reasons = traverse_obj(player_response, ('playabilityStatus', ('status', 'reason')), default=[])
    AGE_GATE_REASONS = (
        'confirm your age', 'age-restricted', 'inappropriate',  # reason
        'age_verification_required', 'age_check_required',  # status
    )
    # The markers are lower-case, whereas status values appear to be upper-case
    # (cf. the 'UNPLAYABLE' comparison in _is_unplayable), so a case-sensitive
    # match could never hit the status markers. Compare case-insensitively and
    # ignore anything that is not text.
    candidates = [reason.lower() for reason in reasons if isinstance(reason, str)]
    return any(expected in reason for expected in AGE_GATE_REASONS for reason in candidates)
3080
3081 @staticmethod
def _is_unplayable(player_response):
    """Tell whether the playability status of `player_response` is exactly 'UNPLAYABLE'."""
    status = traverse_obj(player_response, ('playabilityStatus', 'status'))
    return status == 'UNPLAYABLE'
9275f62c 3084
# Opaque parameter string. It is sent as `params` in the player API query
# (for story URLs and for the android client) and as the `pp` query parameter
# when downloading the webpage of a story.
_STORY_PLAYER_PARAMS = '8AEB'
3086
def _extract_player_response(self, client, video_id, master_ytcfg, player_ytcfg, player_url, initial_pr, smuggled_data):
    """
    Request the player API JSON of `video_id` using the given innertube client.

    The signature timestamp is only looked up when a `player_url` is available.
    Returns the API response, or None when the response is falsy.
    """
    session_index = self._extract_session_index(player_ytcfg, master_ytcfg)
    syncid = self._extract_account_syncid(player_ytcfg, master_ytcfg, initial_pr)

    sts = None
    if player_url:
        sts = self._extract_signature_timestamp(video_id, player_url, master_ytcfg, fatal=False)

    headers = self.generate_api_headers(
        ytcfg=player_ytcfg, account_syncid=syncid, session_index=session_index, default_client=client)

    yt_query = {'videoId': video_id}
    # Stories and the android client get the extra `params` value
    needs_story_params = smuggled_data.get('is_story') or _split_innertube_client(client)[0] == 'android'
    if needs_story_params:
        yt_query['params'] = self._STORY_PLAYER_PARAMS
    yt_query.update(self._generate_player_context(sts))

    note = 'Downloading %s player API JSON' % client.replace('_', ' ').strip()
    response = self._extract_response(
        item_id=video_id, ep='player', query=yt_query,
        ytcfg=player_ytcfg, headers=headers, fatal=True,
        default_client=client, note=note)
    return response or None
3108
def _get_requested_clients(self, url, smuggled_data):
    """
    Resolve the `player_client` extractor argument into an ordered, de-duplicated
    list of innertube client names.

    'default' expands to android + web, 'all' expands to every client whose name
    does not start with '_' (highest priority first), and unknown names are
    skipped with a warning. For music URLs, the existing `*_music` variants of
    the chosen clients are added as well.
    """
    default = ['android', 'web']
    allowed_clients = [name for name in INNERTUBE_CLIENTS if not name.startswith('_')]
    allowed_clients.sort(key=lambda name: INNERTUBE_CLIENTS[name]['priority'], reverse=True)

    requested_clients = []
    for client in self._configuration_arg('player_client'):
        if client in allowed_clients:
            requested_clients.append(client)
            continue
        if client == 'default':
            requested_clients.extend(default)
            continue
        if client == 'all':
            requested_clients.extend(allowed_clients)
            continue
        self.report_warning(f'Skipping unsupported client {client}')

    if not requested_clients:
        requested_clients = default

    if smuggled_data.get('is_music_url') or self.is_music_url(url):
        requested_clients.extend(
            f'{name}_music' for name in requested_clients if f'{name}_music' in INNERTUBE_CLIENTS)

    return orderedSet(requested_clients)
cf7e015f 3132
def _extract_player_responses(self, clients, video_id, webpage, master_ytcfg, smuggled_data):
    """
    Collect player responses for `video_id` from each of the requested clients.

    Clients are tried in the given order; further clients may be queued while
    iterating when a response turns out to be unplayable or age-gated.

    @param clients       List of innertube client names to try
    @param webpage       Watch page HTML, or a falsy value if it was not downloaded
    @param master_ytcfg  ytcfg used as-is for the web client
    @returns             (list of player responses, player_url)
    @raises              The last ExtractorError, if errors occurred and no
                         player response was collected
    """
    initial_pr = None
    if webpage:
        initial_pr = self._search_json(
            self._YT_INITIAL_PLAYER_RESPONSE_RE, webpage, 'initial player response', video_id, fatal=False)

    all_clients = set(clients)
    # Reversed so that clients.pop() returns the clients in their original order
    clients = clients[::-1]
    prs = []

    def append_client(*client_names):
        """ Append the first client name that exists but not already used """
        for client_name in client_names:
            actual_client = _split_innertube_client(client_name)[0]
            if actual_client in INNERTUBE_CLIENTS:
                if actual_client not in all_clients:
                    clients.append(client_name)
                    all_clients.add(actual_client)
                # Stop at the first existing client, whether or not it was newly queued
                return

    # Android player_response does not have microFormats which are needed for
    # extraction of some data. So we return the initial_pr with formats
    # stripped out even if not requested by the user
    # See: https://github.com/yt-dlp/yt-dlp/issues/501
    if initial_pr:
        pr = dict(initial_pr)
        pr['streamingData'] = None
        prs.append(pr)

    last_error = None
    tried_iframe_fallback = False
    player_url = None
    while clients:
        client, base_client, variant = _split_innertube_client(clients.pop())
        player_ytcfg = master_ytcfg if client == 'web' else {}
        if 'configs' not in self._configuration_arg('player_skip') and client != 'web':
            player_ytcfg = self._download_ytcfg(client, video_id) or player_ytcfg

        # The player URL found first is reused for all subsequent clients
        player_url = player_url or self._extract_player_url(master_ytcfg, player_ytcfg, webpage=webpage)
        require_js_player = self._get_default_ytcfg(client).get('REQUIRE_JS_PLAYER')
        if 'js' in self._configuration_arg('player_skip'):
            require_js_player = False
            player_url = None

        # Attempt the separate player URL download at most once per call
        if not player_url and not tried_iframe_fallback and require_js_player:
            player_url = self._download_player_url(video_id)
            tried_iframe_fallback = True

        try:
            # For the web client, the response embedded in the webpage is reused when available
            pr = initial_pr if client == 'web' and initial_pr else self._extract_player_response(
                client, video_id, player_ytcfg or master_ytcfg, player_ytcfg, player_url if require_js_player else None, initial_pr, smuggled_data)
        except ExtractorError as e:
            # Only the most recent error is kept; earlier ones are downgraded to warnings
            if last_error:
                self.report_warning(last_error)
            last_error = e
            continue

        if pr:
            # YouTube may return a different video player response than expected.
            # See: https://github.com/TeamNewPipe/NewPipe/issues/8713
            pr_video_id = traverse_obj(pr, ('videoDetails', 'videoId'))
            if pr_video_id and pr_video_id != video_id:
                self.report_warning(
                    f'Skipping player response from {client} client (got player response for video "{pr_video_id}" instead of "{video_id}")' + bug_reports_message())
            else:
                prs.append(pr)

        # creator clients can bypass AGE_VERIFICATION_REQUIRED if logged in
        if variant == 'embedded' and self._is_unplayable(pr) and self.is_authenticated:
            append_client(f'{base_client}_creator')
        elif self._is_agegated(pr):
            if variant == 'tv_embedded':
                append_client(f'{base_client}_embedded')
            elif not variant:
                append_client(f'tv_embedded.{base_client}', f'{base_client}_embedded')

    if last_error:
        # Fatal only when nothing at all could be extracted
        if not len(prs):
            raise last_error
        self.report_warning(last_error)
    return prs, player_url
11f9be09 3214
def _extract_formats_and_subtitles(self, streaming_data, video_id, player_url, is_live, duration):
    """
    Generator over all formats found in `streaming_data`.

    Yields one dict per directly listed (https) format, then the formats of the
    HLS and DASH manifests, and finally - as the very last item - the dict of
    subtitles collected from the manifests. Callers must split off that last
    item themselves.

    @param streaming_data  List of 'streamingData' dicts of the player responses
    @param player_url      Player JS URL used for signature/nsig decryption (may be None)
    @param is_live         Whether the video is currently live
    @param duration        Video duration, used to detect possibly damaged formats (may be None)
    """
    # itags maps a format id to the protocol it was first seen with;
    # stream_ids holds the "<itag>.<audio track id>" keys already processed
    itags, stream_ids = {}, []
    itag_qualities, res_qualities = {}, {0: None}
    q = qualities([
        # Normally tiny is the smallest video-only formats. But
        # audio-only formats with unknown quality may get tagged as tiny
        'tiny',
        'audio_quality_ultralow', 'audio_quality_low', 'audio_quality_medium', 'audio_quality_high',  # Audio only formats
        'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres'
    ])
    streaming_formats = traverse_obj(streaming_data, (..., ('formats', 'adaptiveFormats'), ...), default=[])

    for fmt in streaming_formats:
        # Formats declaring a target duration are skipped
        # NOTE(review): presumably these are segmented live formats - confirm
        if fmt.get('targetDurationSec'):
            continue

        itag = str_or_none(fmt.get('itag'))
        audio_track = fmt.get('audioTrack') or {}
        stream_id = '%s.%s' % (itag or '', audio_track.get('id', ''))
        if stream_id in stream_ids:
            continue

        quality = fmt.get('quality')
        height = int_or_none(fmt.get('height'))
        if quality == 'tiny' or not quality:
            quality = fmt.get('audioQuality', '').lower() or quality
        # The 3gp format (17) in android client has a quality of "small",
        # but is actually worse than other formats
        if itag == '17':
            quality = 'tiny'
        # Remember the quality per itag and per height; used below to rate manifest formats
        if quality:
            if itag:
                itag_qualities[itag] = quality
            if height:
                res_qualities[height] = quality
        # FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment
        # (adding `&sq=0` to the URL) and parsing emsg box to determine the
        # number of fragment that would subsequently requested with (`&sq=N`)
        if fmt.get('type') == 'FORMAT_STREAM_TYPE_OTF':
            continue

        fmt_url = fmt.get('url')
        if not fmt_url:
            # No plain URL: rebuild it from the signature cipher, which needs the player JS
            sc = urllib.parse.parse_qs(fmt.get('signatureCipher'))
            fmt_url = url_or_none(try_get(sc, lambda x: x['url'][0]))
            encrypted_sig = try_get(sc, lambda x: x['s'][0])
            if not all((sc, fmt_url, player_url, encrypted_sig)):
                continue
            try:
                fmt_url += '&%s=%s' % (
                    traverse_obj(sc, ('sp', -1)) or 'signature',
                    self._decrypt_signature(encrypted_sig, video_id, player_url)
                )
            except ExtractorError as e:
                self.report_warning('Signature extraction failed: Some formats may be missing',
                                    video_id=video_id, only_once=True)
                self.write_debug(e, only_once=True)
                continue

        query = parse_qs(fmt_url)
        throttled = False
        if query.get('n'):
            try:
                decrypt_nsig = self._cached(self._decrypt_nsig, 'nsig', query['n'][0])
                fmt_url = update_url_query(fmt_url, {
                    'n': decrypt_nsig(query['n'][0], video_id, player_url)
                })
            except ExtractorError as e:
                # The format is kept, but marked as throttled and de-prioritized below
                phantomjs_hint = ''
                if isinstance(e, JSInterpreter.Exception):
                    phantomjs_hint = (f' Install {self._downloader._format_err("PhantomJS", self._downloader.Styles.EMPHASIS)} '
                                      f'to workaround the issue. {PhantomJSwrapper.INSTALL_HINT}\n')
                self.report_warning(
                    f'nsig extraction failed: You may experience throttling for some formats\n{phantomjs_hint}'
                    f' n = {query["n"][0]} ; player = {player_url}', video_id=video_id, only_once=True)
                self.write_debug(e, only_once=True)
                throttled = True

        if itag:
            itags[itag] = 'https'
            stream_ids.append(stream_id)

        tbr = float_or_none(fmt.get('averageBitrate') or fmt.get('bitrate'), 1000)
        # 10 for the default audio track, -10 for "descriptive" tracks, -1 otherwise
        language_preference = (
            10 if audio_track.get('audioIsDefault') and 10
            else -10 if 'descriptive' in (audio_track.get('displayName') or '').lower() and -10
            else -1)
        # Some formats may have much smaller duration than others (possibly damaged during encoding)
        # E.g. 2-nOtRESiUc Ref: https://github.com/yt-dlp/yt-dlp/issues/2823
        # Make sure to avoid false positives with small duration differences.
        # E.g. __2ABJjxzNo, ySuUZEjARPY
        is_damaged = try_get(fmt, lambda x: float(x['approxDurationMs']) / duration < 500)
        if is_damaged:
            self.report_warning(
                f'{video_id}: Some formats are possibly damaged. They will be deprioritized', only_once=True)
        dct = {
            'asr': int_or_none(fmt.get('audioSampleRate')),
            'filesize': int_or_none(fmt.get('contentLength')),
            'format_id': itag,
            'format_note': join_nonempty(
                '%s%s' % (audio_track.get('displayName') or '',
                          ' (default)' if language_preference > 0 else ''),
                fmt.get('qualityLabel') or quality.replace('audio_quality_', ''),
                try_get(fmt, lambda x: x['projectionType'].replace('RECTANGULAR', '').lower()),
                try_get(fmt, lambda x: x['spatialAudioType'].replace('SPATIAL_AUDIO_TYPE_', '').lower()),
                throttled and 'THROTTLED', is_damaged and 'DAMAGED', delim=', '),
            # Format 22 is likely to be damaged. See https://github.com/yt-dlp/yt-dlp/issues/3372
            'source_preference': -10 if throttled else -5 if itag == '22' else -1,
            'fps': int_or_none(fmt.get('fps')) or None,
            'audio_channels': fmt.get('audioChannels'),
            'height': height,
            'quality': q(quality),
            'has_drm': bool(fmt.get('drmFamilies')),
            'tbr': tbr,
            'url': fmt_url,
            'width': int_or_none(fmt.get('width')),
            'language': join_nonempty(audio_track.get('id', '').split('.')[0],
                                      'desc' if language_preference < -1 else ''),
            'language_preference': language_preference,
            # Strictly de-prioritize damaged and 3gp formats
            'preference': -10 if is_damaged else -2 if itag == '17' else None,
        }
        # Split the mime type into container ("type/subtype") and the optional codecs list
        mime_mobj = re.match(
            r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', fmt.get('mimeType') or '')
        if mime_mobj:
            dct['ext'] = mimetype2ext(mime_mobj.group(1))
            dct.update(parse_codecs(mime_mobj.group(2)))
        no_audio = dct.get('acodec') == 'none'
        no_video = dct.get('vcodec') == 'none'
        # For single-stream formats the total bitrate is the video/audio bitrate
        if no_audio:
            dct['vbr'] = tbr
        if no_video:
            dct['abr'] = tbr
        if no_audio or no_video:
            dct['downloader_options'] = {
                # Youtube throttles chunks >~10M
                'http_chunk_size': 10485760,
            }
            if dct.get('ext'):
                dct['container'] = dct['ext'] + '_dash'
        yield dct

    live_from_start = is_live and self.get_param('live_from_start')
    skip_manifests = self._configuration_arg('skip')
    if not self.get_param('youtube_include_hls_manifest', True):
        skip_manifests.append('hls')
    if not self.get_param('youtube_include_dash_manifest', True):
        skip_manifests.append('dash')
    get_dash = 'dash' not in skip_manifests and (
        not is_live or live_from_start or self._configuration_arg('include_live_dash'))
    get_hls = not live_from_start and 'hls' not in skip_manifests

    def process_manifest_format(f, proto, itag):
        # Returns False when the same itag was already yielded for this protocol.
        # If the itag is taken by another protocol, the format id gets a
        # "-<proto>" suffix. Also assigns the quality of the format
        if itag in itags:
            if itags[itag] == proto or f'{itag}-{proto}' in itags:
                return False
            itag = f'{itag}-{proto}'
        if itag:
            f['format_id'] = itag
            itags[itag] = proto

        f['quality'] = q(itag_qualities.get(try_get(f, lambda f: f['format_id'].split('-')[0]), -1))
        # Unknown itag: fall back to the quality recorded for the closest known height
        if f['quality'] == -1 and f.get('height'):
            f['quality'] = q(res_qualities[min(res_qualities, key=lambda x: abs(x - f['height']))])
        return True

    subtitles = {}
    for sd in streaming_data:
        hls_manifest_url = get_hls and sd.get('hlsManifestUrl')
        if hls_manifest_url:
            fmts, subs = self._extract_m3u8_formats_and_subtitles(hls_manifest_url, video_id, 'mp4', fatal=False, live=is_live)
            subtitles = self._merge_subtitles(subs, subtitles)
            for f in fmts:
                if process_manifest_format(f, 'hls', self._search_regex(
                        r'/itag/(\d+)', f['url'], 'itag', default=None)):
                    yield f

        dash_manifest_url = get_dash and sd.get('dashManifestUrl')
        if dash_manifest_url:
            formats, subs = self._extract_mpd_formats_and_subtitles(dash_manifest_url, video_id, fatal=False)
            subtitles = self._merge_subtitles(subs, subtitles)  # Prioritize HLS subs over DASH
            for f in formats:
                if process_manifest_format(f, 'dash', f['format_id']):
                    # The file size is taken from the "/clen/<n>" component of the URL, if present
                    f['filesize'] = int_or_none(self._search_regex(
                        r'/clen/(\d+)', f.get('fragment_base_url') or f['url'], 'file size', default=None))
                    if live_from_start:
                        f['is_from_start'] = True

                    yield f
    # The subtitles are always the last item yielded
    yield subtitles
11f9be09 3405
def _extract_storyboard(self, player_responses, duration):
    """
    Generator over the storyboard (mhtml) formats described by the player responses.

    The storyboard spec is a '|'-separated string whose first element is the
    base URL. Every further element describes one storyboard level as 8
    '#'-separated fields, of which the first five are integers (width, height,
    frame count, columns, rows) and the last two are the `$N` replacement and
    the `sigh` signature.

    @param player_responses  Player responses to look up the storyboard spec in
    @param duration          Video duration in seconds. Nothing is yielded when
                             it is unknown (None) or zero, since the frame rate
                             and the fragment durations are derived from it
    """
    # Reversed so that pop() returns the first element (the base URL)
    spec = get_first(
        player_responses, ('storyboards', 'playerStoryboardSpecRenderer', 'spec'), default='').split('|')[::-1]
    base_url = url_or_none(urljoin('https://i.ytimg.com/', spec.pop() or None))
    if not base_url:
        return
    if not duration:
        # Without a duration the divisions below would raise
        # TypeError/ZeroDivisionError and abort the whole extraction
        return
    L = len(spec) - 1
    for i, args in enumerate(spec):
        args = args.split('#')
        counts = list(map(int_or_none, args[:5]))
        if len(args) != 8 or not all(counts):
            self.report_warning(f'Malformed storyboard {i}: {"#".join(args)}{bug_reports_message()}')
            continue
        width, height, frame_count, cols, rows = counts
        N, sigh = args[6:]

        # `spec` is reversed, so the level number counts down while `i` counts up
        url = base_url.replace('$L', str(L - i)).replace('$N', N) + f'&sigh={sigh}'
        # Each fragment is one image holding a cols x rows grid of frames
        fragment_count = frame_count / (cols * rows)
        fragment_duration = duration / fragment_count
        yield {
            'format_id': f'sb{i}',
            'format_note': 'storyboard',
            'ext': 'mhtml',
            'protocol': 'mhtml',
            'acodec': 'none',
            'vcodec': 'none',
            'url': url,
            'width': width,
            'height': height,
            'fps': frame_count / duration,
            'rows': rows,
            'columns': cols,
            'fragments': [{
                'url': url.replace('$M', str(j)),
                # The last fragment may be shorter than the others
                'duration': min(fragment_duration, duration - (j * fragment_duration)),
            } for j in range(math.ceil(fragment_count))],
        }
3443
def _download_player_responses(self, url, smuggled_data, video_id, webpage_url):
    """
    Download the watch page (unless 'webpage' is listed in the `player_skip`
    extractor argument) and the player responses of all requested clients.

    @returns  (webpage, master_ytcfg, player_responses, player_url);
              webpage is None when it was skipped
    """
    skip_webpage = 'webpage' in self._configuration_arg('player_skip')
    webpage = None
    if not skip_webpage:
        query = {'bpctr': '9999999999', 'has_verified': '1'}
        if smuggled_data.get('is_story'):
            query['pp'] = self._STORY_PLAYER_PARAMS
        webpage = self._download_webpage(webpage_url, video_id, fatal=False, query=query)

    # Fall back to the default ytcfg when none can be extracted from the webpage
    master_ytcfg = self.extract_ytcfg(video_id, webpage) or self._get_default_ytcfg()

    requested_clients = self._get_requested_clients(url, smuggled_data)
    player_responses, player_url = self._extract_player_responses(
        requested_clients, video_id, webpage, master_ytcfg, smuggled_data)

    return webpage, master_ytcfg, player_responses, player_url
3460
def _list_formats(self, video_id, microformats, video_details, player_responses, player_url, duration=None):
    """
    Determine the live state of the video and extract its formats.

    `is_live` is taken from the video details, falling back to 'isLiveNow'
    of the live broadcast details when the former is not set.

    @returns  (live_broadcast_details, is_live, streaming_data, formats, subtitles)
    """
    live_broadcast_details = traverse_obj(microformats, (..., 'liveBroadcastDetails'))

    is_live = get_first(video_details, 'isLive')
    if is_live is None:
        is_live = get_first(live_broadcast_details, 'isLiveNow')

    streaming_data = traverse_obj(player_responses, (..., 'streamingData'), default=[])

    # The generator yields every format and, as its final item, the subtitles
    extracted = self._extract_formats_and_subtitles(
        streaming_data, video_id, player_url, is_live, duration)
    *formats, subtitles = extracted

    return live_broadcast_details, is_live, streaming_data, formats, subtitles
adbc4ec4
THD
3471
3472 def _real_extract(self, url):
3473 url, smuggled_data = unsmuggle_url(url, {})
3474 video_id = self._match_id(url)
3475
3476 base_url = self.http_scheme() + '//www.youtube.com/'
3477 webpage_url = base_url + 'watch?v=' + video_id
3478
3479 webpage, master_ytcfg, player_responses, player_url = self._download_player_responses(url, smuggled_data, video_id, webpage_url)
3480
11f9be09 3481 playability_statuses = traverse_obj(
3482 player_responses, (..., 'playabilityStatus'), expected_type=dict, default=[])
3483
3484 trailer_video_id = get_first(
3485 playability_statuses,
3486 ('errorScreen', 'playerLegacyDesktopYpcTrailerRenderer', 'trailerVideoId'),
3487 expected_type=str)
3488 if trailer_video_id:
3489 return self.url_result(
3490 trailer_video_id, self.ie_key(), trailer_video_id)
3491
3492 search_meta = ((lambda x: self._html_search_meta(x, webpage, default=None))
3493 if webpage else (lambda x: None))
3494
3495 video_details = traverse_obj(
3496 player_responses, (..., 'videoDetails'), expected_type=dict, default=[])
3497 microformats = traverse_obj(
3498 player_responses, (..., 'microformat', 'playerMicroformatRenderer'),
3499 expected_type=dict, default=[])
3500 video_title = (
3501 get_first(video_details, 'title')
3502 or self._get_text(microformats, (..., 'title'))
3503 or search_meta(['og:title', 'twitter:title', 'title']))
3504 video_description = get_first(video_details, 'shortDescription')
3505
d89257f3 3506 multifeed_metadata_list = get_first(
3507 player_responses,
3508 ('multicamera', 'playerLegacyMulticameraRenderer', 'metadataList'),
3509 expected_type=str)
3510 if multifeed_metadata_list and not smuggled_data.get('force_singlefeed'):
3511 if self.get_param('noplaylist'):
11f9be09 3512 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
d89257f3 3513 else:
3514 entries = []
3515 feed_ids = []
3516 for feed in multifeed_metadata_list.split(','):
3517 # Unquote should take place before split on comma (,) since textual
3518 # fields may contain comma as well (see
3519 # https://github.com/ytdl-org/youtube-dl/issues/8536)
14f25df2 3520 feed_data = urllib.parse.parse_qs(
ac668111 3521 urllib.parse.unquote_plus(feed))
d89257f3 3522
3523 def feed_entry(name):
3524 return try_get(
14f25df2 3525 feed_data, lambda x: x[name][0], str)
d89257f3 3526
3527 feed_id = feed_entry('id')
3528 if not feed_id:
3529 continue
3530 feed_title = feed_entry('title')
3531 title = video_title
3532 if feed_title:
3533 title += ' (%s)' % feed_title
3534 entries.append({
3535 '_type': 'url_transparent',
3536 'ie_key': 'Youtube',
3537 'url': smuggle_url(
3538 '%swatch?v=%s' % (base_url, feed_data['id'][0]),
3539 {'force_singlefeed': True}),
3540 'title': title,
3541 })
3542 feed_ids.append(feed_id)
3543 self.to_screen(
3544 'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
3545 % (', '.join(feed_ids), video_id))
3546 return self.playlist_result(
3547 entries, video_id, video_title, video_description)
11f9be09 3548
a1b2d843 3549 duration = int_or_none(
3550 get_first(video_details, 'lengthSeconds')
3551 or get_first(microformats, 'lengthSeconds')
3552 or parse_duration(search_meta('duration'))) or None
3553
c646d76f 3554 live_broadcast_details, is_live, streaming_data, formats, automatic_captions = \
3555 self._list_formats(video_id, microformats, video_details, player_responses, player_url)
bf1317d2 3556
545cc85d 3557 if not formats:
11f9be09 3558 if not self.get_param('allow_unplayable_formats') and traverse_obj(streaming_data, (..., 'licenseInfos')):
88acdbc2 3559 self.report_drm(video_id)
11f9be09 3560 pemr = get_first(
3561 playability_statuses,
3562 ('errorScreen', 'playerErrorMessageRenderer'), expected_type=dict) or {}
3563 reason = self._get_text(pemr, 'reason') or get_first(playability_statuses, 'reason')
3564 subreason = clean_html(self._get_text(pemr, 'subreason') or '')
545cc85d 3565 if subreason:
545cc85d 3566 if subreason == 'The uploader has not made this video available in your country.':
11f9be09 3567 countries = get_first(microformats, 'availableCountries')
545cc85d 3568 if not countries:
3569 regions_allowed = search_meta('regionsAllowed')
3570 countries = regions_allowed.split(',') if regions_allowed else None
b7da73eb 3571 self.raise_geo_restricted(subreason, countries, metadata_available=True)
11f9be09 3572 reason += f'. {subreason}'
545cc85d 3573 if reason:
b7da73eb 3574 self.raise_no_formats(reason, expected=True)
bf1317d2 3575
11f9be09 3576 keywords = get_first(video_details, 'keywords', expected_type=list) or []
545cc85d 3577 if not keywords and webpage:
3578 keywords = [
3579 unescapeHTML(m.group('content'))
3580 for m in re.finditer(self._meta_regex('og:video:tag'), webpage)]
3581 for keyword in keywords:
3582 if keyword.startswith('yt:stretch='):
201c1459 3583 mobj = re.search(r'(\d+)\s*:\s*(\d+)', keyword)
3584 if mobj:
3585 # NB: float is intentional for forcing float division
3586 w, h = (float(v) for v in mobj.groups())
3587 if w > 0 and h > 0:
3588 ratio = w / h
3589 for f in formats:
3590 if f.get('vcodec') != 'none':
3591 f['stretched_ratio'] = ratio
3592 break
a709d873 3593 thumbnails = self._extract_thumbnails((video_details, microformats), (..., ..., 'thumbnail'))
ff2751ac 3594 thumbnail_url = search_meta(['og:image', 'twitter:image'])
3595 if thumbnail_url:
3596 thumbnails.append({
3597 'url': thumbnail_url,
ff2751ac 3598 })
fccf5021 3599 original_thumbnails = thumbnails.copy()
3600
0ba692ac 3601 # The best resolution thumbnails sometimes does not appear in the webpage
bfec31be 3602 # See: https://github.com/yt-dlp/yt-dlp/issues/340
cca80fe6 3603 # List of possible thumbnails - Ref: <https://stackoverflow.com/a/20542029>
e820fbaa 3604 thumbnail_names = [
962ffcf8 3605 # While the *1,*2,*3 thumbnails are just below their corresponding "*default" variants
bfec31be 3606 # in resolution, these are not the custom thumbnail. So de-prioritize them
3607 'maxresdefault', 'hq720', 'sddefault', 'hqdefault', '0', 'mqdefault', 'default',
3608 'sd1', 'sd2', 'sd3', 'hq1', 'hq2', 'hq3', 'mq1', 'mq2', 'mq3', '1', '2', '3'
cca80fe6 3609 ]
cca80fe6 3610 n_thumbnail_names = len(thumbnail_names)
0ba692ac 3611 thumbnails.extend({
3612 'url': 'https://i.ytimg.com/vi{webp}/{video_id}/{name}{live}.{ext}'.format(
3613 video_id=video_id, name=name, ext=ext,
3614 webp='_webp' if ext == 'webp' else '', live='_live' if is_live else ''),
cca80fe6 3615 } for name in thumbnail_names for ext in ('webp', 'jpg'))
0ba692ac 3616 for thumb in thumbnails:
cca80fe6 3617 i = next((i for i, t in enumerate(thumbnail_names) if f'/{video_id}/{t}' in thumb['url']), n_thumbnail_names)
0ba692ac 3618 thumb['preference'] = (0 if '.webp' in thumb['url'] else -1) - (2 * i)
ff2751ac 3619 self._remove_duplicate_formats(thumbnails)
fccf5021 3620 self._downloader._sort_thumbnails(original_thumbnails)
545cc85d 3621
7ea65411 3622 category = get_first(microformats, 'category') or search_meta('genre')
3623 channel_id = str_or_none(
3624 get_first(video_details, 'channelId')
3625 or get_first(microformats, 'externalChannelId')
3626 or search_meta('channelId'))
7ea65411 3627 owner_profile_url = get_first(microformats, 'ownerProfileUrl')
3628
3629 live_content = get_first(video_details, 'isLiveContent')
3630 is_upcoming = get_first(video_details, 'isUpcoming')
3631 if is_live is None:
3632 if is_upcoming or live_content is False:
3633 is_live = False
3634 if is_upcoming is None and (live_content or is_live):
3635 is_upcoming = False
adbc4ec4
THD
3636 live_start_time = parse_iso8601(get_first(live_broadcast_details, 'startTimestamp'))
3637 live_end_time = parse_iso8601(get_first(live_broadcast_details, 'endTimestamp'))
3638 if not duration and live_end_time and live_start_time:
3639 duration = live_end_time - live_start_time
3640
3641 if is_live and self.get_param('live_from_start'):
3642 self._prepare_live_from_start_formats(formats, video_id, live_start_time, url, webpage_url, smuggled_data)
7ea65411 3643
720c3099 3644 formats.extend(self._extract_storyboard(player_responses, duration))
3645
31b532a1 3646 # source_preference is lower for throttled/potentially damaged formats
7e798d72 3647 self._sort_formats(formats, (
3648 'quality', 'res', 'fps', 'hdr:12', 'source', 'vcodec:vp9.2', 'channels', 'acodec', 'lang', 'proto'))
720c3099 3649
545cc85d 3650 info = {
3651 'id': video_id,
39ca3b5c 3652 'title': video_title,
545cc85d 3653 'formats': formats,
3654 'thumbnails': thumbnails,
fccf5021 3655 # The best thumbnail that we are sure exists. Prevents unnecessary
3656 # URL checking if user don't care about getting the best possible thumbnail
3657 'thumbnail': traverse_obj(original_thumbnails, (-1, 'url')),
545cc85d 3658 'description': video_description,
11f9be09 3659 'uploader': get_first(video_details, 'author'),
545cc85d 3660 'uploader_id': self._search_regex(r'/(?:channel|user)/([^/?&#]+)', owner_profile_url, 'uploader id') if owner_profile_url else None,
3661 'uploader_url': owner_profile_url,
3662 'channel_id': channel_id,
a70635b8 3663 'channel_url': format_field(channel_id, None, 'https://www.youtube.com/channel/%s'),
545cc85d 3664 'duration': duration,
3665 'view_count': int_or_none(
11f9be09 3666 get_first((video_details, microformats), (..., 'viewCount'))
545cc85d 3667 or search_meta('interactionCount')),
11f9be09 3668 'average_rating': float_or_none(get_first(video_details, 'averageRating')),
545cc85d 3669 'age_limit': 18 if (
11f9be09 3670 get_first(microformats, 'isFamilySafe') is False
545cc85d 3671 or search_meta('isFamilyFriendly') == 'false'
3672 or search_meta('og:restrictions:age') == '18+') else 0,
3673 'webpage_url': webpage_url,
3674 'categories': [category] if category else None,
3675 'tags': keywords,
11f9be09 3676 'playable_in_embed': get_first(playability_statuses, 'playableInEmbed'),
7ea65411 3677 'is_live': is_live,
3678 'was_live': (False if is_live or is_upcoming or live_content is False
3679 else None if is_live is None or is_upcoming is None
3680 else live_content),
3681 'live_status': 'is_upcoming' if is_upcoming else None, # rest will be set by YoutubeDL
adbc4ec4 3682 'release_timestamp': live_start_time,
545cc85d 3683 }
b477fc13 3684
e325a21a 3685 if get_first(video_details, 'isPostLiveDvr'):
3686 self.write_debug('Video is in Post-Live Manifestless mode')
3687 info['live_status'] = 'post_live'
3688 if (duration or 0) > 4 * 3600:
3689 self.report_warning(
3690 'The livestream has not finished processing. Only 4 hours of the video can be currently downloaded. '
3691 'This is a known issue and patches are welcome')
3692
c646d76f 3693 subtitles = {}
3944e7af 3694 pctr = traverse_obj(player_responses, (..., 'captions', 'playerCaptionsTracklistRenderer'), expected_type=dict)
545cc85d 3695 if pctr:
ecdc9049 3696 def get_lang_code(track):
3697 return (remove_start(track.get('vssId') or '', '.').replace('.', '-')
3698 or track.get('languageCode'))
3699
3700 # Converted into dicts to remove duplicates
3701 captions = {
3702 get_lang_code(sub): sub
3703 for sub in traverse_obj(pctr, (..., 'captionTracks', ...), default=[])}
3704 translation_languages = {
3705 lang.get('languageCode'): self._get_text(lang.get('languageName'), max_runs=1)
3706 for lang in traverse_obj(pctr, (..., 'translationLanguages', ...), default=[])}
3707
774d79cc 3708 def process_language(container, base_url, lang_code, sub_name, query):
120916da 3709 lang_subs = container.setdefault(lang_code, [])
545cc85d 3710 for fmt in self._SUBTITLE_FORMATS:
3711 query.update({
3712 'fmt': fmt,
3713 })
3714 lang_subs.append({
3715 'ext': fmt,
60f393e4 3716 'url': urljoin('https://www.youtube.com', update_url_query(base_url, query)),
774d79cc 3717 'name': sub_name,
545cc85d 3718 })
7e72694b 3719
07b47084 3720 # NB: Constructing the full subtitle dictionary is slow
3721 get_translated_subs = 'translated_subs' not in self._configuration_arg('skip') and (
3722 self.get_param('writeautomaticsub', False) or self.get_param('listsubtitles'))
ecdc9049 3723 for lang_code, caption_track in captions.items():
3724 base_url = caption_track.get('baseUrl')
1235d333 3725 orig_lang = parse_qs(base_url).get('lang', [None])[-1]
545cc85d 3726 if not base_url:
3727 continue
ecdc9049 3728 lang_name = self._get_text(caption_track, 'name', max_runs=1)
545cc85d 3729 if caption_track.get('kind') != 'asr':
545cc85d 3730 if not lang_code:
3731 continue
3732 process_language(
ecdc9049 3733 subtitles, base_url, lang_code, lang_name, {})
3734 if not caption_track.get('isTranslatable'):
3735 continue
3944e7af 3736 for trans_code, trans_name in translation_languages.items():
3737 if not trans_code:
545cc85d 3738 continue
1235d333 3739 orig_trans_code = trans_code
ecdc9049 3740 if caption_track.get('kind') != 'asr':
07b47084 3741 if not get_translated_subs:
18e49408 3742 continue
ecdc9049 3743 trans_code += f'-{lang_code}'
a70635b8 3744 trans_name += format_field(lang_name, None, ' from %s')
d49669ac 3745 # Add an "-orig" label to the original language so that it can be distinguished.
3746 # The subs are returned without "-orig" as well for compatibility
1235d333 3747 if lang_code == f'a-{orig_trans_code}':
0c8d9e5f 3748 process_language(
d49669ac 3749 automatic_captions, base_url, f'{trans_code}-orig', f'{trans_name} (Original)', {})
3750 # Setting tlang=lang returns damaged subtitles.
d49669ac 3751 process_language(automatic_captions, base_url, trans_code, trans_name,
1235d333 3752 {} if orig_lang == orig_trans_code else {'tlang': trans_code})
c646d76f 3753
3754 info['automatic_captions'] = automatic_captions
3755 info['subtitles'] = subtitles
7e72694b 3756
14f25df2 3757 parsed_url = urllib.parse.urlparse(url)
545cc85d 3758 for component in [parsed_url.fragment, parsed_url.query]:
14f25df2 3759 query = urllib.parse.parse_qs(component)
545cc85d 3760 for k, v in query.items():
3761 for d_k, s_ks in [('start', ('start', 't')), ('end', ('end',))]:
3762 d_k += '_time'
3763 if d_k not in info and k in s_ks:
3764 info[d_k] = parse_duration(query[k][0])
822b9d9c
RA
3765
3766 # Youtube Music Auto-generated description
822b9d9c 3767 if video_description:
1890fc63 3768 mobj = re.search(
3769 r'''(?xs)
3770 (?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+
3771 (?P<album>[^\n]+)
3772 (?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?
3773 (?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?
3774 (.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?
3775 .+\nAuto-generated\ by\ YouTube\.\s*$
3776 ''', video_description)
822b9d9c 3777 if mobj:
822b9d9c
RA
3778 release_year = mobj.group('release_year')
3779 release_date = mobj.group('release_date')
3780 if release_date:
3781 release_date = release_date.replace('-', '')
3782 if not release_year:
545cc85d 3783 release_year = release_date[:4]
3784 info.update({
3785 'album': mobj.group('album'.strip()),
3786 'artist': mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·')),
3787 'track': mobj.group('track').strip(),
3788 'release_date': release_date,
cc2db878 3789 'release_year': int_or_none(release_year),
545cc85d 3790 })
7e72694b 3791
545cc85d 3792 initial_data = None
3793 if webpage:
56ba69e4 3794 initial_data = self.extract_yt_initial_data(video_id, webpage, fatal=False)
545cc85d 3795 if not initial_data:
99e9e001 3796 query = {'videoId': video_id}
3797 query.update(self._get_checkok_params())
109dd3b2 3798 initial_data = self._extract_response(
3799 item_id=video_id, ep='next', fatal=False,
99e9e001 3800 ytcfg=master_ytcfg, query=query,
3801 headers=self.generate_api_headers(ytcfg=master_ytcfg),
109dd3b2 3802 note='Downloading initial data API JSON')
545cc85d 3803
0df111a3 3804 info['comment_count'] = traverse_obj(initial_data, (
3805 'contents', 'twoColumnWatchNextResults', 'results', 'results', 'contents', ..., 'itemSectionRenderer',
3806 'contents', ..., 'commentsEntryPointHeaderRenderer', 'commentCount', 'simpleText'
3807 ), (
3808 'engagementPanels', lambda _, v: v['engagementPanelSectionListRenderer']['panelIdentifier'] == 'comment-item-section',
3809 'engagementPanelSectionListRenderer', 'header', 'engagementPanelTitleHeaderRenderer', 'contextualInfo', 'runs', ..., 'text'
3810 ), expected_type=int_or_none, get_all=False)
3811
19a03940 3812 try: # This will error if there is no livechat
c60ee3a2 3813 initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
19a03940 3814 except (KeyError, IndexError, TypeError):
3815 pass
3816 else:
ecdc9049 3817 info.setdefault('subtitles', {})['live_chat'] = [{
4ce05f57 3818 # url is needed to set cookies
3819 'url': f'https://www.youtube.com/watch?v={video_id}&bpctr=9999999999&has_verified=1',
c60ee3a2 3820 'video_id': video_id,
3821 'ext': 'json',
f6745c49 3822 'protocol': 'youtube_live_chat' if is_live or is_upcoming else 'youtube_live_chat_replay',
c60ee3a2 3823 }]
545cc85d 3824
3825 if initial_data:
7c365c21 3826 info['chapters'] = (
3827 self._extract_chapters_from_json(initial_data, duration)
3828 or self._extract_chapters_from_engagement_panel(initial_data, duration)
0fe51254 3829 or self._extract_chapters_from_description(video_description, duration)
7c365c21 3830 or None)
545cc85d 3831
17322130 3832 contents = traverse_obj(
3833 initial_data, ('contents', 'twoColumnWatchNextResults', 'results', 'results', 'contents'),
3834 expected_type=list, default=[])
3835
3836 vpir = get_first(contents, 'videoPrimaryInfoRenderer')
3837 if vpir:
3838 stl = vpir.get('superTitleLink')
3839 if stl:
3840 stl = self._get_text(stl)
3841 if try_get(
3842 vpir,
3843 lambda x: x['superTitleIcon']['iconType']) == 'LOCATION_PIN':
3844 info['location'] = stl
3845 else:
affc4fef 3846 mobj = re.search(r'(.+?)\s*S(\d+)\s*•?\s*E(\d+)', stl)
17322130 3847 if mobj:
545cc85d 3848 info.update({
17322130 3849 'series': mobj.group(1),
3850 'season_number': int(mobj.group(2)),
3851 'episode_number': int(mobj.group(3)),
545cc85d 3852 })
17322130 3853 for tlb in (try_get(
3854 vpir,
3855 lambda x: x['videoActions']['menuRenderer']['topLevelButtons'],
3856 list) or []):
3857 tbr = tlb.get('toggleButtonRenderer') or {}
3858 for getter, regex in [(
3859 lambda x: x['defaultText']['accessibility']['accessibilityData'],
3860 r'(?P<count>[\d,]+)\s*(?P<type>(?:dis)?like)'), ([
3861 lambda x: x['accessibility'],
3862 lambda x: x['accessibilityData']['accessibilityData'],
3863 ], r'(?P<type>(?:dis)?like) this video along with (?P<count>[\d,]+) other people')]:
3864 label = (try_get(tbr, getter, dict) or {}).get('label')
3865 if label:
3866 mobj = re.match(regex, label)
3867 if mobj:
3868 info[mobj.group('type') + '_count'] = str_to_int(mobj.group('count'))
545cc85d 3869 break
17322130 3870 sbr_tooltip = try_get(
3871 vpir, lambda x: x['sentimentBar']['sentimentBarRenderer']['tooltip'])
3872 if sbr_tooltip:
3873 like_count, dislike_count = sbr_tooltip.split(' / ')
3874 info.update({
3875 'like_count': str_to_int(like_count),
3876 'dislike_count': str_to_int(dislike_count),
3877 })
3878 vsir = get_first(contents, 'videoSecondaryInfoRenderer')
3879 if vsir:
3880 vor = traverse_obj(vsir, ('owner', 'videoOwnerRenderer'))
3881 info.update({
3882 'channel': self._get_text(vor, 'title'),
3883 'channel_follower_count': self._get_count(vor, 'subscriberCountText')})
3884
3885 rows = try_get(
3886 vsir,
3887 lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows'],
3888 list) or []
3889 multiple_songs = False
3890 for row in rows:
3891 if try_get(row, lambda x: x['metadataRowRenderer']['hasDividerLine']) is True:
3892 multiple_songs = True
3893 break
3894 for row in rows:
3895 mrr = row.get('metadataRowRenderer') or {}
3896 mrr_title = mrr.get('title')
3897 if not mrr_title:
3898 continue
3899 mrr_title = self._get_text(mrr, 'title')
3900 mrr_contents_text = self._get_text(mrr, ('contents', 0))
3901 if mrr_title == 'License':
3902 info['license'] = mrr_contents_text
3903 elif not multiple_songs:
3904 if mrr_title == 'Album':
3905 info['album'] = mrr_contents_text
3906 elif mrr_title == 'Artist':
3907 info['artist'] = mrr_contents_text
3908 elif mrr_title == 'Song':
3909 info['track'] = mrr_contents_text
545cc85d 3910
3911 fallbacks = {
3912 'channel': 'uploader',
3913 'channel_id': 'uploader_id',
3914 'channel_url': 'uploader_url',
3915 }
992f9a73 3916
17322130 3917 # The upload date for scheduled, live and past live streams / premieres in microformats
3918 # may be different from the stream date. Although not in UTC, we will prefer it in this case.
992f9a73 3919 # See: https://github.com/yt-dlp/yt-dlp/pull/2223#issuecomment-1008485139
17322130 3920 upload_date = (
3921 unified_strdate(get_first(microformats, 'uploadDate'))
3922 or unified_strdate(search_meta('uploadDate')))
3923 if not upload_date or (not info.get('is_live') and not info.get('was_live') and info.get('live_status') != 'is_upcoming'):
6e634cbe 3924 upload_date = strftime_or_none(self._extract_time_text(vpir, 'dateText')[0], '%Y%m%d') or upload_date
17322130 3925 info['upload_date'] = upload_date
992f9a73 3926
545cc85d 3927 for to, frm in fallbacks.items():
3928 if not info.get(to):
3929 info[to] = info.get(frm)
3930
3931 for s_k, d_k in [('artist', 'creator'), ('track', 'alt_title')]:
3932 v = info.get(s_k)
3933 if v:
3934 info[d_k] = v
b84071c0 3935
11f9be09 3936 is_private = get_first(video_details, 'isPrivate', expected_type=bool)
3937 is_unlisted = get_first(microformats, 'isUnlisted', expected_type=bool)
c224251a 3938 is_membersonly = None
b28f8d24 3939 is_premium = None
c224251a
M
3940 if initial_data and is_private is not None:
3941 is_membersonly = False
b28f8d24 3942 is_premium = False
47193e02 3943 contents = try_get(initial_data, lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'], list) or []
3944 badge_labels = set()
3945 for content in contents:
3946 if not isinstance(content, dict):
3947 continue
3948 badge_labels.update(self._extract_badges(content.get('videoPrimaryInfoRenderer')))
3949 for badge_label in badge_labels:
3950 if badge_label.lower() == 'members only':
3951 is_membersonly = True
3952 elif badge_label.lower() == 'premium':
3953 is_premium = True
3954 elif badge_label.lower() == 'unlisted':
3955 is_unlisted = True
c224251a 3956
c224251a
M
3957 info['availability'] = self._availability(
3958 is_private=is_private,
b28f8d24 3959 needs_premium=is_premium,
c224251a
M
3960 needs_subscription=is_membersonly,
3961 needs_auth=info['age_limit'] >= 18,
3962 is_unlisted=None if is_private is None else is_unlisted)
3963
a2160aa4 3964 info['__post_extractor'] = self.extract_comments(master_ytcfg, video_id, contents, webpage)
4ea3be0a 3965
11f9be09 3966 self.mark_watched(video_id, player_responses)
d77ab8e2 3967
545cc85d 3968 return info
c5e8d7af 3969
a61fd4cf 3970
a6213a49 3971class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):
5f6a1245 3972
@staticmethod
def passthrough_smuggled_data(func):
    """Decorate an extraction method so smuggled URL data is passed through.

    The decorated method is invoked as ``func(self, url, smuggled_data)``,
    where ``smuggled_data`` is whatever was unsmuggled from the incoming URL
    (plus ``is_music_url`` when ``self.is_music_url(url)`` is true).
    If any smuggled data exists and the result has entries, each entry's
    ``url`` is re-smuggled with that data as the entries are iterated.
    """

    def _resmuggle_lazily(items, payload):
        # TODO: Convert URL to music.youtube instead.
        # Do we need to passthrough any other smuggled_data?
        for item in items:
            item['url'] = smuggle_url(item['url'], payload)
            yield item

    @functools.wraps(func)
    def wrapper(self, url):
        url, smuggled_data = unsmuggle_url(url, {})
        if self.is_music_url(url):
            smuggled_data['is_music_url'] = True

        result = func(self, url, smuggled_data)
        # Only touch the entries when there is something to pass along
        entries = result.get('entries') if smuggled_data else None
        if entries:
            result['entries'] = _resmuggle_lazily(entries, smuggled_data)
        return result

    return wrapper
3992
def _extract_channel_id(self, webpage):
    """Return the channel ID advertised by *webpage*.

    The ``channelId`` meta tag is used when present. Otherwise the ID is
    taken from the ``/channel/<id>`` part of the first available URL meta tag
    (``og:url``, ``al:*:url``, ``twitter:*``).

    No ``default`` is passed to the fallback lookups, so how a missing tag or
    a non-matching URL is reported is decided by ``_html_search_meta`` /
    ``_search_regex``.
    """
    channel_id = self._html_search_meta(
        'channelId', webpage, 'channel id', default=None)
    if channel_id:
        return channel_id
    channel_url = self._html_search_meta(
        ('og:url', 'al:ios:url', 'al:android:url', 'al:web:url',
         'twitter:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad',
         'twitter:app:url:googleplay'), webpage, 'channel url')
    # The quantifier must be inside the group: `(...)+` would repeat a
    # one-character group and capture only the last character of the ID.
    return self._search_regex(
        r'https?://(?:www\.)?youtube\.com/channel/([^/?#&]+)',
        channel_url, 'channel id')
15f6397c 4005
8bdd16b4 4006 @staticmethod
cd7c66cf 4007 def _extract_basic_item_renderer(item):
4008 # Modified from _extract_grid_item_renderer
201c1459 4009 known_basic_renderers = (
a17526e4 4010 'playlistRenderer', 'videoRenderer', 'channelRenderer', 'showRenderer', 'reelItemRenderer'
cd7c66cf 4011 )
4012 for key, renderer in item.items():
201c1459 4013 if not isinstance(renderer, dict):
cd7c66cf 4014 continue
201c1459 4015 elif key in known_basic_renderers:
4016 return renderer
4017 elif key.startswith('grid') and key.endswith('Renderer'):
4018 return renderer
8bdd16b4 4019
8bdd16b4 4020 def _grid_entries(self, grid_renderer):
4021 for item in grid_renderer['items']:
4022 if not isinstance(item, dict):
39b62db1 4023 continue
cd7c66cf 4024 renderer = self._extract_basic_item_renderer(item)
8bdd16b4 4025 if not isinstance(renderer, dict):
4026 continue
052e1350 4027 title = self._get_text(renderer, 'title')
fe93e2c4 4028
8bdd16b4 4029 # playlist
4030 playlist_id = renderer.get('playlistId')
4031 if playlist_id:
4032 yield self.url_result(
4033 'https://www.youtube.com/playlist?list=%s' % playlist_id,
4034 ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
4035 video_title=title)
201c1459 4036 continue
8bdd16b4 4037 # video
4038 video_id = renderer.get('videoId')
4039 if video_id:
4040 yield self._extract_video(renderer)
201c1459 4041 continue
8bdd16b4 4042 # channel
4043 channel_id = renderer.get('channelId')
4044 if channel_id:
8bdd16b4 4045 yield self.url_result(
4046 'https://www.youtube.com/channel/%s' % channel_id,
4047 ie=YoutubeTabIE.ie_key(), video_title=title)
201c1459 4048 continue
4049 # generic endpoint URL support
4050 ep_url = urljoin('https://www.youtube.com/', try_get(
4051 renderer, lambda x: x['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'],
14f25df2 4052 str))
201c1459 4053 if ep_url:
4054 for ie in (YoutubeTabIE, YoutubePlaylistIE, YoutubeIE):
4055 if ie.suitable(ep_url):
4056 yield self.url_result(
4057 ep_url, ie=ie.ie_key(), video_id=ie._match_id(ep_url), video_title=title)
4058 break
8bdd16b4 4059
def _music_reponsive_list_entry(self, renderer):
    """Build a URL result for a music list item, or return None.

    Precedence: the item's own video, then the watch endpoint's playlist
    (together with its video when one is given), then a browse endpoint.
    """
    video_id = traverse_obj(renderer, ('playlistItemData', 'videoId'))
    if video_id:
        return self.url_result(
            f'https://music.youtube.com/watch?v={video_id}',
            ie=YoutubeIE.ie_key(), video_id=video_id)

    watch_endpoint = ('navigationEndpoint', 'watchEndpoint')
    playlist_id = traverse_obj(renderer, watch_endpoint + ('playlistId',))
    if not playlist_id:
        browse_id = traverse_obj(renderer, ('navigationEndpoint', 'browseEndpoint', 'browseId'))
        if not browse_id:
            return None
        return self.url_result(
            f'https://music.youtube.com/browse/{browse_id}',
            ie=YoutubeTabIE.ie_key(), video_id=browse_id)

    video_id = traverse_obj(renderer, watch_endpoint + ('videoId',))
    if video_id:
        playlist_url = f'https://music.youtube.com/watch?v={video_id}&list={playlist_id}'
    else:
        playlist_url = f'https://music.youtube.com/playlist?list={playlist_id}'
    return self.url_result(playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
4077
3d3dddc9 4078 def _shelf_entries_from_content(self, shelf_renderer):
4079 content = shelf_renderer.get('content')
4080 if not isinstance(content, dict):
8bdd16b4 4081 return
cd7c66cf 4082 renderer = content.get('gridRenderer') or content.get('expandedShelfContentsRenderer')
3d3dddc9 4083 if renderer:
4084 # TODO: add support for nested playlists so each shelf is processed
4085 # as separate playlist
4086 # TODO: this includes only first N items
86e5f3ed 4087 yield from self._grid_entries(renderer)
3d3dddc9 4088 renderer = content.get('horizontalListRenderer')
4089 if renderer:
4090 # TODO
4091 pass
8bdd16b4 4092
def _shelf_entries(self, shelf_renderer, skip_channels=False):
    """Yield results for a shelf: its own URL first, then its content.

    When the shelf has an endpoint URL, a URL result titled with the shelf
    title is yielded. With ``skip_channels`` set, a shelf whose URL contains
    ``/channels?`` yields nothing at all. Entries from the shelf content are
    yielded afterwards whether or not a shelf URL was found.
    """
    endpoint_path = try_get(
        shelf_renderer,
        lambda r: r['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        str)
    shelf_url = urljoin('https://www.youtube.com', endpoint_path)

    # Skip links to other channels. Note that checking for
    # endpoint.commandMetadata.webCommandMetadata.webPageType == WEB_PAGE_TYPE_CHANNEL
    # will not work
    if shelf_url and skip_channels and '/channels?' in shelf_url:
        return

    if shelf_url:
        shelf_title = self._get_text(shelf_renderer, 'title')
        yield self.url_result(shelf_url, video_title=shelf_title)

    # The shelf may not contain a shelf URL; also extract from its content
    yield from self._shelf_entries_from_content(shelf_renderer)
c5e8d7af 4108
8bdd16b4 4109 def _playlist_entries(self, video_list_renderer):
4110 for content in video_list_renderer['contents']:
4111 if not isinstance(content, dict):
4112 continue
4113 renderer = content.get('playlistVideoRenderer') or content.get('playlistPanelVideoRenderer')
4114 if not isinstance(renderer, dict):
4115 continue
4116 video_id = renderer.get('videoId')
4117 if not video_id:
4118 continue
4119 yield self._extract_video(renderer)
07aeced6 4120
def _rich_entries(self, rich_grid_renderer):
    """Yield the video wrapped in a rich item, if it has a video ID."""
    video_renderer = try_get(
        rich_grid_renderer, lambda r: r['content']['videoRenderer'], dict)
    if video_renderer and video_renderer.get('videoId'):
        yield self._extract_video(video_renderer)
4128
8bdd16b4 4129 def _video_entry(self, video_renderer):
4130 video_id = video_renderer.get('videoId')
4131 if video_id:
4132 return self._extract_video(video_renderer)
dacb3a86 4133
def _hashtag_tile_entry(self, hashtag_tile_renderer):
    """Return a tab URL result for a hashtag tile, or None if it has no URL."""
    tap_path = traverse_obj(
        hashtag_tile_renderer, ('onTapCommand', 'commandMetadata', 'webCommandMetadata', 'url'))
    url = urljoin('https://youtube.com', tap_path)
    if not url:
        return None
    ie_key = YoutubeTabIE.ie_key()
    hashtag = self._get_text(hashtag_tile_renderer, 'hashtag')
    return self.url_result(url, ie=ie_key, title=hashtag)
4140
def _post_thread_entries(self, post_thread_renderer):
    """Yield the media referenced by a community post.

    In order: the attached video, the attached playlist, then every video
    linked from the post text. A linked video with the same ID as the
    attached video is not yielded again.
    """
    post = try_get(
        post_thread_renderer, lambda r: r['post']['backstagePostRenderer'], dict)
    if not post:
        return

    # Video attachment
    attached_video = try_get(
        post, lambda r: r['backstageAttachment']['videoRenderer'], dict) or {}
    video_id = attached_video.get('videoId')
    if video_id:
        video_entry = self._extract_video(attached_video)
        if video_entry:
            yield video_entry

    # Playlist attachment
    playlist_id = try_get(
        post, lambda r: r['backstageAttachment']['playlistRenderer']['playlistId'], str)
    if playlist_id:
        yield self.url_result(
            'https://www.youtube.com/playlist?list=%s' % playlist_id,
            ie=YoutubeTabIE.ie_key(), video_id=playlist_id)

    # Inline video links
    for run in try_get(post, lambda r: r['contentText']['runs'], list) or []:
        if not isinstance(run, dict):
            continue
        link = try_get(
            run, lambda r: r['navigationEndpoint']['urlEndpoint']['url'], str)
        if not link or not YoutubeIE.suitable(link):
            continue
        linked_id = YoutubeIE._match_id(link)
        if linked_id != video_id:
            yield self.url_result(link, ie=YoutubeIE.ie_key(), video_id=linked_id)
dacb3a86 4176
8bdd16b4 4177 def _post_thread_continuation_entries(self, post_thread_continuation):
4178 contents = post_thread_continuation.get('contents')
4179 if not isinstance(contents, list):
4180 return
4181 for content in contents:
4182 renderer = content.get('backstagePostThreadRenderer')
6b0b0a28 4183 if isinstance(renderer, dict):
4184 yield from self._post_thread_entries(renderer)
8bdd16b4 4185 continue
6b0b0a28 4186 renderer = content.get('videoRenderer')
4187 if isinstance(renderer, dict):
4188 yield self._video_entry(renderer)
07aeced6 4189
39ed931e 4190 r''' # unused
4191 def _rich_grid_entries(self, contents):
4192 for content in contents:
4193 video_renderer = try_get(content, lambda x: x['richItemRenderer']['content']['videoRenderer'], dict)
4194 if video_renderer:
4195 entry = self._video_entry(video_renderer)
4196 if entry:
4197 yield entry
4198 '''
52efa4b3 4199
def _extract_entries(self, parent_renderer, continuation_list):
    """Yield the entries found under ``parent_renderer['contents']``.

    ``continuation_list`` is a one-element list used as an output parameter:
    it is reset to ``[None]`` on entry, and its single slot is overwritten
    with the value returned by ``_extract_continuation`` as contents are
    processed. Because this is a generator, the slot only holds its final
    value once the generator has been exhausted.
    """
    # continuation_list is modified in-place so that the caller sees the result
    continuation_list[:] = [None]
    contents = try_get(parent_renderer, lambda x: x['contents'], list) or []
    for content in contents:
        if not isinstance(content, dict):
            continue
        # First of the section-like renderers present in this content item
        is_renderer = traverse_obj(
            content, 'itemSectionRenderer', 'musicShelfRenderer', 'musicShelfContinuation',
            expected_type=dict)
        if not is_renderer:
            # Not a section: the only other thing handled here is a rich item
            renderer = content.get('richItemRenderer')
            if renderer:
                for entry in self._rich_entries(renderer):
                    yield entry
                continuation_list[0] = self._extract_continuation(parent_renderer)
            continue
        isr_contents = try_get(is_renderer, lambda x: x['contents'], list) or []
        for isr_content in isr_contents:
            if not isinstance(isr_content, dict):
                continue

            # Renderer key -> callable returning an iterable of entries
            known_renderers = {
                'playlistVideoListRenderer': self._playlist_entries,
                'gridRenderer': self._grid_entries,
                'reelShelfRenderer': self._grid_entries,
                'shelfRenderer': self._shelf_entries,
                'musicResponsiveListItemRenderer': lambda x: [self._music_reponsive_list_entry(x)],
                'backstagePostThreadRenderer': self._post_thread_entries,
                'videoRenderer': lambda x: [self._video_entry(x)],
                'playlistRenderer': lambda x: self._grid_entries({'items': [{'playlistRenderer': x}]}),
                'channelRenderer': lambda x: self._grid_entries({'items': [{'channelRenderer': x}]}),
                'hashtagTileRenderer': lambda x: [self._hashtag_tile_entry(x)]
            }
            # Only the first known renderer of each item is processed
            for key, renderer in isr_content.items():
                if key not in known_renderers:
                    continue
                for entry in known_renderers[key](renderer):
                    # The single-entry helpers may return None
                    if entry:
                        yield entry
                continuation_list[0] = self._extract_continuation(renderer)
                break

        # Fall back to the section's continuation if its items provided none
        if not continuation_list[0]:
            continuation_list[0] = self._extract_continuation(is_renderer)

    # Last resort: the continuation of the parent renderer itself
    if not continuation_list[0]:
        continuation_list[0] = self._extract_continuation(parent_renderer)
4248
4249 def _entries(self, tab, item_id, ytcfg, account_syncid, visitor_data):
4250 continuation_list = [None]
4251 extract_entries = lambda x: self._extract_entries(x, continuation_list)
29f7c58a 4252 tab_content = try_get(tab, lambda x: x['content'], dict)
4253 if not tab_content:
4254 return
3462ffa8 4255 parent_renderer = (
29f7c58a 4256 try_get(tab_content, lambda x: x['sectionListRenderer'], dict)
4257 or try_get(tab_content, lambda x: x['richGridRenderer'], dict) or {})
86e5f3ed 4258 yield from extract_entries(parent_renderer)
3462ffa8 4259 continuation = continuation_list[0]
d069eca7 4260
8bdd16b4 4261 for page_num in itertools.count(1):
4262 if not continuation:
4263 break
99e9e001 4264 headers = self.generate_api_headers(
4265 ytcfg=ytcfg, account_syncid=account_syncid, visitor_data=visitor_data)
79360d99 4266 response = self._extract_response(
86e5f3ed 4267 item_id=f'{item_id} page {page_num}',
fe93e2c4 4268 query=continuation, headers=headers, ytcfg=ytcfg,
79360d99 4269 check_get_keys=('continuationContents', 'onResponseReceivedActions', 'onResponseReceivedEndpoints'))
a5c56234
M
4270
4271 if not response:
8bdd16b4 4272 break
ac56cf38 4273 # Extracting updated visitor data is required to prevent an infinite extraction loop in some cases
4274 # See: https://github.com/ytdl-org/youtube-dl/issues/28702
4275 visitor_data = self._extract_visitor_data(response) or visitor_data
ebf1b291 4276
69184e41 4277 known_continuation_renderers = {
4278 'playlistVideoListContinuation': self._playlist_entries,
4279 'gridContinuation': self._grid_entries,
4280 'itemSectionContinuation': self._post_thread_continuation_entries,
4281 'sectionListContinuation': extract_entries, # for feeds
4282 }
8bdd16b4 4283 continuation_contents = try_get(
69184e41 4284 response, lambda x: x['continuationContents'], dict) or {}
4285 continuation_renderer = None
4286 for key, value in continuation_contents.items():
4287 if key not in known_continuation_renderers:
3462ffa8 4288 continue
69184e41 4289 continuation_renderer = value
4290 continuation_list = [None]
86e5f3ed 4291 yield from known_continuation_renderers[key](continuation_renderer)
69184e41 4292 continuation = continuation_list[0] or self._extract_continuation(continuation_renderer)
4293 break
4294 if continuation_renderer:
4295 continue
c5e8d7af 4296
a1b535bd 4297 known_renderers = {
e4b98809 4298 'videoRenderer': (self._grid_entries, 'items'), # for membership tab
a1b535bd 4299 'gridPlaylistRenderer': (self._grid_entries, 'items'),
4300 'gridVideoRenderer': (self._grid_entries, 'items'),
d61fc646 4301 'gridChannelRenderer': (self._grid_entries, 'items'),
a1b535bd 4302 'playlistVideoRenderer': (self._playlist_entries, 'contents'),
cd7c66cf 4303 'itemSectionRenderer': (extract_entries, 'contents'), # for feeds
9ba5705a 4304 'richItemRenderer': (extract_entries, 'contents'), # for hashtag
26fe8ffe 4305 'backstagePostThreadRenderer': (self._post_thread_continuation_entries, 'contents')
a1b535bd 4306 }
cce889b9 4307 on_response_received = dict_get(response, ('onResponseReceivedActions', 'onResponseReceivedEndpoints'))
8bdd16b4 4308 continuation_items = try_get(
cce889b9 4309 on_response_received, lambda x: x[0]['appendContinuationItemsAction']['continuationItems'], list)
a1b535bd 4310 continuation_item = try_get(continuation_items, lambda x: x[0], dict) or {}
4311 video_items_renderer = None
4312 for key, value in continuation_item.items():
4313 if key not in known_renderers:
8bdd16b4 4314 continue
a1b535bd 4315 video_items_renderer = {known_renderers[key][1]: continuation_items}
9ba5705a 4316 continuation_list = [None]
86e5f3ed 4317 yield from known_renderers[key][0](video_items_renderer)
9ba5705a 4318 continuation = continuation_list[0] or self._extract_continuation(video_items_renderer)
a1b535bd 4319 break
4320 if video_items_renderer:
4321 continue
8bdd16b4 4322 break
9558dcec 4323
@staticmethod
def _extract_selected_tab(tabs, fatal=True):
    """Return the renderer of the tab whose ``selected`` flag is True.

    When no tab is selected, raise ExtractorError if *fatal* is set,
    otherwise return None.
    """
    renderers = (
        dict_get(tab, ('tabRenderer', 'expandableTabRenderer')) or {}
        for tab in tabs)
    selected = next(
        (renderer for renderer in renderers if renderer.get('selected') is True), None)
    if selected is not None:
        return selected
    if fatal:
        raise ExtractorError('Unable to find selected tab')
    return None
b82f815f 4333
def _extract_uploader(self, data):
    """Return the uploader fields found in the playlist's secondary sidebar.

    The result may contain ``uploader``, ``uploader_id`` and ``uploader_url``;
    fields whose value is None are left out. An empty dict is returned when
    the sidebar names no owner.
    """
    sidebar = self._extract_sidebar_info_renderer(data, 'playlistSidebarSecondaryInfoRenderer') or {}
    owner_run = try_get(
        sidebar, lambda r: r['videoOwner']['videoOwnerRenderer']['title']['runs'][0], dict)
    if not owner_run:
        return {}

    owner_text = owner_run.get('text')
    fields = {
        # A "by X and N others" label is reduced to X; otherwise the text is used unchanged
        'uploader': self._search_regex(
            r'^by (.+) and \d+ others?$', owner_text, 'uploader', default=owner_text),
        'uploader_id': try_get(
            owner_run, lambda r: r['navigationEndpoint']['browseEndpoint']['browseId'], str),
        'uploader_url': urljoin(
            'https://www.youtube.com/',
            try_get(owner_run, lambda r: r['navigationEndpoint']['browseEndpoint']['canonicalBaseUrl'], str)),
    }
    return {key: value for key, value in fields.items() if value is not None}
8bdd16b4 4349
def _extract_from_tabs(self, item_id, ytcfg, data, tabs):
    """Build the playlist result for the selected tab of a tabbed page.

    Metadata is taken from the channel metadata renderer when present,
    otherwise from the playlist metadata renderer, and is complemented with
    thumbnails, sidebar statistics and uploader details. The entries are
    produced lazily by ``_entries`` for the selected tab.
    """
    playlist_id = title = description = channel_url = channel_name = channel_id = None
    tags = []

    selected_tab = self._extract_selected_tab(tabs)
    primary_sidebar_renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer')
    renderer = try_get(
        data, lambda x: x['metadata']['channelMetadataRenderer'], dict)
    if renderer:
        channel_name = renderer.get('title')
        channel_url = renderer.get('channelUrl')
        channel_id = renderer.get('externalId')
    else:
        # No channel metadata: fall back to the playlist metadata
        renderer = try_get(
            data, lambda x: x['metadata']['playlistMetadataRenderer'], dict)

    if renderer:
        title = renderer.get('title')
        description = renderer.get('description', '')
        # channel_id is still None here when the playlist metadata was used
        playlist_id = channel_id
        tags = renderer.get('keywords', '').split()

    # We can get the uncropped banner/avatar by replacing the crop params with '=s0'
    # See: https://github.com/yt-dlp/yt-dlp/issues/2237#issuecomment-1013694714
    def _get_uncropped(url):
        return url_or_none((url or '').split('=')[0] + '=s0')

    avatar_thumbnails = self._extract_thumbnails(renderer, 'avatar')
    if avatar_thumbnails:
        uncropped_avatar = _get_uncropped(avatar_thumbnails[0]['url'])
        if uncropped_avatar:
            avatar_thumbnails.append({
                'url': uncropped_avatar,
                'id': 'avatar_uncropped',
                'preference': 1
            })

    channel_banners = self._extract_thumbnails(
        data, ('header', ..., ['banner', 'mobileBanner', 'tvBanner']))
    # Banners get a lower preference value than the uncropped avatar (1)
    for banner in channel_banners:
        banner['preference'] = -10

    if channel_banners:
        uncropped_banner = _get_uncropped(channel_banners[0]['url'])
        if uncropped_banner:
            channel_banners.append({
                'url': uncropped_banner,
                'id': 'banner_uncropped',
                'preference': -5
            })

    primary_thumbnails = self._extract_thumbnails(
        primary_sidebar_renderer, ('thumbnailRenderer', ('playlistVideoThumbnailRenderer', 'playlistCustomThumbnailRenderer'), 'thumbnail'))

    if playlist_id is None:
        playlist_id = item_id

    # NOTE(review): the stats are read by position below (0: item count,
    # 1: view count, 2: last updated); confirm this order against the API
    playlist_stats = traverse_obj(primary_sidebar_renderer, 'stats')
    last_updated_unix, _ = self._extract_time_text(playlist_stats, 2)
    if title is None:
        title = self._get_text(data, ('header', 'hashtagHeaderRenderer', 'hashtag')) or playlist_id
    # Append the tab title (and expanded text) of the selected tab
    title += format_field(selected_tab, 'title', ' - %s')
    title += format_field(selected_tab, 'expandedText', ' - %s')

    metadata = {
        'playlist_id': playlist_id,
        'playlist_title': title,
        'playlist_description': description,
        'uploader': channel_name,
        'uploader_id': channel_id,
        'uploader_url': channel_url,
        'thumbnails': primary_thumbnails + avatar_thumbnails + channel_banners,
        'tags': tags,
        'view_count': self._get_count(playlist_stats, 1),
        'availability': self._extract_availability(data),
        'modified_date': strftime_or_none(last_updated_unix, '%Y%m%d'),
        'playlist_count': self._get_count(playlist_stats, 0),
        'channel_follower_count': self._get_count(data, ('header', ..., 'subscriberCountText')),
    }
    # Without channel metadata, take the uploader from the sidebar instead
    if not channel_id:
        metadata.update(self._extract_uploader(data))
    # The channel fields mirror the (possibly just updated) uploader fields
    metadata.update({
        'channel': metadata['uploader'],
        'channel_id': metadata['uploader_id'],
        'channel_url': metadata['uploader_url']})
    return self.playlist_result(
        self._entries(
            selected_tab, playlist_id, ytcfg,
            self._extract_account_syncid(ytcfg, data),
            self._extract_visitor_data(data, ytcfg)),
        **metadata)
73c4ac2c 4441
def _extract_inline_playlist(self, playlist, playlist_id, data, ytcfg):
    """
    Yield the entries of an inline playlist, requesting further pages from the 'next' endpoint.

    Generation stops as soon as a page has no entries, or has no entries after the last one yielded.

    @param playlist: dict whose 'contents' list holds the playlist panel items of the first page
    @param playlist_id: ID sent as 'playlistId' with every follow-up request
    @param data: initial response; used for the account sync ID and the visitor data
    @param ytcfg: ytcfg forwarded to the header generation and to the API requests
    """
    last_id = response = None
    for page_num in itertools.count(1):
        videos = list(self._playlist_entries(playlist))
        if not videos:
            return
        # Resume right after the last entry already yielded, if it shows up again in this page
        start = next((i for i, v in enumerate(videos) if v['id'] == last_id), -1) + 1
        if start >= len(videos):
            return
        yield from videos[start:]
        last_id = videos[-1]['id']
        # The last panel item may lack a watchEndpoint; default to {} so that
        # the fallbacks in the query below are used instead of raising AttributeError
        watch_endpoint = try_get(
            playlist, lambda x: x['contents'][-1]['playlistPanelVideoRenderer']['navigationEndpoint']['watchEndpoint'],
            dict) or {}
        headers = self.generate_api_headers(
            ytcfg=ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
            visitor_data=self._extract_visitor_data(response, data, ytcfg))
        query = {
            'playlistId': playlist_id,
            'videoId': watch_endpoint.get('videoId') or last_id,
            'index': watch_endpoint.get('index') or len(videos),
            'params': watch_endpoint.get('params') or 'OAE%3D'
        }
        response = self._extract_response(
            item_id='%s page %d' % (playlist_id, page_num),
            query=query, ep='next', headers=headers, ytcfg=ytcfg,
            check_get_keys='contents'
        )
        playlist = try_get(
            response, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
cd7c66cf 4472
def _extract_from_playlist(self, item_id, url, data, playlist, ytcfg):
    """
    Build the result for a playlist embedded in a response.

    If the playlist links to a page other than `url` (and is not known to be unviewable),
    a url_result for YoutubeTabIE pointing at that page is returned.
    Otherwise the entries are extracted inline.

    @param item_id: fallback for the playlist ID when `playlist` has no 'playlistId'
    @param url: URL being extracted; base for resolving the playlist's own URL
    @param data: response; fallback source for the title
    @param playlist: playlist dict
    @param ytcfg: ytcfg passed on to the inline extraction
    """
    playlist_id = playlist.get('playlistId') or item_id
    title = playlist.get('title')
    if not title:
        title = try_get(data, lambda x: x['titleText']['simpleText'], str)

    # Delegating everything except mix playlists to regular tab-based playlist URL
    linked_path = try_get(
        playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'], str)
    playlist_url = urljoin(url, linked_path)

    # Some playlists are unviewable but YouTube still provides a link to the (broken) playlist page [1]
    # [1] MLCT, RLTDwFCb4jeqaKWnciAYM-ZVHg
    is_known_unviewable = re.fullmatch(r'MLCT|RLTD[\w-]{22}', playlist_id)

    if not playlist_url or playlist_url == url or is_known_unviewable:
        entries = self._extract_inline_playlist(playlist, playlist_id, data, ytcfg)
        return self.playlist_result(entries, playlist_id=playlist_id, playlist_title=title)

    return self.url_result(
        playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
        video_title=title)
c5e8d7af 4495
def _extract_availability(self, data):
    """
    Determine the availability of a playlist/tab from its primary sidebar info renderer.
    Note: Unless YouTube tells us explicitly, we do not assume it is public
    @param data: response
    @returns the result of self._availability for the detected private/unlisted flags
    """
    sidebar = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer') or {}
    labels = self._extract_badges(sidebar)

    # Personal playlists, when authenticated, have a dropdown visibility selector instead of a badge
    dropdown_entries = try_get(
        sidebar, lambda x: x['privacyForm']['dropdownFormFieldRenderer']['dropdown']['dropdownRenderer']['entries'],
        list) or []
    for entry in dropdown_entries:
        if not try_get(entry, lambda x: x['privacyDropdownItemRenderer']['isSelected'], bool):
            continue
        selected_label = self._get_text(entry, ('privacyDropdownItemRenderer', 'label'))
        if selected_label:
            # Treat the selected dropdown entry like a badge and stop looking
            labels.add(selected_label.lower())
            break

    # Both flags stay None (unknown) unless a label says otherwise
    is_private = is_unlisted = None
    for label in labels:
        if label == 'public':
            is_unlisted = is_private = False
        elif label == 'private':
            is_private = True
        elif label == 'unlisted':
            is_unlisted = True
    return self._availability(is_private, False, False, False, is_unlisted)
4527
@staticmethod
def _extract_sidebar_info_renderer(data, info_renderer, expected_type=dict):
    """
    Return the first truthy `info_renderer` value among the playlist sidebar items.

    @param data: response containing sidebar -> playlistSidebarRenderer -> items
    @param info_renderer: key to look up in each sidebar item
    @param expected_type: type the value must have to be accepted
    @returns the matching value, or None if no item provides one
    """
    sidebar_items = try_get(
        data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list) or []
    candidates = (
        try_get(entry, lambda x: x[info_renderer], expected_type)
        for entry in sidebar_items)
    return next(filter(None, candidates), None)
4536
def _reload_with_unavailable_videos(self, item_id, data, ytcfg):
    """
    Request the playlist again, this time including its unavailable videos.

    The browse endpoint of the 'show unavailable videos' menu entry is used when present.
    Otherwise the request falls back to default params and a browse ID of 'VL' + item_id.

    @param item_id: playlist ID
    @param data: response the sidebar menu is read from
    @param ytcfg: ytcfg forwarded to the header generation and to the API request
    @returns the API response (non-fatal), or None if `data` has no primary sidebar info renderer
    """
    sidebar = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer')
    if not sidebar:
        return

    browse_endpoint = {}
    menu_items = try_get(sidebar, lambda x: x['menu']['menuRenderer']['items'], list) or []
    for menu_item in menu_items:
        if not isinstance(menu_item, dict):
            continue
        nav_item = menu_item.get('menuNavigationItemRenderer')
        text = try_get(nav_item, lambda x: x['text']['simpleText'], str)
        if text and text.lower() == 'show unavailable videos':
            browse_endpoint = try_get(
                nav_item, lambda x: x['navigationEndpoint']['browseEndpoint'], dict) or {}
            break

    headers = self.generate_api_headers(
        ytcfg=ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
        visitor_data=self._extract_visitor_data(data, ytcfg))
    query = {
        'params': browse_endpoint.get('params') or 'wgYCCAA=',
        'browseId': browse_endpoint.get('browseId') or 'VL%s' % item_id
    }
    return self._extract_response(
        item_id=item_id, headers=headers, query=query,
        check_get_keys='contents', fatal=False, ytcfg=ytcfg,
        note='Downloading API JSON with unavailable videos')
358de58c 4572
@functools.cached_property
def skip_webpage(self):
    """Whether 'webpage' is listed in the 'skip' extractor argument of YoutubeTabIE (cached after first access)"""
    skipped = self._configuration_arg('skip', ie_key=YoutubeTabIE.ie_key())
    return 'webpage' in skipped
4576
def _extract_webpage(self, url, item_id, fatal=True):
    """
    Download the webpage at `url` and extract its initial data, looping over self.RetryManager.

    A network error other than HTTP 403/429, or incomplete initial data, is recorded on the
    retry object. Any other ExtractorError goes to self._error_or_warning and ends the loop.

    @param url: page to download
    @param item_id: ID used for the download and extraction calls
    @param fatal: passed to the RetryManager, the data extraction and the error reporting
    @returns (webpage, data); both are None if the page was never downloaded
    """
    webpage = data = None
    for retry in self.RetryManager(fatal=fatal):
        try:
            webpage = self._download_webpage(url, item_id, note='Downloading webpage')
            data = self.extract_yt_initial_data(item_id, webpage or '', fatal=fatal) or {}
        except ExtractorError as err:
            cause = err.cause
            # HTTP 403 and 429 are reported immediately instead of being retried
            is_retriable = isinstance(cause, network_exceptions) and (
                not isinstance(cause, urllib.error.HTTPError) or cause.code not in (403, 429))
            if is_retriable:
                retry.error = err
                continue
            self._error_or_warning(err, fatal=fatal)
            break

        try:
            self._extract_and_report_alerts(data)
        except ExtractorError as err:
            self._error_or_warning(err, fatal=fatal)
            break

        # Sometimes youtube returns a webpage with incomplete ytInitialData
        # See: https://github.com/yt-dlp/yt-dlp/issues/116
        has_content = traverse_obj(data, 'contents', 'currentVideoEndpoint', 'onResponseReceivedActions')
        if not has_content:
            retry.error = ExtractorError('Incomplete yt initial data received')

    return webpage, data
4604
def _report_playlist_authcheck(self, ytcfg, fatal=True):
    """
    Use if failed to extract ytcfg (and data) from initial webpage

    Does nothing unless `ytcfg` is empty and the extractor is authenticated.
    Raises ExtractorError if `fatal` and 'authcheck' is not in the 'skip' extractor argument;
    otherwise only a warning is reported (once).
    """
    if ytcfg or not self.is_authenticated:
        return

    msg = 'Playlists that require authentication may not extract correctly without a successful webpage download'
    check_skipped = 'authcheck' in self._configuration_arg('skip', ie_key=YoutubeTabIE.ie_key())
    if not check_skipped and fatal:
        raise ExtractorError(
            f'{msg}. If you are not downloading private content, or '
            'your cookies are only for the first account and channel,'
            ' pass "--extractor-args youtubetab:skip=authcheck" to skip this check',
            expected=True)
    self.report_warning(msg, only_once=True)
4616
def _extract_data(self, url, item_id, ytcfg=None, fatal=True, webpage_fatal=False, default_client='web'):
    """
    Obtain the response data for `url`, from the webpage if possible and from the API otherwise.

    @param url: URL to extract
    @param item_id: ID used for the download and extraction calls
    @param ytcfg: ytcfg to use; if falsy it is extracted from the webpage (when downloaded)
    @param fatal: whether a redirect to the home page, the auth check and the API fallback may raise
    @param webpage_fatal: `fatal` value for the webpage download
    @param default_client: client passed to the API fallback
    @returns (data, ytcfg)
    """
    data = None
    if not self.skip_webpage:
        webpage, data = self._extract_webpage(url, item_id, fatal=webpage_fatal)
        ytcfg = ytcfg or self.extract_ytcfg(item_id, webpage)

        # Reject webpage data if redirected to home page without explicitly requesting
        tabs = traverse_obj(
            data, ('contents', 'twoColumnBrowseResultsRenderer', 'tabs'), expected_type=list, default=[])
        selected_tab = self._extract_selected_tab(tabs, fatal=False) or {}
        redirected_to_home = (
            url != 'https://www.youtube.com/feed/recommended'
            and selected_tab.get('tabIdentifier') == 'FEwhat_to_watch'  # Home page
            and 'no-youtube-channel-redirect' not in self.get_param('compat_opts', []))
        if redirected_to_home:
            msg = 'The channel/playlist does not exist and the URL redirected to youtube.com home page'
            if fatal:
                raise ExtractorError(msg, expected=True)
            self.report_warning(msg, only_once=True)

    if data:
        return data, ytcfg

    # No usable webpage data (or the webpage was skipped): resolve the URL through the API
    self._report_playlist_authcheck(ytcfg, fatal=fatal)
    data = self._extract_tab_endpoint(url, item_id, ytcfg, fatal=fatal, default_client=default_client)
    return data, ytcfg
4636
def _extract_tab_endpoint(self, url, item_id, ytcfg=None, fatal=True, default_client='web'):
    """
    Resolve `url` via 'navigation/resolve_url' and fetch the response of the endpoint it maps to.

    A 'browseEndpoint' is requested from 'browse' and a 'watchEndpoint' from 'next', tried in that order.

    @returns the endpoint's response; None (after a warning) if nothing resolved and not `fatal`
    @raises ExtractorError if nothing resolved and `fatal`
    """
    headers = self.generate_api_headers(ytcfg=ytcfg, default_client=default_client)
    resolved = self._extract_response(
        item_id=item_id, query={'url': url}, check_get_keys='endpoint', headers=headers, ytcfg=ytcfg, fatal=fatal,
        ep='navigation/resolve_url', note='Downloading API parameters API JSON', default_client=default_client)

    for ep_key, ep in (('browseEndpoint', 'browse'), ('watchEndpoint', 'next')):
        params = try_get(resolved, lambda x: x['endpoint'][ep_key], dict)
        if not params:
            continue
        return self._extract_response(
            item_id=item_id, query=params, ep=ep, headers=headers,
            ytcfg=ytcfg, fatal=fatal, default_client=default_client,
            check_get_keys=('contents', 'currentVideoEndpoint', 'onResponseReceivedActions'))

    err_note = 'Failed to resolve url (does the playlist exist?)'
    if not fatal:
        self.report_warning(err_note, item_id)
        return
    raise ExtractorError(err_note, expected=True)
4654
# 'params' value used by _search_results when the caller passes none; a falsy value means no 'params' is sent
_SEARCH_PARAMS = None

def _search_results(self, query, params=NO_DEFAULT, default_client='web'):
    """
    Yield the entries of the search results for `query`, page by page.

    @param query: search query
    @param params: value for the request's 'params' field; defaults to self._SEARCH_PARAMS
    @param default_client: client used for the ytcfg download, the headers and the API requests
    """
    if params is NO_DEFAULT:
        params = self._SEARCH_PARAMS
    data = {'query': query}
    if params:
        data['params'] = params

    # Alternative paths to the result items within a response
    content_keys = (
        ('contents', 'twoColumnSearchResultsRenderer', 'primaryContents', 'sectionListRenderer', 'contents'),
        ('onResponseReceivedCommands', 0, 'appendContinuationItemsAction', 'continuationItems'),
        # ytmusic search
        ('contents', 'tabbedSearchResultsRenderer', 'tabs', 0, 'tabRenderer', 'content', 'sectionListRenderer', 'contents'),
        ('continuationContents', ),
    )
    display_id = f'query "{query}"'
    # Distinct top-level keys of the paths above
    check_get_keys = tuple({path[0] for path in content_keys})
    ytcfg = {} if self.skip_webpage else self._download_ytcfg(default_client, display_id)
    self._report_playlist_authcheck(ytcfg, fatal=False)

    # Single-item list handed to _extract_entries; its item is read back below as the next page's continuation
    continuation_list = [None]
    search = None
    for page_num in itertools.count(1):
        data.update(continuation_list[0] or {})
        headers = self.generate_api_headers(
            ytcfg=ytcfg, visitor_data=self._extract_visitor_data(search), default_client=default_client)
        search = self._extract_response(
            item_id=f'{display_id} page {page_num}', ep='search', query=data,
            default_client=default_client, check_get_keys=check_get_keys, ytcfg=ytcfg, headers=headers)
        page_contents = traverse_obj(search, *content_keys)
        yield from self._extract_entries({'contents': list(variadic(page_contents))}, continuation_list)
        if not continuation_list[0]:
            return
4689
4690
4691class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
4692 IE_DESC = 'YouTube Tabs'
4693 _VALID_URL = r'''(?x:
4694 https?://
4695 (?:\w+\.)?
4696 (?:
4697 youtube(?:kids)?\.com|
4698 %(invidious)s
4699 )/
4700 (?:
4701 (?P<channel_type>channel|c|user|browse)/|
4702 (?P<not_channel>
4703 feed/|hashtag/|
4704 (?:playlist|watch)\?.*?\blist=
4705 )|
4706 (?!(?:%(reserved_names)s)\b) # Direct URLs
4707 )
4708 (?P<id>[^/?\#&]+)
4709 )''' % {
4710 'reserved_names': YoutubeBaseInfoExtractor._RESERVED_NAMES,
4711 'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
4712 }
4713 IE_NAME = 'youtube:tab'
4714
4715 _TESTS = [{
4716 'note': 'playlists, multipage',
4717 'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid',
4718 'playlist_mincount': 94,
4719 'info_dict': {
4720 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
976ae3ea 4721 'title': 'Igor Kleiner - Playlists',
a6213a49 4722 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
976ae3ea 4723 'uploader': 'Igor Kleiner',
a6213a49 4724 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
976ae3ea 4725 'channel': 'Igor Kleiner',
4726 'channel_id': 'UCqj7Cz7revf5maW9g5pgNcg',
4727 'tags': ['"критическое', 'мышление"', '"наука', 'просто"', 'математика', '"анализ', 'данных"'],
4728 'channel_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',
4729 'uploader_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',
6c73052c 4730 'channel_follower_count': int
a6213a49 4731 },
4732 }, {
4733 'note': 'playlists, multipage, different order',
4734 'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
4735 'playlist_mincount': 94,
4736 'info_dict': {
4737 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
976ae3ea 4738 'title': 'Igor Kleiner - Playlists',
a6213a49 4739 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
4740 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
976ae3ea 4741 'uploader': 'Igor Kleiner',
4742 'uploader_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',
4743 'tags': ['"критическое', 'мышление"', '"наука', 'просто"', 'математика', '"анализ', 'данных"'],
4744 'channel_id': 'UCqj7Cz7revf5maW9g5pgNcg',
4745 'channel': 'Igor Kleiner',
4746 'channel_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',
6c73052c 4747 'channel_follower_count': int
a6213a49 4748 },
4749 }, {
4750 'note': 'playlists, series',
4751 'url': 'https://www.youtube.com/c/3blue1brown/playlists?view=50&sort=dd&shelf_id=3',
4752 'playlist_mincount': 5,
4753 'info_dict': {
4754 'id': 'UCYO_jab_esuFRV4b17AJtAw',
4755 'title': '3Blue1Brown - Playlists',
4756 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
4757 'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
4758 'uploader': '3Blue1Brown',
976ae3ea 4759 'channel_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',
4760 'uploader_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',
4761 'channel': '3Blue1Brown',
4762 'channel_id': 'UCYO_jab_esuFRV4b17AJtAw',
4763 'tags': ['Mathematics'],
6c73052c 4764 'channel_follower_count': int
a6213a49 4765 },
4766 }, {
4767 'note': 'playlists, singlepage',
4768 'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
4769 'playlist_mincount': 4,
4770 'info_dict': {
4771 'id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
4772 'title': 'ThirstForScience - Playlists',
4773 'description': 'md5:609399d937ea957b0f53cbffb747a14c',
4774 'uploader': 'ThirstForScience',
4775 'uploader_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
976ae3ea 4776 'uploader_url': 'https://www.youtube.com/channel/UCAEtajcuhQ6an9WEzY9LEMQ',
4777 'channel_url': 'https://www.youtube.com/channel/UCAEtajcuhQ6an9WEzY9LEMQ',
4778 'channel_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
4779 'tags': 'count:13',
4780 'channel': 'ThirstForScience',
6c73052c 4781 'channel_follower_count': int
a6213a49 4782 }
4783 }, {
4784 'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',
4785 'only_matching': True,
4786 }, {
4787 'note': 'basic, single video playlist',
4788 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
4789 'info_dict': {
4790 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
4791 'uploader': 'Sergey M.',
4792 'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
4793 'title': 'youtube-dl public playlist',
976ae3ea 4794 'description': '',
4795 'tags': [],
4796 'view_count': int,
4797 'modified_date': '20201130',
4798 'channel': 'Sergey M.',
4799 'channel_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
4800 'uploader_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
4801 'channel_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
a6213a49 4802 },
4803 'playlist_count': 1,
4804 }, {
4805 'note': 'empty playlist',
4806 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
4807 'info_dict': {
4808 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
4809 'uploader': 'Sergey M.',
4810 'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
4811 'title': 'youtube-dl empty playlist',
976ae3ea 4812 'tags': [],
4813 'channel': 'Sergey M.',
4814 'description': '',
4815 'modified_date': '20160902',
4816 'channel_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
4817 'channel_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
4818 'uploader_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
a6213a49 4819 },
4820 'playlist_count': 0,
4821 }, {
4822 'note': 'Home tab',
4823 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/featured',
4824 'info_dict': {
4825 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4826 'title': 'lex will - Home',
4827 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
4828 'uploader': 'lex will',
4829 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
976ae3ea 4830 'channel': 'lex will',
4831 'tags': ['bible', 'history', 'prophesy'],
4832 'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
4833 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
4834 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
6c73052c 4835 'channel_follower_count': int
a6213a49 4836 },
4837 'playlist_mincount': 2,
4838 }, {
4839 'note': 'Videos tab',
4840 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos',
4841 'info_dict': {
4842 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4843 'title': 'lex will - Videos',
4844 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
4845 'uploader': 'lex will',
4846 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
976ae3ea 4847 'tags': ['bible', 'history', 'prophesy'],
4848 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
4849 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4850 'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
4851 'channel': 'lex will',
6c73052c 4852 'channel_follower_count': int
a6213a49 4853 },
4854 'playlist_mincount': 975,
4855 }, {
4856 'note': 'Videos tab, sorted by popular',
4857 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos?view=0&sort=p&flow=grid',
4858 'info_dict': {
4859 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4860 'title': 'lex will - Videos',
4861 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
4862 'uploader': 'lex will',
4863 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
976ae3ea 4864 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4865 'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
4866 'channel': 'lex will',
4867 'tags': ['bible', 'history', 'prophesy'],
4868 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
6c73052c 4869 'channel_follower_count': int
a6213a49 4870 },
4871 'playlist_mincount': 199,
4872 }, {
4873 'note': 'Playlists tab',
4874 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/playlists',
4875 'info_dict': {
4876 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4877 'title': 'lex will - Playlists',
4878 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
4879 'uploader': 'lex will',
4880 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
976ae3ea 4881 'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
4882 'channel': 'lex will',
4883 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
4884 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4885 'tags': ['bible', 'history', 'prophesy'],
6c73052c 4886 'channel_follower_count': int
a6213a49 4887 },
4888 'playlist_mincount': 17,
4889 }, {
4890 'note': 'Community tab',
4891 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community',
4892 'info_dict': {
4893 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4894 'title': 'lex will - Community',
4895 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
4896 'uploader': 'lex will',
4897 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
976ae3ea 4898 'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
4899 'channel': 'lex will',
4900 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
4901 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4902 'tags': ['bible', 'history', 'prophesy'],
6c73052c 4903 'channel_follower_count': int
a6213a49 4904 },
4905 'playlist_mincount': 18,
4906 }, {
4907 'note': 'Channels tab',
4908 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels',
4909 'info_dict': {
4910 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4911 'title': 'lex will - Channels',
4912 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
4913 'uploader': 'lex will',
4914 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
976ae3ea 4915 'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
4916 'channel': 'lex will',
4917 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
4918 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4919 'tags': ['bible', 'history', 'prophesy'],
6c73052c 4920 'channel_follower_count': int
a6213a49 4921 },
4922 'playlist_mincount': 12,
4923 }, {
4924 'note': 'Search tab',
4925 'url': 'https://www.youtube.com/c/3blue1brown/search?query=linear%20algebra',
4926 'playlist_mincount': 40,
4927 'info_dict': {
4928 'id': 'UCYO_jab_esuFRV4b17AJtAw',
4929 'title': '3Blue1Brown - Search - linear algebra',
4930 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
4931 'uploader': '3Blue1Brown',
4932 'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
976ae3ea 4933 'channel_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',
4934 'uploader_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',
4935 'tags': ['Mathematics'],
4936 'channel': '3Blue1Brown',
4937 'channel_id': 'UCYO_jab_esuFRV4b17AJtAw',
6c73052c 4938 'channel_follower_count': int
a6213a49 4939 },
4940 }, {
4941 'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',
4942 'only_matching': True,
4943 }, {
4944 'url': 'https://www.youtubekids.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
4945 'only_matching': True,
4946 }, {
4947 'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
4948 'only_matching': True,
4949 }, {
4950 'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
4951 'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
4952 'info_dict': {
4953 'title': '29C3: Not my department',
4954 'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
4955 'uploader': 'Christiaan008',
4956 'uploader_id': 'UCEPzS1rYsrkqzSLNp76nrcg',
4957 'description': 'md5:a14dc1a8ef8307a9807fe136a0660268',
976ae3ea 4958 'tags': [],
4959 'uploader_url': 'https://www.youtube.com/c/ChRiStIaAn008',
4960 'view_count': int,
4961 'modified_date': '20150605',
4962 'channel_id': 'UCEPzS1rYsrkqzSLNp76nrcg',
4963 'channel_url': 'https://www.youtube.com/c/ChRiStIaAn008',
4964 'channel': 'Christiaan008',
a6213a49 4965 },
4966 'playlist_count': 96,
4967 }, {
4968 'note': 'Large playlist',
4969 'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
4970 'info_dict': {
4971 'title': 'Uploads from Cauchemar',
4972 'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
4973 'uploader': 'Cauchemar',
4974 'uploader_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',
976ae3ea 4975 'channel_url': 'https://www.youtube.com/c/Cauchemar89',
4976 'tags': [],
4977 'modified_date': r're:\d{8}',
4978 'channel': 'Cauchemar',
4979 'uploader_url': 'https://www.youtube.com/c/Cauchemar89',
4980 'view_count': int,
4981 'description': '',
4982 'channel_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',
a6213a49 4983 },
4984 'playlist_mincount': 1123,
976ae3ea 4985 'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
a6213a49 4986 }, {
4987 'note': 'even larger playlist, 8832 videos',
4988 'url': 'http://www.youtube.com/user/NASAgovVideo/videos',
4989 'only_matching': True,
4990 }, {
4991 'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
4992 'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
4993 'info_dict': {
4994 'title': 'Uploads from Interstellar Movie',
4995 'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
4996 'uploader': 'Interstellar Movie',
4997 'uploader_id': 'UCXw-G3eDE9trcvY2sBMM_aA',
976ae3ea 4998 'uploader_url': 'https://www.youtube.com/c/InterstellarMovie',
4999 'tags': [],
5000 'view_count': int,
5001 'channel_id': 'UCXw-G3eDE9trcvY2sBMM_aA',
5002 'channel_url': 'https://www.youtube.com/c/InterstellarMovie',
5003 'channel': 'Interstellar Movie',
5004 'description': '',
5005 'modified_date': r're:\d{8}',
a6213a49 5006 },
5007 'playlist_mincount': 21,
5008 }, {
5009 'note': 'Playlist with "show unavailable videos" button',
5010 'url': 'https://www.youtube.com/playlist?list=UUTYLiWFZy8xtPwxFwX9rV7Q',
5011 'info_dict': {
5012 'title': 'Uploads from Phim Siêu Nhân Nhật Bản',
5013 'id': 'UUTYLiWFZy8xtPwxFwX9rV7Q',
5014 'uploader': 'Phim Siêu Nhân Nhật Bản',
5015 'uploader_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',
976ae3ea 5016 'view_count': int,
5017 'channel': 'Phim Siêu Nhân Nhật Bản',
5018 'tags': [],
5019 'uploader_url': 'https://www.youtube.com/channel/UCTYLiWFZy8xtPwxFwX9rV7Q',
5020 'description': '',
5021 'channel_url': 'https://www.youtube.com/channel/UCTYLiWFZy8xtPwxFwX9rV7Q',
5022 'channel_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',
5023 'modified_date': r're:\d{8}',
a6213a49 5024 },
5025 'playlist_mincount': 200,
976ae3ea 5026 'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
a6213a49 5027 }, {
5028 'note': 'Playlist with unavailable videos in page 7',
5029 'url': 'https://www.youtube.com/playlist?list=UU8l9frL61Yl5KFOl87nIm2w',
5030 'info_dict': {
5031 'title': 'Uploads from BlankTV',
5032 'id': 'UU8l9frL61Yl5KFOl87nIm2w',
5033 'uploader': 'BlankTV',
5034 'uploader_id': 'UC8l9frL61Yl5KFOl87nIm2w',
976ae3ea 5035 'channel': 'BlankTV',
5036 'channel_url': 'https://www.youtube.com/c/blanktv',
5037 'channel_id': 'UC8l9frL61Yl5KFOl87nIm2w',
5038 'view_count': int,
5039 'tags': [],
5040 'uploader_url': 'https://www.youtube.com/c/blanktv',
5041 'modified_date': r're:\d{8}',
5042 'description': '',
a6213a49 5043 },
5044 'playlist_mincount': 1000,
976ae3ea 5045 'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
a6213a49 5046 }, {
5047 'note': 'https://github.com/ytdl-org/youtube-dl/issues/21844',
5048 'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
5049 'info_dict': {
5050 'title': 'Data Analysis with Dr Mike Pound',
5051 'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
5052 'uploader_id': 'UC9-y-6csu5WGm29I7JiwpnA',
5053 'uploader': 'Computerphile',
5054 'description': 'md5:7f567c574d13d3f8c0954d9ffee4e487',
976ae3ea 5055 'uploader_url': 'https://www.youtube.com/user/Computerphile',
5056 'tags': [],
5057 'view_count': int,
5058 'channel_id': 'UC9-y-6csu5WGm29I7JiwpnA',
5059 'channel_url': 'https://www.youtube.com/user/Computerphile',
5060 'channel': 'Computerphile',
a6213a49 5061 },
5062 'playlist_mincount': 11,
5063 }, {
5064 'url': 'https://invidio.us/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
5065 'only_matching': True,
5066 }, {
5067 'note': 'Playlist URL that does not actually serve a playlist',
5068 'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
5069 'info_dict': {
5070 'id': 'FqZTN594JQw',
5071 'ext': 'webm',
5072 'title': "Smiley's People 01 detective, Adventure Series, Action",
5073 'uploader': 'STREEM',
5074 'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
5075 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
5076 'upload_date': '20150526',
5077 'license': 'Standard YouTube License',
5078 'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
5079 'categories': ['People & Blogs'],
5080 'tags': list,
5081 'view_count': int,
5082 'like_count': int,
a6213a49 5083 },
5084 'params': {
5085 'skip_download': True,
5086 },
5087 'skip': 'This video is not available.',
5088 'add_ie': [YoutubeIE.ie_key()],
5089 }, {
5090 'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',
5091 'only_matching': True,
5092 }, {
5093 'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
5094 'only_matching': True,
5095 }, {
5096 'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',
5097 'info_dict': {
12a1b225 5098 'id': 'Wq15eF5vCbI', # This will keep changing
a6213a49 5099 'ext': 'mp4',
976ae3ea 5100 'title': str,
a6213a49 5101 'uploader': 'Sky News',
5102 'uploader_id': 'skynews',
5103 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/skynews',
5104 'upload_date': r're:\d{8}',
976ae3ea 5105 'description': str,
a6213a49 5106 'categories': ['News & Politics'],
5107 'tags': list,
5108 'like_count': int,
6c73052c 5109 'release_timestamp': 1642502819,
976ae3ea 5110 'channel': 'Sky News',
5111 'channel_id': 'UCoMdktPbSTixAyNGwb-UYkQ',
5112 'age_limit': 0,
5113 'view_count': int,
6c73052c 5114 'thumbnail': 'https://i.ytimg.com/vi/GgL890LIznQ/maxresdefault_live.jpg',
976ae3ea 5115 'playable_in_embed': True,
6c73052c 5116 'release_date': '20220118',
976ae3ea 5117 'availability': 'public',
5118 'live_status': 'is_live',
5119 'channel_url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ',
6c73052c 5120 'channel_follower_count': int
a6213a49 5121 },
5122 'params': {
5123 'skip_download': True,
5124 },
976ae3ea 5125 'expected_warnings': ['Ignoring subtitle tracks found in '],
a6213a49 5126 }, {
5127 'url': 'https://www.youtube.com/user/TheYoungTurks/live',
5128 'info_dict': {
5129 'id': 'a48o2S1cPoo',
5130 'ext': 'mp4',
5131 'title': 'The Young Turks - Live Main Show',
5132 'uploader': 'The Young Turks',
5133 'uploader_id': 'TheYoungTurks',
5134 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
5135 'upload_date': '20150715',
5136 'license': 'Standard YouTube License',
5137 'description': 'md5:438179573adcdff3c97ebb1ee632b891',
5138 'categories': ['News & Politics'],
5139 'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
5140 'like_count': int,
a6213a49 5141 },
5142 'params': {
5143 'skip_download': True,
5144 },
5145 'only_matching': True,
5146 }, {
5147 'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
5148 'only_matching': True,
5149 }, {
5150 'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
5151 'only_matching': True,
5152 }, {
5153 'note': 'A channel that is not live. Should raise error',
5154 'url': 'https://www.youtube.com/user/numberphile/live',
5155 'only_matching': True,
5156 }, {
5157 'url': 'https://www.youtube.com/feed/trending',
5158 'only_matching': True,
5159 }, {
5160 'url': 'https://www.youtube.com/feed/library',
5161 'only_matching': True,
5162 }, {
5163 'url': 'https://www.youtube.com/feed/history',
5164 'only_matching': True,
5165 }, {
5166 'url': 'https://www.youtube.com/feed/subscriptions',
5167 'only_matching': True,
5168 }, {
5169 'url': 'https://www.youtube.com/feed/watch_later',
5170 'only_matching': True,
5171 }, {
5172 'note': 'Recommended - redirects to home page.',
5173 'url': 'https://www.youtube.com/feed/recommended',
5174 'only_matching': True,
5175 }, {
5176 'note': 'inline playlist with not always working continuations',
5177 'url': 'https://www.youtube.com/watch?v=UC6u0Tct-Fo&list=PL36D642111D65BE7C',
5178 'only_matching': True,
5179 }, {
5180 'url': 'https://www.youtube.com/course',
5181 'only_matching': True,
5182 }, {
5183 'url': 'https://www.youtube.com/zsecurity',
5184 'only_matching': True,
5185 }, {
5186 'url': 'http://www.youtube.com/NASAgovVideo/videos',
5187 'only_matching': True,
5188 }, {
5189 'url': 'https://www.youtube.com/TheYoungTurks/live',
5190 'only_matching': True,
5191 }, {
5192 'url': 'https://www.youtube.com/hashtag/cctv9',
5193 'info_dict': {
5194 'id': 'cctv9',
5195 'title': '#cctv9',
976ae3ea 5196 'tags': [],
a6213a49 5197 },
5198 'playlist_mincount': 350,
5199 }, {
5200 'url': 'https://www.youtube.com/watch?list=PLW4dVinRY435CBE_JD3t-0SRXKfnZHS1P&feature=youtu.be&v=M9cJMXmQ_ZU',
5201 'only_matching': True,
5202 }, {
5203 'note': 'Requires Premium: should request additional YTM-info webpage (and have format 141) for videos in playlist',
5204 'url': 'https://music.youtube.com/playlist?list=PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
5205 'only_matching': True
5206 }, {
5207 'note': '/browse/ should redirect to /channel/',
5208 'url': 'https://music.youtube.com/browse/UC1a8OFewdjuLq6KlF8M_8Ng',
5209 'only_matching': True
5210 }, {
5211 'note': 'VLPL, should redirect to playlist?list=PL...',
5212 'url': 'https://music.youtube.com/browse/VLPLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
5213 'info_dict': {
5214 'id': 'PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
5215 'uploader': 'NoCopyrightSounds',
5216 'description': 'Providing you with copyright free / safe music for gaming, live streaming, studying and more!',
5217 'uploader_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',
12a1b225 5218 'title': 'NCS : All Releases 💿',
976ae3ea 5219 'uploader_url': 'https://www.youtube.com/c/NoCopyrightSounds',
5220 'channel_url': 'https://www.youtube.com/c/NoCopyrightSounds',
5221 'modified_date': r're:\d{8}',
5222 'view_count': int,
5223 'channel_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',
5224 'tags': [],
5225 'channel': 'NoCopyrightSounds',
a6213a49 5226 },
5227 'playlist_mincount': 166,
976ae3ea 5228 'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
a6213a49 5229 }, {
5230 'note': 'Topic, should redirect to playlist?list=UU...',
5231 'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',
5232 'info_dict': {
5233 'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',
5234 'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
5235 'title': 'Uploads from Royalty Free Music - Topic',
5236 'uploader': 'Royalty Free Music - Topic',
976ae3ea 5237 'tags': [],
5238 'channel_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
5239 'channel': 'Royalty Free Music - Topic',
5240 'view_count': int,
5241 'channel_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',
5242 'channel_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',
5243 'modified_date': r're:\d{8}',
5244 'uploader_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',
5245 'description': '',
a6213a49 5246 },
5247 'expected_warnings': [
a6213a49 5248 'The URL does not have a videos tab',
976ae3ea 5249 r'[Uu]navailable videos (are|will be) hidden',
a6213a49 5250 ],
5251 'playlist_mincount': 101,
5252 }, {
5253 'note': 'Topic without a UU playlist',
5254 'url': 'https://www.youtube.com/channel/UCtFRv9O2AHqOZjjynzrv-xg',
5255 'info_dict': {
5256 'id': 'UCtFRv9O2AHqOZjjynzrv-xg',
5257 'title': 'UCtFRv9O2AHqOZjjynzrv-xg',
976ae3ea 5258 'tags': [],
a6213a49 5259 },
5260 'expected_warnings': [
976ae3ea 5261 'the playlist redirect gave error',
a6213a49 5262 ],
5263 'playlist_mincount': 9,
5264 }, {
5265 'note': 'Youtube music Album',
5266 'url': 'https://music.youtube.com/browse/MPREb_gTAcphH99wE',
5267 'info_dict': {
5268 'id': 'OLAK5uy_l1m0thk3g31NmIIz_vMIbWtyv7eZixlH0',
5269 'title': 'Album - Royalty Free Music Library V2 (50 Songs)',
976ae3ea 5270 'tags': [],
5271 'view_count': int,
5272 'description': '',
5273 'availability': 'unlisted',
5274 'modified_date': r're:\d{8}',
a6213a49 5275 },
5276 'playlist_count': 50,
5277 }, {
5278 'note': 'unlisted single video playlist',
5279 'url': 'https://www.youtube.com/playlist?list=PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
5280 'info_dict': {
5281 'uploader_id': 'UC9zHu_mHU96r19o-wV5Qs1Q',
5282 'uploader': 'colethedj',
5283 'id': 'PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
5284 'title': 'yt-dlp unlisted playlist test',
976ae3ea 5285 'availability': 'unlisted',
5286 'tags': [],
12a1b225 5287 'modified_date': '20220418',
976ae3ea 5288 'channel': 'colethedj',
5289 'view_count': int,
5290 'description': '',
5291 'uploader_url': 'https://www.youtube.com/channel/UC9zHu_mHU96r19o-wV5Qs1Q',
5292 'channel_id': 'UC9zHu_mHU96r19o-wV5Qs1Q',
5293 'channel_url': 'https://www.youtube.com/channel/UC9zHu_mHU96r19o-wV5Qs1Q',
a6213a49 5294 },
5295 'playlist_count': 1,
5296 }, {
5297 'note': 'API Fallback: Recommended - redirects to home page. Requires visitorData',
5298 'url': 'https://www.youtube.com/feed/recommended',
5299 'info_dict': {
5300 'id': 'recommended',
5301 'title': 'recommended',
6c73052c 5302 'tags': [],
a6213a49 5303 },
5304 'playlist_mincount': 50,
5305 'params': {
5306 'skip_download': True,
5307 'extractor_args': {'youtubetab': {'skip': ['webpage']}}
5308 },
5309 }, {
5310 'note': 'API Fallback: /videos tab, sorted by oldest first',
5311 'url': 'https://www.youtube.com/user/theCodyReeder/videos?view=0&sort=da&flow=grid',
5312 'info_dict': {
5313 'id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',
5314 'title': 'Cody\'sLab - Videos',
5315 'description': 'md5:d083b7c2f0c67ee7a6c74c3e9b4243fa',
5316 'uploader': 'Cody\'sLab',
5317 'uploader_id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',
976ae3ea 5318 'channel': 'Cody\'sLab',
5319 'channel_id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',
5320 'tags': [],
5321 'channel_url': 'https://www.youtube.com/channel/UCu6mSoMNzHQiBIOCkHUa2Aw',
5322 'uploader_url': 'https://www.youtube.com/channel/UCu6mSoMNzHQiBIOCkHUa2Aw',
6c73052c 5323 'channel_follower_count': int
a6213a49 5324 },
5325 'playlist_mincount': 650,
5326 'params': {
5327 'skip_download': True,
5328 'extractor_args': {'youtubetab': {'skip': ['webpage']}}
5329 },
5330 }, {
5331 'note': 'API Fallback: Topic, should redirect to playlist?list=UU...',
5332 'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',
5333 'info_dict': {
5334 'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',
5335 'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
5336 'title': 'Uploads from Royalty Free Music - Topic',
5337 'uploader': 'Royalty Free Music - Topic',
976ae3ea 5338 'modified_date': r're:\d{8}',
5339 'channel_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
5340 'description': '',
5341 'channel_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',
5342 'tags': [],
5343 'channel': 'Royalty Free Music - Topic',
5344 'view_count': int,
5345 'uploader_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',
a6213a49 5346 },
5347 'expected_warnings': [
976ae3ea 5348 'does not have a videos tab',
5349 r'[Uu]navailable videos (are|will be) hidden',
a6213a49 5350 ],
5351 'playlist_mincount': 101,
5352 'params': {
5353 'skip_download': True,
5354 'extractor_args': {'youtubetab': {'skip': ['webpage']}}
5355 },
7c219ea6 5356 }, {
5357 'note': 'non-standard redirect to regional channel',
5358 'url': 'https://www.youtube.com/channel/UCwVVpHQ2Cs9iGJfpdFngePQ',
5359 'only_matching': True
61d3665d 5360 }, {
5361 'note': 'collaborative playlist (uploader name in the form "by <uploader> and x other(s)")',
5362 'url': 'https://www.youtube.com/playlist?list=PLx-_-Kk4c89oOHEDQAojOXzEzemXxoqx6',
5363 'info_dict': {
5364 'id': 'PLx-_-Kk4c89oOHEDQAojOXzEzemXxoqx6',
5365 'modified_date': '20220407',
5366 'channel_url': 'https://www.youtube.com/channel/UCKcqXmCcyqnhgpA5P0oHH_Q',
5367 'tags': [],
5368 'uploader_id': 'UCKcqXmCcyqnhgpA5P0oHH_Q',
5369 'uploader': 'pukkandan',
5370 'availability': 'unlisted',
5371 'channel_id': 'UCKcqXmCcyqnhgpA5P0oHH_Q',
5372 'channel': 'pukkandan',
5373 'description': 'Test for collaborative playlist',
5374 'title': 'yt-dlp test - collaborative playlist',
12a1b225 5375 'view_count': int,
61d3665d 5376 'uploader_url': 'https://www.youtube.com/channel/UCKcqXmCcyqnhgpA5P0oHH_Q',
5377 },
5378 'playlist_mincount': 2
a6213a49 5379 }]
5380
@classmethod
def suitable(cls, url):
    """Return False for any URL that YoutubeIE accepts; otherwise use the inherited check."""
    # Single-video URLs take precedence over the tab extractor
    if YoutubeIE.suitable(url):
        return False
    return super().suitable(url)
9297939e 5384
# Splits a URL into three named parts:
#   pre  - whatever _VALID_URL matches
#   tab  - an optional `/word` segment; the conditional group `(?(not_channel)|...)` only tries to
#          match it when the `not_channel` group (presumably defined inside _VALID_URL) did NOT match
#   post - everything that remains up to the end of the string
_URL_RE = re.compile(rf'(?P<pre>{_VALID_URL})(?(not_channel)|(?P<tab>/\w+))?(?P<post>.*)$')
fe03a6cd 5386
@YoutubeTabBaseInfoExtractor.passthrough_smuggled_data
def _real_extract(self, url, smuggled_data):
    """
    Resolve a tab-like URL (channel, playlist, feed, watch-with-list, ...) to a result.

    @param url            The URL to extract; its host is rewritten to www.youtube.com
    @param smuggled_data  Mapping read with .get('is_music_url'); presumably supplied by the
                          passthrough_smuggled_data decorator - TODO confirm
    @returns  The value of _extract_from_tabs() or _extract_from_playlist(), or a url_result()
              pointing at YoutubeIE / YoutubeTabIE when the request is redirected
    @raises UserNotLive     when the /live tab was requested but the page still shows tabs
    @raises ExtractorError  when the page cannot be recognized, an album cannot be resolved to a
                            playlist, or an explicitly requested tab does not exist
    """
    item_id = self._match_id(url)
    # Normalize the host so that every later request goes to www.youtube.com
    url = urllib.parse.urlunparse(
        urllib.parse.urlparse(url)._replace(netloc='www.youtube.com'))
    compat_opts = self.get_param('compat_opts', [])

    def get_mobj(url):
        # Match against _URL_RE and turn groups that did not participate (None) into ''
        mobj = self._URL_RE.match(url).groupdict()
        mobj.update((k, '') for k, v in mobj.items() if v is None)
        return mobj

    mobj, redirect_warning = get_mobj(url), None
    # Youtube returns incomplete data if tabname is not lower case
    pre, tab, post, is_channel = mobj['pre'], mobj['tab'].lower(), mobj['post'], not mobj['not_channel']
    if is_channel:
        if smuggled_data.get('is_music_url'):
            if item_id[:2] == 'VL':  # Youtube music VL channels have an equivalent playlist
                item_id = item_id[2:]
                pre, tab, post, is_channel = f'https://www.youtube.com/playlist?list={item_id}', '', '', False
            elif item_id[:2] == 'MP':  # Resolve albums (/[channel/browse]/MP...) to their equivalent playlist
                mdata = self._extract_tab_endpoint(
                    f'https://music.youtube.com/channel/{item_id}', item_id, default_client='web_music')
                murl = traverse_obj(mdata, ('microformat', 'microformatDataRenderer', 'urlCanonical'),
                                    get_all=False, expected_type=str)
                if not murl:
                    raise ExtractorError('Failed to resolve album to playlist')
                return self.url_result(murl, ie=YoutubeTabIE.ie_key())
            elif mobj['channel_type'] == 'browse':  # Youtube music /browse/ should be changed to /channel/
                pre = f'https://www.youtube.com/channel/{item_id}'

    # Remember what the user actually asked for; `tab` may be replaced by the default below
    original_tab_name = tab
    if is_channel and not tab and 'no-youtube-channel-redirect' not in compat_opts:
        # Home URLs should redirect to /videos/
        redirect_warning = ('A channel/user page was given. All the channel\'s videos will be downloaded. '
                            'To download only the videos in the home page, add a "/featured" to the URL')
        tab = '/videos'

    # Rebuild the URL from the (possibly rewritten) parts and re-parse it
    url = ''.join((pre, tab, post))
    mobj = get_mobj(url)

    # Handle both video/playlist URLs
    qs = parse_qs(url)
    video_id, playlist_id = (qs.get(key, [None])[0] for key in ('v', 'list'))

    if not video_id and mobj['not_channel'].startswith('watch'):
        if not playlist_id:
            # If there is neither video or playlist ids, youtube redirects to home page, which is undesirable
            raise ExtractorError('Unable to recognize tab page')
        # Common mistake: https://www.youtube.com/watch?list=playlist_id
        self.report_warning(f'A video URL was given without video ID. Trying to download playlist {playlist_id}')
        url = f'https://www.youtube.com/playlist?list={playlist_id}'
        mobj = get_mobj(url)

    if video_id and playlist_id:
        # Both ids are present: the `noplaylist` param decides between the video and the playlist
        if self.get_param('noplaylist'):
            self.to_screen(f'Downloading just video {video_id} because of --no-playlist')
            return self.url_result(f'https://www.youtube.com/watch?v={video_id}',
                                   ie=YoutubeIE.ie_key(), video_id=video_id)
        self.to_screen(f'Downloading playlist {playlist_id}; add --no-playlist to just download video {video_id}')

    data, ytcfg = self._extract_data(url, item_id)

    # YouTube may provide a non-standard redirect to the regional channel
    # See: https://github.com/yt-dlp/yt-dlp/issues/2694
    redirect_url = traverse_obj(
        data, ('onResponseReceivedActions', ..., 'navigateAction', 'endpoint', 'commandMetadata', 'webCommandMetadata', 'url'), get_all=False)
    if redirect_url and 'no-youtube-channel-redirect' not in compat_opts:
        # Keep the requested tab and trailing part when following the redirect
        redirect_url = ''.join((
            urljoin('https://www.youtube.com', redirect_url), mobj['tab'], mobj['post']))
        self.to_screen(f'This playlist is likely not available in your region. Following redirect to regional playlist {redirect_url}')
        return self.url_result(redirect_url, ie=YoutubeTabIE.ie_key())

    tabs = traverse_obj(data, ('contents', 'twoColumnBrowseResultsRenderer', 'tabs'), expected_type=list)
    if tabs:
        selected_tab = self._extract_selected_tab(tabs)
        selected_tab_name = selected_tab.get('title', '').lower()
        # The tab titled "home" is addressed as /featured in URLs
        if selected_tab_name == 'home':
            selected_tab_name = 'featured'
        requested_tab_name = mobj['tab'][1:]  # strip the leading '/'
        if 'no-youtube-channel-redirect' not in compat_opts:
            if requested_tab_name == 'live':  # Live tab should have redirected to the video
                raise UserNotLive(video_id=mobj['id'])
            if requested_tab_name not in ('', selected_tab_name):
                redirect_warning = f'The channel does not have a {requested_tab_name} tab'
                if not original_tab_name:
                    # The tab was only implied (default /videos), so falling back is acceptable
                    if item_id[:2] == 'UC':
                        # Topic channels don't have /videos. Use the equivalent playlist instead
                        pl_id = f'UU{item_id[2:]}'
                        pl_url = f'https://www.youtube.com/playlist?list={pl_id}'
                        try:
                            data, ytcfg = self._extract_data(pl_url, pl_id, ytcfg=ytcfg, fatal=True, webpage_fatal=True)
                        except ExtractorError:
                            redirect_warning += ' and the playlist redirect gave error'
                        else:
                            item_id, url, selected_tab_name = pl_id, pl_url, requested_tab_name
                            redirect_warning += f'. Redirecting to playlist {pl_id} instead'
                    if selected_tab_name and selected_tab_name != requested_tab_name:
                        redirect_warning += f'. {selected_tab_name} tab is being downloaded instead'
                else:
                    # The user explicitly asked for a tab that does not exist
                    raise ExtractorError(redirect_warning, expected=True)

    if redirect_warning:
        self.to_screen(redirect_warning)
    self.write_debug(f'Final URL: {url}')

    # YouTube sometimes provides a button to reload playlist with unavailable videos.
    if 'no-youtube-unavailable-videos' not in compat_opts:
        data = self._reload_with_unavailable_videos(item_id, data, ytcfg) or data
    self._extract_and_report_alerts(data, only_once=True)
    # `data` may have been replaced above, so the tabs are looked up again
    tabs = traverse_obj(data, ('contents', 'twoColumnBrowseResultsRenderer', 'tabs'), expected_type=list)
    if tabs:
        return self._extract_from_tabs(item_id, ytcfg, data, tabs)

    playlist = traverse_obj(
        data, ('contents', 'twoColumnWatchNextResults', 'playlist', 'playlist'), expected_type=dict)
    if playlist:
        return self._extract_from_playlist(item_id, url, data, playlist, ytcfg)

    # Last resort: hand over to the video extractor if the page points at a single video
    video_id = traverse_obj(
        data, ('currentVideoEndpoint', 'watchEndpoint', 'videoId'), expected_type=str) or video_id
    if video_id:
        if mobj['tab'] != '/live':  # live tab is expected to redirect to video
            self.report_warning(f'Unable to recognize playlist. Downloading just video {video_id}')
        return self.url_result(f'https://www.youtube.com/watch?v={video_id}',
                               ie=YoutubeIE.ie_key(), video_id=video_id)

    raise ExtractorError('Unable to recognize tab page')
c5e8d7af 5515
c5e8d7af 5516
class YoutubePlaylistIE(InfoExtractor):
    """
    Matches bare playlist IDs and `...?list=<id>` URLs and forwards them to YoutubeTabIE.

    No page is downloaded here: _real_extract() only rewrites the input into a
    https://www.youtube.com/playlist URL and returns a url_result for YoutubeTabIE.
    """
    IE_DESC = 'YouTube playlists'
    _VALID_URL = r'''(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        (?:
                            (?:
                                youtube(?:kids)?\.com|
                                %(invidious)s
                            )
                            /.*?\?.*?\blist=
                        )?
                        (?P<id>%(playlist_id)s)
                     )''' % {
        'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE,
        'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
    }
    IE_NAME = 'youtube:playlist'
    _TESTS = [{
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
            'uploader': 'Wickman',
            'uploader_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
            'description': 'md5:8fa6f52abb47a9552002fa3ddfc57fc2',
            'view_count': int,
            'uploader_url': 'https://www.youtube.com/user/Wickydoo',
            'modified_date': r're:\d{8}',
            'channel_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
            'channel': 'Wickman',
            'tags': [],
            'channel_url': 'https://www.youtube.com/user/Wickydoo',
        },
        'playlist_mincount': 29,
    }, {
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        },
        'playlist_count': 2,
        'skip': 'This playlist is private',
    }, {
        'note': 'embedded',
        'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA15',
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
            'uploader': 'milan',
            'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
            'description': '',
            'channel_url': 'https://www.youtube.com/channel/UCEI1-PVPcYXjB73Hfelbmaw',
            'tags': [],
            'modified_date': '20140919',
            'view_count': int,
            'channel': 'milan',
            'channel_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
            'uploader_url': 'https://www.youtube.com/channel/UCEI1-PVPcYXjB73Hfelbmaw',
        },
        'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
    }, {
        'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
        'playlist_mincount': 455,
        'info_dict': {
            'title': '2018 Chinese New Singles (11/6 updated)',
            'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
            'uploader': 'LBK',
            'uploader_id': 'UC21nz3_MesPLqtDqwdvnoxA',
            'description': 'md5:da521864744d60a198e3a88af4db0d9d',
            'channel': 'LBK',
            'view_count': int,
            'channel_url': 'https://www.youtube.com/c/愛低音的國王',
            'tags': [],
            'uploader_url': 'https://www.youtube.com/c/愛低音的國王',
            'channel_id': 'UC21nz3_MesPLqtDqwdvnoxA',
            'modified_date': r're:\d{8}',
        },
        'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
    }, {
        'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
        'only_matching': True,
    }, {
        # music album playlist
        'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """
        Decide whether this extractor should take the URL.

        Returns False when YoutubeTabIE already accepts the URL or when the query
        string carries a non-empty `v` parameter; otherwise falls back to the
        inherited check.
        """
        if YoutubeTabIE.suitable(url):
            return False
        # parse_qs comes from the module-level import; no function-local import is needed
        qs = parse_qs(url)
        if qs.get('v', [None])[0]:
            return False
        return super().suitable(url)

    def _real_extract(self, url):
        """Rewrite the input to a /playlist URL and hand it over to YoutubeTabIE."""
        playlist_id = self._match_id(url)
        # Must be checked before `url` is replaced by the www.youtube.com form
        is_music_url = YoutubeBaseInfoExtractor.is_music_url(url)
        # Keep the original query if there is one; a bare ID has none, so build `list=<id>`
        url = update_url_query(
            'https://www.youtube.com/playlist',
            parse_qs(url) or {'list': playlist_id})
        if is_music_url:
            url = smuggle_url(url, {'is_music_url': True})
        return self.url_result(url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
29f7c58a 5626
5627
class YoutubeYtBeIE(InfoExtractor):
    """Turns youtu.be short links that carry a `list=` parameter into a watch URL for YoutubeTabIE."""
    IE_DESC = 'youtu.be'
    _VALID_URL = r'https?://youtu\.be/(?P<id>[0-9A-Za-z_-]{11})/*?.*?\blist=(?P<playlist_id>%(playlist_id)s)' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TESTS = [{
        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
        'info_dict': {
            'id': 'yeWKywCrFtk',
            'ext': 'mp4',
            'title': 'Small Scale Baler and Braiding Rugs',
            'uploader': 'Backus-Page House Museum',
            'uploader_id': 'backuspagemuseum',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
            'upload_date': '20161008',
            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
            'categories': ['Nonprofits & Activism'],
            'tags': list,
            'like_count': int,
            'age_limit': 0,
            'playable_in_embed': True,
            'thumbnail': 'https://i.ytimg.com/vi_webp/yeWKywCrFtk/maxresdefault.webp',
            'channel': 'Backus-Page House Museum',
            'channel_id': 'UCEfMCQ9bs3tjvjy1s451zaw',
            'live_status': 'not_live',
            'view_count': int,
            'channel_url': 'https://www.youtube.com/channel/UCEfMCQ9bs3tjvjy1s451zaw',
            'availability': 'public',
            'duration': 59,
            'comment_count': int,
            'channel_follower_count': int
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
    }, {
        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build the equivalent /watch?v=...&list=... URL and delegate to YoutubeTabIE."""
        video_id, playlist_id = self._match_valid_url(url).group('id', 'playlist_id')
        watch_query = {
            'v': video_id,
            'list': playlist_id,
            'feature': 'youtu.be',
        }
        watch_url = update_url_query('https://www.youtube.com/watch', watch_query)
        return self.url_result(watch_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
8bdd16b4 5677
5678
class YoutubeLivestreamEmbedIE(InfoExtractor):
    """Maps `embed/live_stream?channel=<id>` URLs to the channel's /live URL, handled by YoutubeTabIE."""
    IE_DESC = 'YouTube livestream embeds'
    _VALID_URL = r'https?://(?:\w+\.)?youtube\.com/embed/live_stream/?\?(?:[^#]+&)?channel=(?P<id>[^&#]+)'
    _TESTS = [{
        'url': 'https://www.youtube.com/embed/live_stream?channel=UC2_KI6RB__jGdlnK6dvFEZA',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return a url_result for the /live page of the channel named in the `channel` parameter."""
        channel_id = self._match_id(url)
        live_url = f'https://www.youtube.com/channel/{channel_id}/live'
        return self.url_result(live_url, ie=YoutubeTabIE.ie_key(), video_id=channel_id)
5692
5693
class YoutubeYtUserIE(InfoExtractor):
    """Expands the `ytuser:<name>` shorthand to the user's /videos page, handled by YoutubeTabIE."""
    IE_DESC = 'YouTube user videos; "ytuser:" prefix'
    IE_NAME = 'youtube:user'
    _VALID_URL = r'ytuser:(?P<id>.+)'
    _TESTS = [{
        'url': 'ytuser:phihag',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return a url_result for https://www.youtube.com/user/<name>/videos."""
        user_id = self._match_id(url)
        videos_url = f'https://www.youtube.com/user/{user_id}/videos'
        return self.url_result(videos_url, ie=YoutubeTabIE.ie_key(), video_id=user_id)
9558dcec 5708
b05654f0 5709
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    """Maps the `:ytfav` keyword (and its spelling variants) to the `LL` playlist via YoutubeTabIE."""
    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube liked videos; ":ytfav" keyword (requires cookies)'
    _VALID_URL = r':ytfav(?:ou?rite)?s?'
    _LOGIN_REQUIRED = True
    _TESTS = [{
        'url': ':ytfav',
        'only_matching': True,
    }, {
        'url': ':ytfavorites',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return a url_result for the fixed `list=LL` playlist URL; the input URL is not inspected."""
        liked_videos_url = 'https://www.youtube.com/playlist?list=LL'
        return self.url_result(liked_videos_url, ie=YoutubeTabIE.ie_key())
5727
5728
class YoutubeNotificationsIE(YoutubeTabBaseInfoExtractor):
    """Builds a playlist out of the entries of the notification menu (`:ytnotif` keyword)."""
    IE_NAME = 'youtube:notif'
    IE_DESC = 'YouTube notifications; ":ytnotif" keyword (requires cookies)'
    _VALID_URL = r':ytnotif(?:ication)?s?'
    _LOGIN_REQUIRED = True
    _TESTS = [{
        'url': ':ytnotif',
        'only_matching': True,
    }, {
        'url': ':ytnotifications',
        'only_matching': True,
    }]

    def _extract_notification_menu(self, response, continuation_list):
        """
        Yield one entry per usable notification found in `response`.

        `continuation_list` is a one-element list used as an out-parameter: slot 0 is
        reset to None and then set to the last `continuationItemRenderer` seen, if any.
        """
        items = traverse_obj(
            response,
            ('actions', 0, 'openPopupAction', 'popup', 'multiPageMenuRenderer', 'sections', 0, 'multiPageMenuNotificationSectionRenderer', 'items'),
            ('actions', 0, 'appendContinuationItemsAction', 'continuationItems'),
            expected_type=list)
        continuation_list[0] = None
        for item in items or []:
            renderer = item.get('notificationRenderer')
            entry = self._extract_notification_renderer(renderer)
            if entry:
                yield entry
            next_page = item.get('continuationItemRenderer')
            if next_page:
                continuation_list[0] = next_page

    def _extract_notification_renderer(self, notification):
        """
        Convert one notification renderer into a `url`-type entry.

        Returns None when the notification points to neither a video nor a
        community post for which both channel id and post id could be found.
        """
        video_id = traverse_obj(
            notification, ('navigationEndpoint', 'watchEndpoint', 'videoId'), expected_type=str)
        channel_id = None
        if video_id:
            url = f'https://www.youtube.com/watch?v={video_id}'
        else:
            # Not a video: try to interpret the notification as a community post
            browse_ep = traverse_obj(
                notification, ('navigationEndpoint', 'browseEndpoint'), expected_type=dict)
            channel_id = traverse_obj(browse_ep, 'browseId', expected_type=str)
            post_id = self._search_regex(
                r'/post/(.+)', traverse_obj(browse_ep, 'canonicalBaseUrl', expected_type=str),
                'post id', default=None)
            if not (channel_id and post_id):
                return
            # The direct /post url redirects to this in the browser
            url = f'https://www.youtube.com/channel/{channel_id}/community?lb={post_id}'

        channel = traverse_obj(
            notification, ('contextualMenu', 'menuRenderer', 'items', 1, 'menuServiceItemRenderer', 'text', 'runs', 1, 'text'),
            expected_type=str)
        notification_title = self._get_text(notification, 'shortMessage')
        if notification_title:
            notification_title = notification_title.replace('\xad', '')  # remove soft hyphens
        # TODO: handle recommended videos
        title = self._search_regex(
            rf'{re.escape(channel or "")}[^:]+: (.+)', notification_title,
            'video title', default=None)

        # The date is only filled in when the `approximate_date` extractor argument is set
        upload_date = None
        if self._configuration_arg('approximate_date', ie_key=YoutubeTabIE.ie_key()):
            upload_date = strftime_or_none(self._extract_time_text(notification, 'sentTimeText')[0], '%Y%m%d')

        return {
            '_type': 'url',
            'url': url,
            'ie_key': (YoutubeIE if video_id else YoutubeTabIE).ie_key(),
            'video_id': video_id,
            'title': title,
            'channel_id': channel_id,
            'channel': channel,
            'thumbnails': self._extract_thumbnails(notification, 'videoThumbnail'),
            'upload_date': upload_date,
        }

    def _notification_menu_entries(self, ytcfg):
        """Request the notification menu page by page and yield its entries until no continuation is left."""
        continuation_list = [None]
        response = None
        page = 0
        while True:
            page += 1
            ctoken = traverse_obj(
                continuation_list, (0, 'continuationEndpoint', 'getNotificationMenuEndpoint', 'ctoken'), expected_type=str)
            query = {'ctoken': ctoken} if ctoken else {}
            # Visitor data is taken from the previous response (None on the first page)
            headers = self.generate_api_headers(ytcfg=ytcfg, visitor_data=self._extract_visitor_data(response))
            response = self._extract_response(
                item_id=f'page {page}', query=query, ytcfg=ytcfg,
                ep='notification/get_notification_menu', check_get_keys='actions',
                headers=headers)
            yield from self._extract_notification_menu(response, continuation_list)
            if not continuation_list[0]:
                return

    def _real_extract(self, url):
        """Return a playlist result named 'notifications' whose entries come from the notification menu."""
        display_id = 'notifications'
        if self.skip_webpage:
            ytcfg = {}
        else:
            ytcfg = self._download_ytcfg('web', display_id)
        self._report_playlist_authcheck(ytcfg)
        entries = self._notification_menu_entries(ytcfg)
        return self.playlist_result(entries, display_id, display_id)
5818
5819
# Purely declarative search extractor: all behaviour comes from the two base classes.
# NOTE(review): the `ytsearch5:` test URL suggests that a count may follow _SEARCH_KEY - confirm in SearchInfoExtractor
class YoutubeSearchIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor):
    IE_DESC = 'YouTube search'
    IE_NAME = 'youtube:search'
    _SEARCH_KEY = 'ytsearch'
    _SEARCH_PARAMS = 'EgIQAQ%3D%3D'  # Videos only
    _TESTS = [{
        'url': 'ytsearch5:youtube-dl test video',
        'playlist_count': 5,
        'info_dict': {
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
        }
    }]
b05654f0 5833
a61fd4cf 5834
# Declarative variant of YoutubeSearchIE with its own search key and search params;
# all behaviour comes from the two base classes.
class YoutubeSearchDateIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor):
    IE_NAME = YoutubeSearchIE.IE_NAME + ':date'  # i.e. 'youtube:search:date'
    _SEARCH_KEY = 'ytsearchdate'
    IE_DESC = 'YouTube search, newest videos first'
    _SEARCH_PARAMS = 'CAISAhAB'  # Videos only, sorted by date
    _TESTS = [{
        'url': 'ytsearchdate5:youtube-dl test video',
        'playlist_count': 5,
        'info_dict': {
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
        }
    }]
75dff0ee 5848
c9ae7b95 5849
class YoutubeSearchURLIE(YoutubeTabBaseInfoExtractor):
    """Handles www.youtube.com /results and /search URLs, passing the `sp` parameter on to the search."""
    IE_DESC = 'YouTube search URLs with sorting and filter support'
    IE_NAME = YoutubeSearchIE.IE_NAME + '_url'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/(?:results|search)\?([^#]+&)?(?:search_query|q)=(?:[^&]+)(?:[&#]|$)'
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
        }
    }, {
        'url': 'https://www.youtube.com/results?search_query=python&sp=EgIQAg%253D%253D',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'python',
            'title': 'python',
        }
    }, {
        'url': 'https://www.youtube.com/results?search_query=%23cats',
        'playlist_mincount': 1,
        'info_dict': {
            'id': '#cats',
            'title': '#cats',
            # The test suite does not have support for nested playlists
            # 'entries': [{
            #     'url': r're:https://(www\.)?youtube\.com/hashtag/cats',
            #     'title': '#cats',
            # }],
        },
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return a playlist of search results; the query string is used as both id and title."""
        qs = parse_qs(url)
        # `search_query` wins over `q` when both are present
        query = (qs.get('search_query') or qs.get('q'))[0]
        search_params = qs.get('sp', (None,))[0]
        results = self._search_results(query, search_params)
        return self.playlist_result(results, query, query)
3462ffa8 5889
5890
class YoutubeMusicSearchURLIE(YoutubeTabBaseInfoExtractor):
    """Handles music.youtube.com search URLs; a section is chosen via `sp=` or the URL fragment."""
    IE_DESC = 'YouTube music search URLs with selectable sections, e.g. #songs'
    IE_NAME = 'youtube:music:search_url'
    _VALID_URL = r'https?://music\.youtube\.com/search\?([^#]+&)?(?:search_query|q)=(?:[^&]+)(?:[&#]|$)'
    _TESTS = [{
        'url': 'https://music.youtube.com/search?q=royalty+free+music',
        'playlist_count': 16,
        'info_dict': {
            'id': 'royalty free music',
            'title': 'royalty free music',
        }
    }, {
        'url': 'https://music.youtube.com/search?q=royalty+free+music&sp=EgWKAQIIAWoKEAoQAxAEEAkQBQ%3D%3D',
        'playlist_mincount': 30,
        'info_dict': {
            'id': 'royalty free music - songs',
            'title': 'royalty free music - songs',
        },
        'params': {'extract_flat': 'in_playlist'}
    }, {
        'url': 'https://music.youtube.com/search?q=royalty+free+music#community+playlists',
        'playlist_mincount': 30,
        'info_dict': {
            'id': 'royalty free music - community playlists',
            'title': 'royalty free music - community playlists',
        },
        'params': {'extract_flat': 'in_playlist'}
    }]

    # Section name (as written in the URL fragment) -> value of the `sp` search parameter
    _SECTIONS = {
        'albums': 'EgWKAQIYAWoKEAoQAxAEEAkQBQ==',
        'artists': 'EgWKAQIgAWoKEAoQAxAEEAkQBQ==',
        'community playlists': 'EgeKAQQoAEABagoQChADEAQQCRAF',
        'featured playlists': 'EgeKAQQoADgBagwQAxAJEAQQDhAKEAU==',
        'songs': 'EgWKAQIIAWoKEAoQAxAEEAkQBQ==',
        'videos': 'EgWKAQIQAWoKEAoQAxAEEAkQBQ==',
    }

    def _real_extract(self, url):
        """
        Return a playlist of music search results.

        The playlist id/title is "<query> - <section>" when a section is known,
        otherwise just the query.
        """
        qs = parse_qs(url)
        query = (qs.get('search_query') or qs.get('q'))[0]
        params = qs.get('sp', (None,))[0]
        if params:
            # Explicit `sp`: show the section name if it is a known one, else the raw value
            section = params
            for name, value in self._SECTIONS.items():
                if value == params:
                    section = name
                    break
        else:
            # No `sp`: the text between the first and second '#' (if any) names the section
            pieces = url.split('#')
            fragment = pieces[1] if len(pieces) > 1 else ''
            section = urllib.parse.unquote_plus(fragment).lower()
            params = self._SECTIONS.get(section)
            if not params:
                section = None
        title = join_nonempty(query, section, delim=' - ')
        results = self._search_results(query, params, default_client='web_music')
        return self.playlist_result(results, title, title)
16aa9ea4 5942
5943
class YoutubeFeedsInfoExtractor(InfoExtractor):
    """
    Common base for the feed extractors

    Subclasses must re-define _FEED_NAME; it is used both in the extractor
    name ("youtube:<feed>") and in the feed URL that extraction is
    delegated to.
    """
    _FEED_NAME = 'feeds'
    # NOTE(review): presumably read by _check_login_required - confirm there
    _LOGIN_REQUIRED = True

    @classproperty
    def IE_NAME(cls):
        return f'youtube:{cls._FEED_NAME}'

    def _real_initialize(self):
        YoutubeBaseInfoExtractor._check_login_required(self)

    def _real_extract(self, url):
        # The feed page itself is handled by the tab extractor
        feed_url = f'https://www.youtube.com/feed/{self._FEED_NAME}'
        return self.url_result(feed_url, ie=YoutubeTabIE.ie_key())
25f14e9f
S
5962
5963
class YoutubeWatchLaterIE(InfoExtractor):
    """Keyword extractor forwarding ":ytwatchlater" to the "WL" playlist URL."""
    IE_NAME = 'youtube:watchlater'
    IE_DESC = 'Youtube watch later list; ":ytwatchlater" keyword (requires cookies)'
    _VALID_URL = r':ytwatchlater'
    _TESTS = [
        {'url': ':ytwatchlater', 'only_matching': True},
    ]

    def _real_extract(self, url):
        # Actual extraction is delegated to the tab extractor
        watch_later_url = 'https://www.youtube.com/playlist?list=WL'
        return self.url_result(watch_later_url, ie=YoutubeTabIE.ie_key())
3462ffa8 5976
5977
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the "recommended" feed.

    Matches the bare youtube.com front page as well as the ":ytrec" and
    ":ytrecommended" keywords.
    """
    IE_DESC = 'YouTube recommended videos; ":ytrec" keyword'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/?(?:[?#]|$)|:ytrec(?:ommended)?'
    _FEED_NAME = 'recommended'
    # Overrides the base class default of True
    _LOGIN_REQUIRED = False
    _TESTS = [
        {'url': ':ytrec', 'only_matching': True},
        {'url': ':ytrecommended', 'only_matching': True},
        {'url': 'https://youtube.com', 'only_matching': True},
    ]
1ed5b5c9 5993
1ed5b5c9 5994
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the "subscriptions" feed (":ytsubs" style keywords)."""
    IE_DESC = 'YouTube subscriptions feed; ":ytsubs" keyword (requires cookies)'
    _VALID_URL = r':ytsub(?:scription)?s?'
    _FEED_NAME = 'subscriptions'
    _TESTS = [
        {'url': ':ytsubs', 'only_matching': True},
        {'url': ':ytsubscriptions', 'only_matching': True},
    ]
1ed5b5c9 6006
1ed5b5c9 6007
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the "history" feed (":ythis" / ":ythistory" keywords)."""
    IE_DESC = 'Youtube watch history; ":ythis" keyword (requires cookies)'
    _VALID_URL = r':ythis(?:tory)?'
    _FEED_NAME = 'history'
    _TESTS = [
        {'url': ':ythistory', 'only_matching': True},
    ]
1ed5b5c9
JMF
6016
6017
class YoutubeStoriesIE(InfoExtractor):
    """Extractor for the "ytstories:<channel id>" pseudo-URL.

    The part of the channel ID after its "UC" prefix is appended to "RLTD"
    to form a playlist ID, and extraction is handed to the tab extractor
    with an ``is_story`` flag smuggled into the playlist URL.
    """
    IE_NAME = 'youtube:stories'
    IE_DESC = 'YouTube channel stories; "ytstories:" prefix'
    _VALID_URL = r'ytstories:UC(?P<id>[A-Za-z0-9_-]{21}[AQgw])$'
    _TESTS = [
        {'url': 'ytstories:UCwFCb4jeqaKWnciAYM-ZVHg', 'only_matching': True},
    ]

    def _real_extract(self, url):
        channel_suffix = self._match_id(url)
        playlist_id = f'RLTD{channel_suffix}'
        story_url = smuggle_url(
            f'https://www.youtube.com/playlist?list={playlist_id}&playnext=1', {'is_story': True})
        return self.url_result(story_url, ie=YoutubeTabIE, video_id=playlist_id)
6032
6033
15870e90
PH
class YoutubeTruncatedURLIE(InfoExtractor):
    """Catch-all for watch/attribution URLs that lost their video ID.

    Such URLs typically result from an unquoted ``&`` on the command line.
    This extractor never extracts anything: it only raises an explanatory
    error so that the user gets a hint instead of a generic failure.
    """
    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError explaining the likely cause.

        Raises:
            ExtractorError: unconditionally, with ``expected=True``.
        """
        # The hint names this program's own command (yt-dlp), so that the
        # suggested invocation can be copied verbatim.
        raise ExtractorError(
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like yt-dlp '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            'or simply yt-dlp BaW_jenozKc .',
            expected=True)
772fd5cc
PH
6081
6082
class YoutubeClipIE(YoutubeTabBaseInfoExtractor):
    """Extractor for youtube.com/clip/<id> URLs.

    The clip page is used to find the underlying video ID and the clip's
    start/end times; everything else is delegated to the regular video
    extractor through a ``url_transparent`` result.
    """
    IE_NAME = 'youtube:clip'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/clip/(?P<id>[^/?#]+)'
    _TESTS = [{
        # FIXME: Other metadata should be extracted from the clip, not from the base video
        'url': 'https://www.youtube.com/clip/UgytZKpehg-hEMBSn3F4AaABCQ',
        'info_dict': {
            'id': 'UgytZKpehg-hEMBSn3F4AaABCQ',
            'ext': 'mp4',
            'section_start': 29.0,
            'section_end': 39.7,
            'duration': 10.7,
            'age_limit': 0,
            'availability': 'public',
            'categories': ['Gaming'],
            'channel': 'Scott The Woz',
            'channel_id': 'UC4rqhyiTs7XyuODcECvuiiQ',
            'channel_url': 'https://www.youtube.com/channel/UC4rqhyiTs7XyuODcECvuiiQ',
            'description': 'md5:7a4517a17ea9b4bd98996399d8bb36e7',
            'like_count': int,
            'playable_in_embed': True,
            'tags': 'count:17',
            'thumbnail': 'https://i.ytimg.com/vi_webp/ScPX26pdQik/maxresdefault.webp',
            'title': 'Mobile Games on Console - Scott The Woz',
            'upload_date': '20210920',
            'uploader': 'Scott The Woz',
            'uploader_id': 'scottthewoz',
            'uploader_url': 'http://www.youtube.com/user/scottthewoz',
            'view_count': int,
            'live_status': 'not_live',
            'channel_follower_count': int
        }
    }]

    def _real_extract(self, url):
        """Resolve a clip URL to its base video plus the clip's time range.

        Returns:
            A ``url_transparent`` result pointing at the base video's watch
            URL, carrying the clip ID as ``id`` and ``section_start`` /
            ``section_end`` in seconds.

        Raises:
            ExtractorError: if the video ID or the clip's start/end data
                cannot be found in the page data.
        """
        clip_id = self._match_id(url)
        _, data = self._extract_webpage(url, clip_id)

        video_id = traverse_obj(data, ('currentVideoEndpoint', 'watchEndpoint', 'videoId'))
        if not video_id:
            raise ExtractorError('Unable to find video ID')

        clip_data = traverse_obj(data, (
            'engagementPanels', ..., 'engagementPanelSectionListRenderer', 'content', 'clipSectionRenderer',
            'contents', ..., 'clipAttributionRenderer', 'onScrubExit', 'commandExecutorCommand', 'commands', ...,
            'openPopupAction', 'popup', 'notificationActionRenderer', 'actionButton', 'buttonRenderer', 'command',
            'commandExecutorCommand', 'commands', ..., 'loopCommand'), get_all=False)
        # Nothing may be found at that path; fail with a proper extractor
        # error rather than a TypeError from subscripting None below.
        if not clip_data:
            raise ExtractorError('Unable to find clip data')

        return {
            '_type': 'url_transparent',
            'url': f'https://www.youtube.com/watch?v={video_id}',
            'ie_key': YoutubeIE.ie_key(),
            'id': clip_id,
            # Times are given in milliseconds; convert to seconds
            'section_start': int(clip_data['startTimeMs']) / 1000,
            'section_end': int(clip_data['endTimeMs']) / 1000,
        }
3cd786db 6139
6140
772fd5cc
PH
class YoutubeTruncatedIDIE(InfoExtractor):
    """Catch watch URLs whose ``v`` value is only 1-10 characters long.

    Nothing is extracted; the user is told that the URL looks truncated.
    """
    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [
        {'url': 'https://www.youtube.com/watch?v=N_708QY7Ob', 'only_matching': True},
    ]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError naming the incomplete ID."""
        video_id = self._match_id(url)
        message = f'Incomplete YouTube ID {video_id}. URL {url} looks truncated.'
        raise ExtractorError(message, expected=True)