]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/youtube.py
[extractor/BiliIntlSeries] Fix `_VALID_URL`
[yt-dlp.git] / yt_dlp / extractor / youtube.py
CommitLineData
6e634cbe 1import base64
d92f5d5a 2import calendar
109dd3b2 3import copy
fe93e2c4 4import datetime
a5c56234 5import hashlib
0ca96d48 6import itertools
c5e8d7af 7import json
720c3099 8import math
c4417ddb 9import os.path
d77ab8e2 10import random
c5e8d7af 11import re
46383212 12import sys
f8271158 13import threading
8a784c74 14import time
e0df6211 15import traceback
14f25df2 16import urllib.error
ac668111 17import urllib.parse
c5e8d7af 18
b05654f0 19from .common import InfoExtractor, SearchInfoExtractor
25836db6 20from .openload import PhantomJSwrapper
14f25df2 21from ..compat import functools
545cc85d 22from ..jsinterp import JSInterpreter
4bb4a188 23from ..utils import (
f8271158 24 NO_DEFAULT,
25 ExtractorError,
693f0600 26 UserNotLive,
720c3099 27 bug_reports_message,
82d02080 28 classproperty,
c5e8d7af 29 clean_html,
d92f5d5a 30 datetime_from_str,
11f9be09 31 dict_get,
2d30521a 32 float_or_none,
11f9be09 33 format_field,
ff91cf74 34 get_first,
dd27fd17 35 int_or_none,
641ad5d8 36 is_html,
34921b43 37 join_nonempty,
48416bc4 38 js_to_json,
94278f72 39 mimetype2ext,
9c0d7f49 40 network_exceptions,
11f9be09 41 orderedSet,
6310acf5 42 parse_codecs,
49bd8c66 43 parse_count,
7c80519c 44 parse_duration,
7ea65411 45 parse_iso8601,
4dfbf869 46 parse_qs,
dca3ff4a 47 qualities,
3995d37d 48 remove_start,
cf7e015f 49 smuggle_url,
dbdaaa23 50 str_or_none,
c93d53f5 51 str_to_int,
f3aa3c3f 52 strftime_or_none,
7c365c21 53 traverse_obj,
556dbe7f 54 try_get,
c5e8d7af
PH
55 unescapeHTML,
56 unified_strdate,
f0d785d3 57 unified_timestamp,
cf7e015f 58 unsmuggle_url,
8bdd16b4 59 update_url_query,
21c340b8 60 url_or_none,
fe93e2c4 61 urljoin,
7c365c21 62 variadic,
c5e8d7af
PH
63)
64
# Built-in InnerTube client configurations, keyed by client id.
# Each entry provides the request context ('INNERTUBE_CONTEXT') and the numeric
# client id ('INNERTUBE_CONTEXT_CLIENT_NAME'); 'INNERTUBE_API_KEY',
# 'INNERTUBE_HOST' and 'REQUIRE_JS_PLAYER' are optional here and receive
# defaults in build_innertube_clients().
# any clients starting with _ cannot be explicitly requested by the user
INNERTUBE_CLIENTS = {
    'web': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB',
                'clientVersion': '2.20220801.00.00',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 1
    },
    'web_embedded': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB_EMBEDDED_PLAYER',
                'clientVersion': '1.20220731.00.00',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 56
    },
    # The only entry with an explicit, non-default INNERTUBE_HOST
    'web_music': {
        'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
        'INNERTUBE_HOST': 'music.youtube.com',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB_REMIX',
                'clientVersion': '1.20220727.01.00',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 67,
    },
    'web_creator': {
        'INNERTUBE_API_KEY': 'AIzaSyBUPetSUmoZL-OhlxA7wSac5XinrygCqMo',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB_CREATOR',
                'clientVersion': '1.20220726.00.00',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 62,
    },
    # All android/ios clients below set REQUIRE_JS_PLAYER to False
    'android': {
        'INNERTUBE_API_KEY': 'AIzaSyA8eiZmM1FaDVjRy-df2KTyQ_vz_yYM39w',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID',
                'clientVersion': '17.31.35',
                'androidSdkVersion': 30,
                'userAgent': 'com.google.android.youtube/17.31.35 (Linux; U; Android 11) gzip'
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 3,
        'REQUIRE_JS_PLAYER': False
    },
    'android_embedded': {
        'INNERTUBE_API_KEY': 'AIzaSyCjc_pVEDi4qsv5MtC2dMXzpIaDoRFLsxw',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID_EMBEDDED_PLAYER',
                'clientVersion': '17.31.35',
                'androidSdkVersion': 30,
                'userAgent': 'com.google.android.youtube/17.31.35 (Linux; U; Android 11) gzip'
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 55,
        'REQUIRE_JS_PLAYER': False
    },
    'android_music': {
        'INNERTUBE_API_KEY': 'AIzaSyAOghZGza2MQSZkY_zfZ370N-PUdXEo8AI',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID_MUSIC',
                'clientVersion': '5.16.51',
                'androidSdkVersion': 30,
                'userAgent': 'com.google.android.apps.youtube.music/5.16.51 (Linux; U; Android 11) gzip'
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 21,
        'REQUIRE_JS_PLAYER': False
    },
    'android_creator': {
        'INNERTUBE_API_KEY': 'AIzaSyD_qjV8zaaUMehtLkrKFgVeSX_Iqbtyws8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID_CREATOR',
                'clientVersion': '22.30.100',
                'androidSdkVersion': 30,
                'userAgent': 'com.google.android.apps.youtube.creator/22.30.100 (Linux; U; Android 11) gzip'
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 14,
        'REQUIRE_JS_PLAYER': False
    },
    # iOS clients have HLS live streams. Setting device model to get 60fps formats.
    # See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/680#issuecomment-1002724558
    'ios': {
        'INNERTUBE_API_KEY': 'AIzaSyB-63vPrdThhKuerbB2N_l7Kwwcxj6yUAc',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS',
                'clientVersion': '17.33.2',
                'deviceModel': 'iPhone14,3',
                'userAgent': 'com.google.ios.youtube/17.33.2 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)'
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 5,
        'REQUIRE_JS_PLAYER': False
    },
    # No INNERTUBE_API_KEY here: the default key is filled in by build_innertube_clients()
    'ios_embedded': {
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS_MESSAGES_EXTENSION',
                'clientVersion': '17.33.2',
                'deviceModel': 'iPhone14,3',
                'userAgent': 'com.google.ios.youtube/17.33.2 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)'
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 66,
        'REQUIRE_JS_PLAYER': False
    },
    'ios_music': {
        'INNERTUBE_API_KEY': 'AIzaSyBAETezhkwP0ZWA02RsqT1zu78Fpt0bC_s',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS_MUSIC',
                'clientVersion': '5.21',
                'deviceModel': 'iPhone14,3',
                'userAgent': 'com.google.ios.youtubemusic/5.21 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)'
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 26,
        'REQUIRE_JS_PLAYER': False
    },
    # No INNERTUBE_API_KEY here: the default key is filled in by build_innertube_clients()
    'ios_creator': {
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS_CREATOR',
                'clientVersion': '22.33.101',
                'deviceModel': 'iPhone14,3',
                'userAgent': 'com.google.ios.ytcreator/22.33.101 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)'
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 15,
        'REQUIRE_JS_PLAYER': False
    },
    # mweb has 'ultralow' formats
    # See: https://github.com/yt-dlp/yt-dlp/pull/557
    'mweb': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'MWEB',
                'clientVersion': '2.20220801.00.00',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 2
    },
    # This client can access age restricted videos (unless the uploader has disabled the 'allow embedding' option)
    # See: https://github.com/zerodytrash/YouTube-Internal-Clients
    'tv_embedded': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'TVHTML5_SIMPLY_EMBEDDED_PLAYER',
                'clientVersion': '2.0',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 85
    },
}
237
238
e7870111
D
239def _split_innertube_client(client_name):
240 variant, *base = client_name.rsplit('.', 1)
241 if base:
242 return variant, base[0], variant
243 base, *variant = client_name.split('_', 1)
244 return client_name, base, variant[0] if variant else None
245
246
def build_innertube_clients():
    """
    Complete INNERTUBE_CLIENTS in place.

    Every existing entry gets default API key, host, JS-player requirement and
    'hl' language, plus a 'priority' derived from the rank of its base client
    in BASE_CLIENTS. Entries without a variant additionally spawn a
    "<client>_embedscreen" copy; 'embedded' variants get the third-party context.
    """
    THIRD_PARTY = {
        'embedUrl': 'https://www.youtube.com/',  # Can be any valid URL
    }
    BASE_CLIENTS = ('android', 'web', 'tv', 'ios', 'mweb')
    priority = qualities(BASE_CLIENTS[::-1])
    fallback_values = (
        ('INNERTUBE_API_KEY', 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8'),
        ('INNERTUBE_HOST', 'www.youtube.com'),
        ('REQUIRE_JS_PLAYER', True),
    )

    # Iterate over a snapshot: "_embedscreen" entries are inserted during the loop
    for client, ytcfg in list(INNERTUBE_CLIENTS.items()):
        for key, value in fallback_values:
            ytcfg.setdefault(key, value)
        ytcfg['INNERTUBE_CONTEXT']['client'].setdefault('hl', 'en')

        base_client, variant = _split_innertube_client(client)[1:]
        ytcfg['priority'] = 10 * priority(base_client)

        if variant == 'embedded':
            ytcfg['INNERTUBE_CONTEXT']['thirdParty'] = THIRD_PARTY
            ytcfg['priority'] -= 2
        elif variant:
            ytcfg['priority'] -= 3
        else:
            # Base client: derive an extra client that uses the EMBED screen
            embedscreen = copy.deepcopy(ytcfg)
            embedscreen['INNERTUBE_CONTEXT']['client']['clientScreen'] = 'EMBED'
            embedscreen['INNERTUBE_CONTEXT']['thirdParty'] = THIRD_PARTY
            embedscreen['priority'] -= 3
            INNERTUBE_CLIENTS[f'{client}_embedscreen'] = embedscreen
273
274
275build_innertube_clients()
276
277
de7f3446 278class YoutubeBaseInfoExtractor(InfoExtractor):
b2e8bc1b 279 """Provide base functions for Youtube extractors"""
e00eb564 280
3462ffa8 281 _RESERVED_NAMES = (
3cd786db 282 r'channel|c|user|playlist|watch|w|v|embed|e|watch_popup|clip|'
182bda88 283 r'shorts|movies|results|search|shared|hashtag|trending|explore|feed|feeds|'
3619f78d 284 r'browse|oembed|get_video_info|iframe_api|s/player|'
cd7c66cf 285 r'storefront|oops|index|account|reporthistory|t/terms|about|upload|signin|logout')
3462ffa8 286
3619f78d 287 _PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)'
288
52efa4b3 289 # _NETRC_MACHINE = 'youtube'
3619f78d 290
b2e8bc1b
JMF
291 # If True it will raise an error if no login info is provided
292 _LOGIN_REQUIRED = False
293
d9190e44
RH
294 _INVIDIOUS_SITES = (
295 # invidious-redirect websites
296 r'(?:www\.)?redirect\.invidious\.io',
297 r'(?:(?:www|dev)\.)?invidio\.us',
0a41f331 298 # Invidious instances taken from https://github.com/iv-org/documentation/blob/master/docs/instances.md
d9190e44
RH
299 r'(?:www\.)?invidious\.pussthecat\.org',
300 r'(?:www\.)?invidious\.zee\.li',
301 r'(?:www\.)?invidious\.ethibox\.fr',
302 r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',
4c968755
U
303 r'(?:www\.)?osbivz6guyeahrwp2lnwyjk2xos342h4ocsxyqrlaopqjuhwn2djiiyd\.onion',
304 r'(?:www\.)?u2cvlit75owumwpy4dj2hsmvkq7nvrclkpht7xgyye2pyoxhpmclkrad\.onion',
d9190e44
RH
305 # youtube-dl invidious instances list
306 r'(?:(?:www|no)\.)?invidiou\.sh',
307 r'(?:(?:www|fi)\.)?invidious\.snopyta\.org',
308 r'(?:www\.)?invidious\.kabi\.tk',
309 r'(?:www\.)?invidious\.mastodon\.host',
310 r'(?:www\.)?invidious\.zapashcanon\.fr',
311 r'(?:www\.)?(?:invidious(?:-us)?|piped)\.kavin\.rocks',
312 r'(?:www\.)?invidious\.tinfoil-hat\.net',
313 r'(?:www\.)?invidious\.himiko\.cloud',
314 r'(?:www\.)?invidious\.reallyancient\.tech',
315 r'(?:www\.)?invidious\.tube',
316 r'(?:www\.)?invidiou\.site',
317 r'(?:www\.)?invidious\.site',
318 r'(?:www\.)?invidious\.xyz',
319 r'(?:www\.)?invidious\.nixnet\.xyz',
320 r'(?:www\.)?invidious\.048596\.xyz',
321 r'(?:www\.)?invidious\.drycat\.fr',
322 r'(?:www\.)?inv\.skyn3t\.in',
323 r'(?:www\.)?tube\.poal\.co',
324 r'(?:www\.)?tube\.connect\.cafe',
325 r'(?:www\.)?vid\.wxzm\.sx',
326 r'(?:www\.)?vid\.mint\.lgbt',
327 r'(?:www\.)?vid\.puffyan\.us',
328 r'(?:www\.)?yewtu\.be',
329 r'(?:www\.)?yt\.elukerio\.org',
330 r'(?:www\.)?yt\.lelux\.fi',
331 r'(?:www\.)?invidious\.ggc-project\.de',
332 r'(?:www\.)?yt\.maisputain\.ovh',
333 r'(?:www\.)?ytprivate\.com',
334 r'(?:www\.)?invidious\.13ad\.de',
335 r'(?:www\.)?invidious\.toot\.koeln',
336 r'(?:www\.)?invidious\.fdn\.fr',
337 r'(?:www\.)?watch\.nettohikari\.com',
338 r'(?:www\.)?invidious\.namazso\.eu',
339 r'(?:www\.)?invidious\.silkky\.cloud',
340 r'(?:www\.)?invidious\.exonip\.de',
341 r'(?:www\.)?invidious\.riverside\.rocks',
342 r'(?:www\.)?invidious\.blamefran\.net',
343 r'(?:www\.)?invidious\.moomoo\.de',
344 r'(?:www\.)?ytb\.trom\.tf',
345 r'(?:www\.)?yt\.cyberhost\.uk',
346 r'(?:www\.)?kgg2m7yk5aybusll\.onion',
347 r'(?:www\.)?qklhadlycap4cnod\.onion',
348 r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',
349 r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',
350 r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',
351 r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',
352 r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',
353 r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',
354 r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',
355 r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',
356 r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',
357 r'(?:www\.)?hpniueoejy4opn7bc4ftgazyqjoeqwlvh2uiku2xqku6zpoa4bf5ruid\.onion',
d1c4f6d4
JW
358 # piped instances from https://github.com/TeamPiped/Piped/wiki/Instances
359 r'(?:www\.)?piped\.kavin\.rocks',
360 r'(?:www\.)?piped\.silkky\.cloud',
361 r'(?:www\.)?piped\.tokhmi\.xyz',
362 r'(?:www\.)?piped\.moomoo\.me',
363 r'(?:www\.)?il\.ax',
364 r'(?:www\.)?piped\.syncpundit\.com',
365 r'(?:www\.)?piped\.mha\.fi',
366 r'(?:www\.)?piped\.mint\.lgbt',
367 r'(?:www\.)?piped\.privacy\.com\.de',
d9190e44
RH
368 )
369
def _initialize_consent(self):
    """
    Set an accepting CONSENT cookie on .youtube.com unless one is not needed.

    Nothing is done when a '__Secure-3PSID' cookie exists or the current
    CONSENT cookie already contains 'YES'. The id of a PENDING consent is
    reused when present; otherwise a random 3-digit id is used.
    """
    jar = self._get_cookies('https://www.youtube.com/')
    if jar.get('__Secure-3PSID'):
        return

    current = jar.get('CONSENT')
    consent_id = None
    if current:
        if 'YES' in current.value:
            return
        consent_id = self._search_regex(
            r'PENDING\+(\d+)', current.value, 'consent', default=None)

    consent_id = consent_id or random.randint(100, 999)
    self._set_cookie('.youtube.com', 'CONSENT', 'YES+cb.20210328-17-p0.en+FX+%s' % consent_id)
8d81f3e3 384
def _initialize_pref(self):
    """
    Rewrite the PREF cookie so that 'hl' is 'en' and 'tz' is 'UTC'.

    Other values of an existing PREF cookie are kept; a cookie that fails
    to parse only produces a warning.
    """
    existing = self._get_cookies('https://www.youtube.com/').get('PREF')
    pref = {}
    if existing:
        try:
            pref = dict(urllib.parse.parse_qsl(existing.value))
        except ValueError:
            self.report_warning('Failed to parse user PREF cookie' + bug_reports_message())
    pref['hl'] = 'en'
    pref['tz'] = 'UTC'
    self._set_cookie('.youtube.com', name='PREF', value=urllib.parse.urlencode(pref))
f3aa3c3f 396
def _real_initialize(self):
    """Prepare the PREF and CONSENT cookies, then enforce the login requirement."""
    for step in (self._initialize_pref, self._initialize_consent, self._check_login_required):
        step()
401
402 def _check_login_required(self):
24146491 403 if self._LOGIN_REQUIRED and not self._cookies_passed:
52efa4b3 404 self.raise_login_required('Login details are needed to download this content', method='cookies')
c5e8d7af 405
b7c47b74 406 _YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*='
407 _YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*='
a0566bbf 408
def _get_default_ytcfg(self, client='web'):
    """Return a deep copy of the INNERTUBE_CLIENTS entry for `client`."""
    builtin_cfg = INNERTUBE_CLIENTS[client]
    return copy.deepcopy(builtin_cfg)
109dd3b2 411
def _get_innertube_host(self, client='web'):
    """Return the 'INNERTUBE_HOST' value of the INNERTUBE_CLIENTS entry for `client`."""
    client_cfg = INNERTUBE_CLIENTS[client]
    return client_cfg['INNERTUBE_HOST']
109dd3b2 414
def _ytcfg_get_safe(self, ytcfg, getter, expected_type=None, default_client='web'):
    """
    try_get on `ytcfg`, falling back to the default config of `default_client`
    whenever the value found in `ytcfg` is missing or falsy.
    """
    value = try_get(ytcfg, getter, expected_type)
    if value:
        return value
    return try_get(self._get_default_ytcfg(default_client), getter, expected_type)
419
def _extract_client_name(self, ytcfg, default_client='web'):
    """Return the client name string from `ytcfg`, or from the default client config."""
    name_getters = (
        lambda x: x['INNERTUBE_CLIENT_NAME'],
        lambda x: x['INNERTUBE_CONTEXT']['client']['clientName'],
    )
    return self._ytcfg_get_safe(ytcfg, name_getters, str, default_client)
109dd3b2 424
def _extract_client_version(self, ytcfg, default_client='web'):
    """Return the client version string from `ytcfg`, or from the default client config."""
    version_getters = (
        lambda x: x['INNERTUBE_CLIENT_VERSION'],
        lambda x: x['INNERTUBE_CONTEXT']['client']['clientVersion'],
    )
    return self._ytcfg_get_safe(ytcfg, version_getters, str, default_client)
109dd3b2 429
def _select_api_hostname(self, req_api_hostname, default_client=None):
    """
    Pick the API hostname. Precedence: the 'innertube_host' extractor
    argument, then `req_api_hostname`, then the host configured for
    `default_client` ('web' when not given).
    """
    configured_host = self._configuration_arg('innertube_host', [''], ie_key=YoutubeIE.ie_key())[0]
    if configured_host:
        return configured_host
    if req_api_hostname:
        return req_api_hostname
    return self._get_innertube_host(default_client or 'web')
433
def _extract_api_key(self, ytcfg=None, default_client='web'):
    """Return 'INNERTUBE_API_KEY' from `ytcfg`, or from the default client config."""
    def _api_key(cfg):
        return cfg['INNERTUBE_API_KEY']

    return self._ytcfg_get_safe(ytcfg, _api_key, str, default_client)
109dd3b2 436
def _extract_context(self, ytcfg=None, default_client='web'):
    """
    Return the 'INNERTUBE_CONTEXT' dict from `ytcfg`, or from the default
    client config, with language and timezone forced on its 'client' section.
    """
    sources = (ytcfg, self._get_default_ytcfg(default_client))
    context = get_first(sources, 'INNERTUBE_CONTEXT', expected_type=dict)
    # Enforce language and tz for extraction
    client_section = traverse_obj(context, 'client', expected_type=dict, default={})
    client_section['hl'] = 'en'
    client_section['timeZone'] = 'UTC'
    client_section['utcOffsetMinutes'] = 0
    return context
444
cf87314d 445 _SAPISID = None
446
109dd3b2 447 def _generate_sapisidhash_header(self, origin='https://www.youtube.com'):
a5c56234 448 time_now = round(time.time())
cf87314d 449 if self._SAPISID is None:
450 yt_cookies = self._get_cookies('https://www.youtube.com')
451 # Sometimes SAPISID cookie isn't present but __Secure-3PAPISID is.
452 # See: https://github.com/yt-dlp/yt-dlp/issues/393
453 sapisid_cookie = dict_get(
454 yt_cookies, ('__Secure-3PAPISID', 'SAPISID'))
455 if sapisid_cookie and sapisid_cookie.value:
456 self._SAPISID = sapisid_cookie.value
457 self.write_debug('Extracted SAPISID cookie')
458 # SAPISID cookie is required if not already present
459 if not yt_cookies.get('SAPISID'):
460 self.write_debug('Copying __Secure-3PAPISID cookie to SAPISID cookie')
461 self._set_cookie(
462 '.youtube.com', 'SAPISID', self._SAPISID, secure=True, expire_time=time_now + 3600)
463 else:
464 self._SAPISID = False
465 if not self._SAPISID:
466 return None
1974e99f 467 # SAPISIDHASH algorithm from https://stackoverflow.com/a/32065323
468 sapisidhash = hashlib.sha1(
86e5f3ed 469 f'{time_now} {self._SAPISID} {origin}'.encode()).hexdigest()
1974e99f 470 return f'SAPISIDHASH {time_now}_{sapisidhash}'
a5c56234
M
471
def _call_api(self, ep, query, video_id, fatal=True, headers=None,
              note='Downloading API JSON', errnote='Unable to download API page',
              context=None, api_key=None, api_hostname=None, default_client='web'):
    """
    POST `query` (merged over the context) as JSON to /youtubei/v1/<ep>
    and return the result of `_download_json`.

    A falsy `context` is replaced by the default context of `default_client`.
    The 'innertube_key' extractor argument takes precedence over `api_key`,
    which takes precedence over the key of `default_client`.
    """
    payload = {'context': context or self._extract_context(default_client=default_client)}
    payload.update(query)

    request_headers = self.generate_api_headers(default_client=default_client)
    request_headers['content-type'] = 'application/json'
    if headers:
        request_headers.update(headers)

    api_key = (self._configuration_arg('innertube_key', [''], ie_key=YoutubeIE.ie_key(), casesense=True)[0]
               or api_key or self._extract_api_key(default_client=default_client))
    return self._download_json(
        f'https://{self._select_api_hostname(api_hostname, default_client)}/youtubei/v1/{ep}',
        video_id=video_id, fatal=fatal, note=note, errnote=errnote,
        data=json.dumps(payload).encode('utf8'), headers=request_headers,
        query={'key': api_key, 'prettyPrint': 'false'})
f4f751af 489
def extract_yt_initial_data(self, item_id, webpage, fatal=True):
    """Search `webpage` for the JSON that follows the ytInitialData assignment."""
    return self._search_json(
        self._YT_INITIAL_DATA_RE, webpage, 'yt initial data', item_id,
        fatal=fatal)
1890fc63 492
@staticmethod
def _extract_session_index(*data):
    """
    Index of current account in account list.
    See: https://github.com/yt-dlp/yt-dlp/pull/519

    Returns the first integer 'SESSION_INDEX' found in the given ytcfgs, else None.
    """
    indices = (int_or_none(try_get(ytcfg, lambda x: x['SESSION_INDEX'])) for ytcfg in data)
    return next((index for index in indices if index is not None), None)
503
# Deprecated?
def _extract_identity_token(self, ytcfg=None, webpage=None):
    """Return 'ID_TOKEN' from `ytcfg` if set, else search `webpage` for it; None if absent."""
    token = try_get(ytcfg, lambda x: x['ID_TOKEN'], str) if ytcfg else None
    if token:
        return token
    if not webpage:
        return None
    return self._search_regex(
        r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
        'identity token', default=None, fatal=False)
a1c5d2ca
M
514
@staticmethod
def _extract_account_syncid(*args):
    """
    Extract syncId required to download private playlists of secondary channels
    @params response and/or ytcfg
    """
    datasync_getters = (
        lambda x: x['responseContext']['mainAppWebResponseContext']['datasyncId'],
        lambda x: x['DATASYNC_ID'],
    )
    for source in args:
        # ytcfg includes channel_syncid if on secondary channel
        delegated_sid = try_get(source, lambda x: x['DELEGATED_SESSION_ID'], str)
        if delegated_sid:
            return delegated_sid
        datasync_id = try_get(source, datasync_getters, str) or ''
        parts = datasync_id.split('||')
        if len(parts) > 1 and parts[1]:
            # datasyncid is of the form "channel_syncid||user_syncid" for secondary channel
            # and just "user_syncid||" for primary channel. We only want the channel_syncid
            return parts[0]
a1c5d2ca 533
@staticmethod
def _extract_visitor_data(*args):
    """
    Extracts visitorData from an API response or ytcfg
    Appears to be used to track session state
    """
    visitor_data_paths = (
        'VISITOR_DATA',
        ('INNERTUBE_CONTEXT', 'client', 'visitorData'),
        ('responseContext', 'visitorData'),
    )
    return get_first(args, [visitor_data_paths], expected_type=str)
ac56cf38 543
@functools.cached_property
def is_authenticated(self):
    """True when a SAPISIDHASH header can be generated; computed once per instance."""
    auth_header = self._generate_sapisidhash_header()
    return bool(auth_header)
547
def extract_ytcfg(self, video_id, webpage):
    """Parse the object passed to `ytcfg.set(...)` in `webpage`; {} when missing or unparsable."""
    if not webpage:
        return {}
    raw_ytcfg = self._search_regex(
        r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg',
        default='{}')
    parsed = self._parse_json(raw_ytcfg, video_id, fatal=False)
    return parsed or {}
555
def generate_api_headers(
        self, *, ytcfg=None, account_syncid=None, session_index=None,
        visitor_data=None, identity_token=None, api_hostname=None, default_client='web'):
    """
    Build the HTTP headers for an InnerTube API request.

    Explicit arguments take precedence over values found in `ytcfg`; client
    name, version and user agent fall back to the `default_client` config.
    Headers whose value is None are omitted from the result.
    """
    origin = 'https://' + self._select_api_hostname(api_hostname, default_client)

    client_name_id = self._ytcfg_get_safe(
        ytcfg, lambda x: x['INNERTUBE_CONTEXT_CLIENT_NAME'], default_client=default_client)
    headers = {'X-YouTube-Client-Name': str(client_name_id)}
    headers['X-YouTube-Client-Version'] = self._extract_client_version(ytcfg, default_client)
    headers['Origin'] = origin
    headers['X-Youtube-Identity-Token'] = identity_token or self._extract_identity_token(ytcfg)
    headers['X-Goog-PageId'] = account_syncid or self._extract_account_syncid(ytcfg)
    headers['X-Goog-Visitor-Id'] = visitor_data or self._extract_visitor_data(ytcfg)
    headers['User-Agent'] = self._ytcfg_get_safe(
        ytcfg, lambda x: x['INNERTUBE_CONTEXT']['client']['userAgent'], default_client=default_client)

    if session_index is None:
        session_index = self._extract_session_index(ytcfg)
    if session_index is not None:
        headers['X-Goog-AuthUser'] = session_index
    elif account_syncid:
        # An explicit account sync id without a known session index uses index 0
        headers['X-Goog-AuthUser'] = 0

    auth = self._generate_sapisidhash_header(origin)
    if auth is not None:
        headers['Authorization'] = auth
        headers['X-Origin'] = origin
    return {name: value for name, value in headers.items() if value is not None}
29f7c58a 581
def _download_ytcfg(self, client, video_id):
    """
    Download the page belonging to `client` and return the ytcfg found in it.

    Only 'web', 'web_music' and 'web_embedded' have a page; {} is returned
    for any other client or when nothing could be extracted.
    """
    config_pages = {
        'web': 'https://www.youtube.com',
        'web_music': 'https://music.youtube.com',
        'web_embedded': f'https://www.youtube.com/embed/{video_id}?html5=1'
    }
    url = config_pages.get(client)
    if not url:
        return {}
    webpage = self._download_webpage(
        url, video_id, fatal=False, note=f'Downloading {client.replace("_", " ").strip()} client config')
    return self.extract_ytcfg(video_id, webpage) or {}
593
2d6659b9 594 @staticmethod
595 def _build_api_continuation_query(continuation, ctp=None):
596 query = {
597 'continuation': continuation
598 }
599 # TODO: Inconsistency with clickTrackingParams.
600 # Currently we have a fixed ctp contained within context (from ytcfg)
601 # and a ctp in root query for continuation.
602 if ctp:
603 query['clickTracking'] = {'clickTrackingParams': ctp}
604 return query
605
@classmethod
def _extract_next_continuation_data(cls, renderer):
    """
    Build a continuation query from the renderer's 'nextContinuationData'
    or 'reloadContinuationData'. Returns None when no token is found.
    """
    data_getters = (
        lambda x: x['continuations'][0]['nextContinuationData'],
        lambda x: x['continuation']['reloadContinuationData'],
    )
    continuation_data = try_get(renderer, data_getters, dict)
    if continuation_data:
        token = continuation_data.get('continuation')
        if token:
            return cls._build_api_continuation_query(
                token, continuation_data.get('clickTrackingParams'))
    return None
2d6659b9 618
@classmethod
def _extract_continuation_ep_data(cls, continuation_ep: dict):
    """
    Build a continuation query from a continuation endpoint dict.
    Returns None when the argument is not a dict or carries no token.
    """
    if not isinstance(continuation_ep, dict):
        return None
    token = try_get(
        continuation_ep, lambda x: x['continuationCommand']['token'], str)
    if not token:
        return None
    return cls._build_api_continuation_query(token, continuation_ep.get('clickTrackingParams'))
2d6659b9 628
@classmethod
def _extract_continuation(cls, renderer):
    """
    Return the continuation query for `renderer`, or None.

    The renderer's own continuation data is preferred; otherwise the first
    usable 'continuationItemRenderer' among its 'contents' and 'items' is used.
    """
    direct = cls._extract_next_continuation_data(renderer)
    if direct:
        return direct

    children = [
        child
        for key in ('contents', 'items')
        for child in try_get(renderer, lambda x: x[key], list) or []
    ]
    endpoint_getters = (
        lambda x: x['continuationItemRenderer']['continuationEndpoint'],
        lambda x: x['continuationItemRenderer']['button']['buttonRenderer']['command'],
    )
    for child in children:
        if isinstance(child, dict):
            endpoint = try_get(child, endpoint_getters, dict)
            found = cls._extract_continuation_ep_data(endpoint)
            if found:
                return found
    return None
649
@classmethod
def _extract_alerts(cls, data):
    """
    Yield (alert_type, message) for every alert in data['alerts'].

    Entries of the alerts list that are not dicts are skipped, and so are
    renderer values that are not dicts (previously these raised
    AttributeError on `.get`). Alerts without a type or without text are
    skipped as well.
    """
    for alert_dict in try_get(data, lambda x: x['alerts'], list) or []:
        if not isinstance(alert_dict, dict):
            continue
        for alert in alert_dict.values():
            # Same defensive check as for the outer entry: the data is untrusted
            if not isinstance(alert, dict):
                continue
            alert_type = alert.get('type')
            if not alert_type:
                continue
            message = cls._get_text(alert, 'text')
            if message:
                yield alert_type, message
662
def _report_alerts(self, alerts, expected=True, fatal=True, only_once=False):
    """
    Report (type, message) alerts.

    With `fatal`, alerts of type 'error' (case-insensitive) are errors: the
    last one is raised as ExtractorError and the others are reported as
    warnings, like all non-error alerts.
    """
    errors, warnings = [], []
    for alert_type, alert_message in alerts:
        is_fatal_error = fatal and alert_type.lower() == 'error'
        bucket = errors if is_fatal_error else warnings
        bucket.append([alert_type, alert_message])

    for alert_type, alert_message in warnings + errors[:-1]:
        self.report_warning(f'YouTube said: {alert_type} - {alert_message}', only_once=only_once)
    if not errors:
        return
    raise ExtractorError('YouTube said: %s' % errors[-1][1], expected=expected)
676
def _extract_and_report_alerts(self, data, *args, **kwargs):
    """Extract the alerts of `data` and pass them to `_report_alerts` with the extra arguments."""
    alerts = self._extract_alerts(data)
    return self._report_alerts(alerts, *args, **kwargs)
679
def _extract_badges(self, renderer: dict):
    """Return the set of lower-cased 'metadataBadgeRenderer' labels in renderer['badges']."""
    labels = (
        try_get(badge, lambda x: x['metadataBadgeRenderer']['label'], str)
        for badge in try_get(renderer, lambda x: x['badges'], list) or []
    )
    return {label.lower() for label in labels if label}
687
@staticmethod
def _get_text(data, *path_list, max_runs=None):
    """
    Return the first non-empty text found under any of the given paths.

    For each candidate object, 'simpleText' is preferred; otherwise the
    'text' values of its 'runs' (or of the object itself when it is a list)
    are joined, limited to the first `max_runs` runs when given.
    Without paths, `data` itself is inspected. Returns None if nothing is found.
    """
    for path in path_list or [None]:
        if path is None:
            candidates = [data]
        else:
            candidates = traverse_obj(data, path, default=[])
            has_branching_key = any(
                key is ... or isinstance(key, (list, tuple)) for key in variadic(path))
            if not has_branching_key:
                # Wrap the result so the loop below handles it as one item
                candidates = [candidates]

        for item in candidates:
            simple_text = try_get(item, lambda x: x['simpleText'], str)
            if simple_text:
                return simple_text

            runs = try_get(item, lambda x: x['runs'], list) or []
            if not runs and isinstance(item, list):
                runs = item

            limit = min(len(runs), max_runs or len(runs))
            joined = ''.join(traverse_obj(runs[:limit], (..., 'text'), expected_type=str, default=[]))
            if joined:
                return joined
47193e02 709
def _get_count(self, data, *path_list):
    """
    Parse a count from the text found at the given paths.

    `parse_count` is tried first; when it returns None, the leading run of
    digits and commas (after removing whitespace) is converted instead.
    """
    count_text = self._get_text(data, *path_list) or ''
    parsed = parse_count(count_text)
    if parsed is not None:
        return parsed
    compact_text = re.sub(r'\s', '', count_text)
    leading_digits = self._search_regex(r'^([\d,]+)', compact_text, 'count', default=None)
    return str_to_int(leading_digits)
717
@staticmethod
def _extract_thumbnails(data, *path_list):
    """
    Extract thumbnails from thumbnails dict
    @param path_list: path list to level that contains 'thumbnails' key
    @returns list of dicts with 'url', 'height' and 'width'
    """
    results = []
    for path in path_list or [()]:
        thumbnail_path = (*variadic(path), 'thumbnails', ...)
        for entry in traverse_obj(data, thumbnail_path, default=[]):
            url = url_or_none(entry.get('url'))
            if url:
                # Sometimes youtube gives a wrong thumbnail URL. See:
                # https://github.com/yt-dlp/yt-dlp/issues/233
                # https://github.com/ytdl-org/youtube-dl/issues/28023
                if 'maxresdefault' in url:
                    url = url.partition('?')[0]
                results.append({
                    'url': url,
                    'height': int_or_none(entry.get('height')),
                    'width': int_or_none(entry.get('width')),
                })
    return results
741
@staticmethod
def extract_relative_time(relative_time_text):
    """
    Extracts a relative time from string and converts to dt object
    e.g. 'streamed 6 days ago', '5 seconds ago (edited)', 'updated today'

    Returns None when the text does not match or the offset cannot be converted.
    """
    match = re.search(r'(?P<start>today|yesterday|now)|(?P<time>\d+)\s*(?P<unit>microsecond|second|minute|hour|day|week|month|year)s?\s*ago', relative_time_text)
    if not match:
        return None
    keyword = match.group('start')
    if keyword:
        return datetime_from_str(keyword)
    try:
        return datetime_from_str('now-%s%s' % (match.group('time'), match.group('unit')))
    except ValueError:
        return None
757
def _extract_time_text(self, renderer, *path_list):
    """
    Read a time text from `renderer` and convert it to a timestamp.

    Relative times ("... ago", "today") are tried first, then absolute date
    parsing of the whole text and of a date-like part of it. A warning is
    emitted when a non-empty text cannot be parsed.
    @returns (timestamp, time_text)
    """
    text = self._get_text(renderer, *path_list) or ''
    relative_dt = self.extract_relative_time(text)
    if isinstance(relative_dt, datetime.datetime):
        timestamp = calendar.timegm(relative_dt.timetuple())
    else:
        timestamp = None

    if timestamp is None:
        timestamp = unified_timestamp(text)
        if not timestamp:
            date_patterns = (
                r'([a-z]+\s*\d{1,2},?\s*20\d{2})',
                r'(?:.+|^)(?:live|premieres|ed|ing)(?:\s*(?:on|for))?\s*(.+\d)',
            )
            timestamp = unified_timestamp(
                self._search_regex(date_patterns, text.lower(), 'time text', default=None))

    if text and timestamp is None:
        self.report_warning(f"Cannot parse localized time text '{text}'" + bug_reports_message(), only_once=True)
    return timestamp, text
776
def _extract_response(self, item_id, query, note='Downloading API JSON', headers=None,
                      ytcfg=None, check_get_keys=None, ep='browse', fatal=True, api_hostname=None,
                      default_client='web'):
    """
    Call the API endpoint `ep` through `_call_api` and return its response,
    looping over `self.RetryManager()` for retryable failures.

    Context and API key are taken from `ytcfg`, falling back to `default_client`.
    An attempt is handed back to the retry manager (by setting `retry.error`) for:
    network errors that are not HTTP errors, HTTP errors other than 403/429,
    alerts whose message contains 'unknown error', and responses for which
    `check_get_keys` resolves to nothing.
    All other failures are passed to `_error_or_warning` with the given `fatal`.
    """
    for retry in self.RetryManager():
        try:
            # fatal=True here so that failures surface as ExtractorError and are
            # classified below; the caller's `fatal` is only applied afterwards
            response = self._call_api(
                ep=ep, fatal=True, headers=headers,
                video_id=item_id, query=query, note=note,
                context=self._extract_context(ytcfg, default_client),
                api_key=self._extract_api_key(ytcfg, default_client),
                api_hostname=api_hostname, default_client=default_client)
        except ExtractorError as e:
            if not isinstance(e.cause, network_exceptions):
                # Not a network problem: do not retry
                return self._error_or_warning(e, fatal=fatal)
            elif not isinstance(e.cause, urllib.error.HTTPError):
                retry.error = e
                continue

            # HTTP error: peek at the body; if it is not HTML, try to read a
            # JSON error message from it and report that message as a warning
            first_bytes = e.cause.read(512)
            if not is_html(first_bytes):
                yt_error = try_get(
                    self._parse_json(
                        self._webpage_read_content(e.cause, None, item_id, prefix=first_bytes) or '{}', item_id, fatal=False),
                    lambda x: x['error']['message'], str)
                if yt_error:
                    self._report_alerts([('ERROR', yt_error)], fatal=False)
            # Downloading page may result in intermittent 5xx HTTP error
            # Sometimes a 404 is also received. See: https://github.com/ytdl-org/youtube-dl/issues/28289
            # We also want to catch all other network exceptions since errors in later pages can be troublesome
            # See https://github.com/yt-dlp/yt-dlp/issues/507#issuecomment-880188210
            if e.cause.code not in (403, 429):
                retry.error = e
                continue
            return self._error_or_warning(e, fatal=fatal)

        try:
            self._extract_and_report_alerts(response, only_once=True)
        except ExtractorError as e:
            # YouTube servers may return errors we want to retry on in a 200 OK response
            # See: https://github.com/yt-dlp/yt-dlp/issues/839
            if 'unknown error' in e.msg.lower():
                retry.error = e
                continue
            return self._error_or_warning(e, fatal=fatal)
        # Youtube sometimes sends incomplete data
        # See: https://github.com/ytdl-org/youtube-dl/issues/28194
        if not traverse_obj(response, *variadic(check_get_keys)):
            retry.error = ExtractorError('Incomplete data received', expected=True)
            continue

        return response
109dd3b2 828
@staticmethod
def is_music_url(url):
    """Return True if url starts with http(s)://music.youtube.com/, else False"""
    return bool(re.match(r'https?://music\.youtube\.com/', url))
832
def _extract_video(self, renderer):
    """
    Build a 'url' type result (handled by YoutubeIE) from a video renderer.

    The result points at the /shorts/ URL when the time-status overlay style
    is 'SHORTS' or the renderer's navigation URL contains '/shorts/', and at
    the regular watch URL otherwise. 'upload_date' is only filled in when the
    'approximate_date' configuration argument of 'youtubetab' is set.

    @param renderer  dict describing a single video
    @returns         dict with id, url, title and the other metadata extracted
    """
    video_id = renderer.get('videoId')
    title = self._get_text(renderer, 'title')
    description = self._get_text(renderer, 'descriptionSnippet')

    length_text = self._get_text(
        renderer, 'lengthText', ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'text'))
    duration = parse_duration(length_text)
    if duration is None:
        # Fall back to the duration embedded in the title's accessibility label
        a11y_label = traverse_obj(
            renderer, ('title', 'accessibility', 'accessibilityData', 'label'), default='', expected_type=str)
        duration = parse_duration(self._search_regex(
            r'(?i)(ago)(?!.*\1)\s+(?P<duration>[a-z0-9 ,]+?)(?:\s+[\d,]+\s+views)?(?:\s+-\s+play\s+short)?$',
            a11y_label, video_id, default=None, group='duration'))

    view_count = self._get_count(renderer, 'viewCountText')

    uploader = self._get_text(renderer, 'ownerText', 'shortBylineText')
    channel_id = traverse_obj(
        renderer, ('shortBylineText', 'runs', ..., 'navigationEndpoint', 'browseEndpoint', 'browseId'),
        expected_type=str, get_all=False)
    timestamp, time_text = self._extract_time_text(renderer, 'publishedTimeText')
    start_time = traverse_obj(renderer, ('upcomingEventData', 'startTime'), get_all=False)
    scheduled_timestamp = str_to_int(start_time)
    overlay_style = traverse_obj(
        renderer, ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'style'),
        get_all=False, expected_type=str)
    badges = self._extract_badges(renderer)
    thumbnails = self._extract_thumbnails(renderer, 'thumbnail')
    web_url = traverse_obj(
        renderer, ('navigationEndpoint', 'commandMetadata', 'webCommandMetadata', 'url'),
        expected_type=str)
    navigation_url = urljoin('https://www.youtube.com/', web_url) or ''

    is_short = overlay_style == 'SHORTS' or '/shorts/' in navigation_url
    url = (f'https://www.youtube.com/shorts/{video_id}' if is_short
           else f'https://www.youtube.com/watch?v={video_id}')

    # Evaluated in the same order as the keys they fill in below
    ie_key = YoutubeIE.ie_key()

    upload_date = None
    if self._configuration_arg('approximate_date', ie_key='youtubetab'):
        upload_date = strftime_or_none(timestamp, '%Y%m%d')

    if scheduled_timestamp is not None:
        live_status = 'is_upcoming'
    elif 'streamed' in time_text.lower():
        live_status = 'was_live'
    elif overlay_style == 'LIVE' or 'live now' in badges:
        live_status = 'is_live'
    else:
        live_status = None

    availability = self._availability(
        needs_premium='premium' in badges, needs_subscription='members only' in badges)

    return {
        '_type': 'url',
        'ie_key': ie_key,
        'id': video_id,
        'url': url,
        'title': title,
        'description': description,
        'duration': duration,
        'view_count': view_count,
        'uploader': uploader,
        'channel_id': channel_id,
        'thumbnails': thumbnails,
        'upload_date': upload_date,
        'live_status': live_status,
        'release_timestamp': scheduled_timestamp,
        'availability': availability,
    }
887
0c148415 888
360e1ca5 889class YoutubeIE(YoutubeBaseInfoExtractor):
96565c7e 890 IE_DESC = 'YouTube'
cb7dfeea 891 _VALID_URL = r"""(?x)^
c5e8d7af 892 (
edb53e2d 893 (?:https?://|//) # http(s):// or protocol-independent URL
bc2ca1bb 894 (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com|
895 (?:www\.)?deturl\.com/www\.youtube\.com|
896 (?:www\.)?pwnyoutube\.com|
897 (?:www\.)?hooktube\.com|
898 (?:www\.)?yourepeat\.com|
899 tube\.majestyc\.net|
900 %(invidious)s|
901 youtube\.googleapis\.com)/ # the various hostnames, with wildcard subdomains
c5e8d7af
PH
902 (?:.*?\#/)? # handle anchor (#/) redirect urls
903 (?: # the various things that can precede the ID:
b6ce9bb0 904 (?:(?:v|embed|e|shorts)/(?!videoseries|live_stream)) # v/ or embed/ or e/ or shorts/
c5e8d7af 905 |(?: # or the v= param in all its forms
f7000f3a 906 (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
c5e8d7af 907 (?:\?|\#!?) # the params delimiter ? or # or #!
040ac686 908 (?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
c5e8d7af
PH
909 v=
910 )
f4b05232 911 ))
cbaed4bb
S
912 |(?:
913 youtu\.be| # just youtu.be/xxxx
6d4fc66b
S
914 vid\.plus| # or vid.plus/xxxx
915 zwearz\.com/watch| # or zwearz.com/watch/xxxx
bc2ca1bb 916 %(invidious)s
cbaed4bb 917 )/
edb53e2d 918 |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
f4b05232 919 )
c5e8d7af 920 )? # all until now is optional -> you can pass the naked ID
201c1459 921 (?P<id>[0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
c5e8d7af 922 (?(1).+)? # if we found the ID, everything can follow
9297939e 923 (?:\#|$)""" % {
d9190e44 924 'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
bc2ca1bb 925 }
7c6eb424 926 _EMBED_REGEX = [
927 r'''(?x)
928 (?:
929 <iframe[^>]+?src=|
930 data-video-url=|
931 <embed[^>]+?src=|
932 embedSWF\(?:\s*|
933 <object[^>]+data=|
934 new\s+SWFObject\(
935 )
936 (["\'])
937 (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
938 (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
939 \1''',
940 # https://wordpress.org/plugins/lazy-load-for-videos/
941 r'''(?xs)
942 <a\s[^>]*\bhref="(?P<url>https://www\.youtube\.com/watch\?v=[0-9A-Za-z_-]{11})"
943 \s[^>]*\bclass="[^"]*\blazy-load-youtube''',
944 ]
945
e40c758c 946 _PLAYER_INFO_RE = (
cc2db878 947 r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player',
948 r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',
545cc85d 949 r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$',
e40c758c 950 )
2c62dc26 951 _formats = {
c2d3cb4c 952 '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
953 '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
954 '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
955 '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
956 '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
957 '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
958 '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
959 '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
3834d3e3 960 # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
c2d3cb4c 961 '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
962 '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
963 '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
964 '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
965 '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
966 '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
e1a0bfdf 967 '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
c2d3cb4c 968 '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
969 '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
e1a0bfdf 970
971
972 # 3D videos
c2d3cb4c 973 '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
974 '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
975 '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
976 '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
e1a0bfdf 977 '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
978 '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
979 '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
836a086c 980
96fb5605 981 # Apple HTTP Live Streaming
11f12195 982 '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
c2d3cb4c 983 '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
984 '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
985 '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
986 '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
987 '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
e1a0bfdf 988 '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
989 '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
2c62dc26
PH
990
991 # DASH mp4 video
d23028a8
S
992 '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
993 '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
994 '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
995 '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
996 '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
067aa17e 997 '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
d23028a8
S
998 '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
999 '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
1000 '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
1001 '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
1002 '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
1003 '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},
836a086c 1004
f6f1fc92 1005 # Dash mp4 audio
d23028a8
S
1006 '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
1007 '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
1008 '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
1009 '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
1010 '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
1011 '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
1012 '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},
836a086c
AZ
1013
1014 # Dash webm
d23028a8
S
1015 '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1016 '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1017 '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1018 '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1019 '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1020 '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1021 '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
1022 '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1023 '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1024 '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1025 '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1026 '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1027 '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1028 '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1029 '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
4c6b4764 1030 # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
d23028a8
S
1031 '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1032 '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
1033 '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
1034 '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
1035 '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1036 '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
2c62dc26
PH
1037
1038 # Dash webm audio
d23028a8
S
1039 '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
1040 '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},
ce6b9a2d 1041
0857baad 1042 # Dash webm audio with opus inside
d23028a8
S
1043 '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
1044 '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
1045 '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},
0857baad 1046
ce6b9a2d
PH
1047 # RTMP (unnamed)
1048 '_rtmp': {'protocol': 'rtmp'},
b85eae0f
S
1049
1050 # av01 video only formats sometimes served with "unknown" codecs
9b5fa9ee
TOH
1051 '394': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},
1052 '395': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},
1053 '396': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'av01.0.01M.08'},
1054 '397': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'av01.0.04M.08'},
1055 '398': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'av01.0.05M.08'},
1056 '399': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'av01.0.08M.08'},
1057 '400': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},
1058 '401': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},
c5e8d7af 1059 }
29f7c58a 1060 _SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')
836a086c 1061
fd5c4aab
S
1062 _GEO_BYPASS = False
1063
78caa52a 1064 IE_NAME = 'youtube'
2eb88d95
PH
1065 _TESTS = [
1066 {
2d3d2997 1067 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
4bc3a23e
PH
1068 'info_dict': {
1069 'id': 'BaW_jenozKc',
1070 'ext': 'mp4',
3867038a 1071 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
4bc3a23e
PH
1072 'uploader': 'Philipp Hagemeister',
1073 'uploader_id': 'phihag',
ec85ded8 1074 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
ff9f925b 1075 'channel': 'Philipp Hagemeister',
dd4c4492
S
1076 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
1077 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
4bc3a23e 1078 'upload_date': '20121002',
ff9f925b 1079 'description': 'md5:8fb536f4877b8a7455c2ec23794dbc22',
4bc3a23e 1080 'categories': ['Science & Technology'],
3867038a 1081 'tags': ['youtube-dl'],
556dbe7f 1082 'duration': 10,
dbdaaa23 1083 'view_count': int,
3e7c1224 1084 'like_count': int,
ff9f925b 1085 'availability': 'public',
1086 'playable_in_embed': True,
1087 'thumbnail': 'https://i.ytimg.com/vi/BaW_jenozKc/maxresdefault.jpg',
1088 'live_status': 'not_live',
1089 'age_limit': 0,
7c80519c 1090 'start_time': 1,
297a564b 1091 'end_time': 9,
12a1b225 1092 'comment_count': int,
6c73052c 1093 'channel_follower_count': int
2eb88d95 1094 }
0e853ca4 1095 },
fccd3771 1096 {
4bc3a23e
PH
1097 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
1098 'note': 'Embed-only video (#1746)',
1099 'info_dict': {
1100 'id': 'yZIXLfi8CZQ',
1101 'ext': 'mp4',
1102 'upload_date': '20120608',
1103 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
1104 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
1105 'uploader': 'SET India',
94bfcd23 1106 'uploader_id': 'setindia',
ec85ded8 1107 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
94bfcd23 1108 'age_limit': 18,
545cc85d 1109 },
1110 'skip': 'Private video',
fccd3771 1111 },
11b56058 1112 {
8bdd16b4 1113 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=yZIXLfi8CZQ',
11b56058
PM
1114 'note': 'Use the first video ID in the URL',
1115 'info_dict': {
1116 'id': 'BaW_jenozKc',
1117 'ext': 'mp4',
3867038a 1118 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
11b56058
PM
1119 'uploader': 'Philipp Hagemeister',
1120 'uploader_id': 'phihag',
ec85ded8 1121 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
976ae3ea 1122 'channel': 'Philipp Hagemeister',
1123 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
1124 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
11b56058 1125 'upload_date': '20121002',
976ae3ea 1126 'description': 'md5:8fb536f4877b8a7455c2ec23794dbc22',
11b56058 1127 'categories': ['Science & Technology'],
3867038a 1128 'tags': ['youtube-dl'],
556dbe7f 1129 'duration': 10,
dbdaaa23 1130 'view_count': int,
11b56058 1131 'like_count': int,
976ae3ea 1132 'availability': 'public',
1133 'playable_in_embed': True,
1134 'thumbnail': 'https://i.ytimg.com/vi/BaW_jenozKc/maxresdefault.jpg',
1135 'live_status': 'not_live',
1136 'age_limit': 0,
12a1b225 1137 'comment_count': int,
6c73052c 1138 'channel_follower_count': int
34a7de29
S
1139 },
1140 'params': {
1141 'skip_download': True,
1142 },
11b56058 1143 },
dd27fd17 1144 {
2d3d2997 1145 'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
4bc3a23e
PH
1146 'note': '256k DASH audio (format 141) via DASH manifest',
1147 'info_dict': {
1148 'id': 'a9LDPn-MO4I',
1149 'ext': 'm4a',
1150 'upload_date': '20121002',
1151 'uploader_id': '8KVIDEO',
ec85ded8 1152 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
4bc3a23e
PH
1153 'description': '',
1154 'uploader': '8KVIDEO',
1155 'title': 'UHDTV TEST 8K VIDEO.mp4'
4919603f 1156 },
4bc3a23e
PH
1157 'params': {
1158 'youtube_include_dash_manifest': True,
1159 'format': '141',
4919603f 1160 },
de3c7fe0 1161 'skip': 'format 141 not served anymore',
dd27fd17 1162 },
8bdd16b4 1163 # DASH manifest with encrypted signature
1164 {
1165 'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
1166 'info_dict': {
1167 'id': 'IB3lcPjvWLA',
1168 'ext': 'm4a',
1169 'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
1170 'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
1171 'duration': 244,
1172 'uploader': 'AfrojackVEVO',
1173 'uploader_id': 'AfrojackVEVO',
1174 'upload_date': '20131011',
cc2db878 1175 'abr': 129.495,
976ae3ea 1176 'like_count': int,
1177 'channel_id': 'UChuZAo1RKL85gev3Eal9_zg',
1178 'playable_in_embed': True,
1179 'channel_url': 'https://www.youtube.com/channel/UChuZAo1RKL85gev3Eal9_zg',
1180 'view_count': int,
1181 'track': 'The Spark',
1182 'live_status': 'not_live',
1183 'thumbnail': 'https://i.ytimg.com/vi_webp/IB3lcPjvWLA/maxresdefault.webp',
1184 'channel': 'Afrojack',
1185 'uploader_url': 'http://www.youtube.com/user/AfrojackVEVO',
1186 'tags': 'count:19',
1187 'availability': 'public',
1188 'categories': ['Music'],
1189 'age_limit': 0,
1190 'alt_title': 'The Spark',
6c73052c 1191 'channel_follower_count': int
8bdd16b4 1192 },
1193 'params': {
1194 'youtube_include_dash_manifest': True,
1195 'format': '141/bestaudio[ext=m4a]',
1196 },
1197 },
65c2fde2 1198 # Age-gate videos. See https://github.com/yt-dlp/yt-dlp/pull/575#issuecomment-888837000
c522adb1 1199 {
65c2fde2 1200 'note': 'Embed allowed age-gate video',
2d3d2997 1201 'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
c522adb1
JMF
1202 'info_dict': {
1203 'id': 'HtVdAasjOgU',
1204 'ext': 'mp4',
1205 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
ec85ded8 1206 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
556dbe7f 1207 'duration': 142,
c522adb1
JMF
1208 'uploader': 'The Witcher',
1209 'uploader_id': 'WitcherGame',
ec85ded8 1210 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
c522adb1 1211 'upload_date': '20140605',
34952f09 1212 'age_limit': 18,
976ae3ea 1213 'categories': ['Gaming'],
1214 'thumbnail': 'https://i.ytimg.com/vi_webp/HtVdAasjOgU/maxresdefault.webp',
1215 'availability': 'needs_auth',
1216 'channel_url': 'https://www.youtube.com/channel/UCzybXLxv08IApdjdN0mJhEg',
1217 'like_count': int,
1218 'channel': 'The Witcher',
1219 'live_status': 'not_live',
1220 'tags': 'count:17',
1221 'channel_id': 'UCzybXLxv08IApdjdN0mJhEg',
1222 'playable_in_embed': True,
1223 'view_count': int,
6c73052c 1224 'channel_follower_count': int
c522adb1
JMF
1225 },
1226 },
65c2fde2 1227 {
1228 'note': 'Age-gate video with embed allowed in public site',
1229 'url': 'https://youtube.com/watch?v=HsUATh_Nc2U',
1230 'info_dict': {
1231 'id': 'HsUATh_Nc2U',
1232 'ext': 'mp4',
1233 'title': 'Godzilla 2 (Official Video)',
1234 'description': 'md5:bf77e03fcae5529475e500129b05668a',
1235 'upload_date': '20200408',
1236 'uploader_id': 'FlyingKitty900',
1237 'uploader': 'FlyingKitty',
1238 'age_limit': 18,
976ae3ea 1239 'availability': 'needs_auth',
1240 'channel_id': 'UCYQT13AtrJC0gsM1far_zJg',
1241 'uploader_url': 'http://www.youtube.com/user/FlyingKitty900',
1242 'channel': 'FlyingKitty',
1243 'channel_url': 'https://www.youtube.com/channel/UCYQT13AtrJC0gsM1far_zJg',
1244 'view_count': int,
1245 'categories': ['Entertainment'],
1246 'live_status': 'not_live',
1247 'tags': ['Flyingkitty', 'godzilla 2'],
1248 'thumbnail': 'https://i.ytimg.com/vi/HsUATh_Nc2U/maxresdefault.jpg',
1249 'like_count': int,
1250 'duration': 177,
1251 'playable_in_embed': True,
6c73052c 1252 'channel_follower_count': int
65c2fde2 1253 },
1254 },
1255 {
1256 'note': 'Age-gate video embedable only with clientScreen=EMBED',
1257 'url': 'https://youtube.com/watch?v=Tq92D6wQ1mg',
1258 'info_dict': {
1259 'id': 'Tq92D6wQ1mg',
1260 'title': '[MMD] Adios - EVERGLOW [+Motion DL]',
3619f78d 1261 'ext': 'mp4',
17322130 1262 'upload_date': '20191228',
65c2fde2 1263 'uploader_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',
1264 'uploader': 'Projekt Melody',
1265 'description': 'md5:17eccca93a786d51bc67646756894066',
1266 'age_limit': 18,
976ae3ea 1267 'like_count': int,
1268 'availability': 'needs_auth',
1269 'uploader_url': 'http://www.youtube.com/channel/UC1yoRdFoFJaCY-AGfD9W0wQ',
1270 'channel_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',
1271 'view_count': int,
1272 'thumbnail': 'https://i.ytimg.com/vi_webp/Tq92D6wQ1mg/sddefault.webp',
1273 'channel': 'Projekt Melody',
1274 'live_status': 'not_live',
1275 'tags': ['mmd', 'dance', 'mikumikudance', 'kpop', 'vtuber'],
1276 'playable_in_embed': True,
1277 'categories': ['Entertainment'],
1278 'duration': 106,
1279 'channel_url': 'https://www.youtube.com/channel/UC1yoRdFoFJaCY-AGfD9W0wQ',
12a1b225 1280 'comment_count': int,
6c73052c 1281 'channel_follower_count': int
65c2fde2 1282 },
1283 },
1284 {
1285 'note': 'Non-Agegated non-embeddable video',
1286 'url': 'https://youtube.com/watch?v=MeJVWBSsPAY',
1287 'info_dict': {
1288 'id': 'MeJVWBSsPAY',
1289 'ext': 'mp4',
1290 'title': 'OOMPH! - Such Mich Find Mich (Lyrics)',
1291 'uploader': 'Herr Lurik',
1292 'uploader_id': 'st3in234',
1293 'description': 'Fan Video. Music & Lyrics by OOMPH!.',
1294 'upload_date': '20130730',
976ae3ea 1295 'track': 'Such mich find mich',
1296 'age_limit': 0,
1297 'tags': ['oomph', 'such mich find mich', 'lyrics', 'german industrial', 'musica industrial'],
1298 'like_count': int,
1299 'playable_in_embed': False,
1300 'creator': 'OOMPH!',
1301 'thumbnail': 'https://i.ytimg.com/vi/MeJVWBSsPAY/sddefault.jpg',
1302 'view_count': int,
1303 'alt_title': 'Such mich find mich',
1304 'duration': 210,
1305 'channel': 'Herr Lurik',
1306 'channel_id': 'UCdR3RSDPqub28LjZx0v9-aA',
1307 'categories': ['Music'],
1308 'availability': 'public',
1309 'uploader_url': 'http://www.youtube.com/user/st3in234',
1310 'channel_url': 'https://www.youtube.com/channel/UCdR3RSDPqub28LjZx0v9-aA',
1311 'live_status': 'not_live',
1312 'artist': 'OOMPH!',
6c73052c 1313 'channel_follower_count': int
65c2fde2 1314 },
1315 },
1316 {
1317 'note': 'Non-bypassable age-gated video',
1318 'url': 'https://youtube.com/watch?v=Cr381pDsSsA',
1319 'only_matching': True,
1320 },
8bdd16b4 1321 # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
1322 # YouTube Red ad is not captured for creator
1323 {
1324 'url': '__2ABJjxzNo',
1325 'info_dict': {
1326 'id': '__2ABJjxzNo',
1327 'ext': 'mp4',
1328 'duration': 266,
1329 'upload_date': '20100430',
1330 'uploader_id': 'deadmau5',
1331 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
545cc85d 1332 'creator': 'deadmau5',
1333 'description': 'md5:6cbcd3a92ce1bc676fc4d6ab4ace2336',
8bdd16b4 1334 'uploader': 'deadmau5',
1335 'title': 'Deadmau5 - Some Chords (HD)',
545cc85d 1336 'alt_title': 'Some Chords',
976ae3ea 1337 'availability': 'public',
1338 'tags': 'count:14',
1339 'channel_id': 'UCYEK6xds6eo-3tr4xRdflmQ',
1340 'view_count': int,
1341 'live_status': 'not_live',
1342 'channel': 'deadmau5',
1343 'thumbnail': 'https://i.ytimg.com/vi_webp/__2ABJjxzNo/maxresdefault.webp',
1344 'like_count': int,
1345 'track': 'Some Chords',
1346 'artist': 'deadmau5',
1347 'playable_in_embed': True,
1348 'age_limit': 0,
1349 'channel_url': 'https://www.youtube.com/channel/UCYEK6xds6eo-3tr4xRdflmQ',
1350 'categories': ['Music'],
1351 'album': 'Some Chords',
6c73052c 1352 'channel_follower_count': int
8bdd16b4 1353 },
1354 'expected_warnings': [
1355 'DASH manifest missing',
1356 ]
1357 },
067aa17e 1358 # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
e52a40ab
PH
1359 {
1360 'url': 'lqQg6PlCWgI',
1361 'info_dict': {
1362 'id': 'lqQg6PlCWgI',
1363 'ext': 'mp4',
556dbe7f 1364 'duration': 6085,
90227264 1365 'upload_date': '20150827',
cbe2bd91 1366 'uploader_id': 'olympic',
ec85ded8 1367 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
12a1b225 1368 'description': 'md5:04bbbf3ccceb6795947572ca36f45904',
11f9be09 1369 'uploader': 'Olympics',
cbe2bd91 1370 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
976ae3ea 1371 'like_count': int,
1372 'release_timestamp': 1343767800,
1373 'playable_in_embed': True,
1374 'categories': ['Sports'],
1375 'release_date': '20120731',
1376 'channel': 'Olympics',
1377 'tags': ['Hockey', '2012-07-31', '31 July 2012', 'Riverbank Arena', 'Session', 'Olympics', 'Olympic Games', 'London 2012', '2012 Summer Olympics', 'Summer Games'],
1378 'channel_id': 'UCTl3QQTvqHFjurroKxexy2Q',
1379 'thumbnail': 'https://i.ytimg.com/vi/lqQg6PlCWgI/maxresdefault.jpg',
1380 'age_limit': 0,
1381 'availability': 'public',
1382 'live_status': 'was_live',
1383 'view_count': int,
1384 'channel_url': 'https://www.youtube.com/channel/UCTl3QQTvqHFjurroKxexy2Q',
6c73052c 1385 'channel_follower_count': int
cbe2bd91
PH
1386 },
1387 'params': {
1388 'skip_download': 'requires avconv',
e52a40ab 1389 }
cbe2bd91 1390 },
6271f1ca
PH
1391 # Non-square pixels
1392 {
1393 'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
1394 'info_dict': {
1395 'id': '_b-2C3KPAM0',
1396 'ext': 'mp4',
1397 'stretched_ratio': 16 / 9.,
556dbe7f 1398 'duration': 85,
6271f1ca
PH
1399 'upload_date': '20110310',
1400 'uploader_id': 'AllenMeow',
ec85ded8 1401 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
6271f1ca 1402 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
eb6793ba 1403 'uploader': '孫ᄋᄅ',
6271f1ca 1404 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
976ae3ea 1405 'playable_in_embed': True,
1406 'channel': '孫ᄋᄅ',
1407 'age_limit': 0,
1408 'tags': 'count:11',
1409 'channel_url': 'https://www.youtube.com/channel/UCS-xxCmRaA6BFdmgDPA_BIw',
1410 'channel_id': 'UCS-xxCmRaA6BFdmgDPA_BIw',
1411 'thumbnail': 'https://i.ytimg.com/vi/_b-2C3KPAM0/maxresdefault.jpg',
1412 'view_count': int,
1413 'categories': ['People & Blogs'],
1414 'like_count': int,
1415 'live_status': 'not_live',
1416 'availability': 'unlisted',
12a1b225 1417 'comment_count': int,
6c73052c 1418 'channel_follower_count': int
6271f1ca 1419 },
06b491eb
S
1420 },
1421 # url_encoded_fmt_stream_map is empty string
1422 {
1423 'url': 'qEJwOuvDf7I',
1424 'info_dict': {
1425 'id': 'qEJwOuvDf7I',
f57b7835 1426 'ext': 'webm',
06b491eb
S
1427 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
1428 'description': '',
1429 'upload_date': '20150404',
1430 'uploader_id': 'spbelect',
1431 'uploader': 'Наблюдатели Петербурга',
1432 },
1433 'params': {
1434 'skip_download': 'requires avconv',
e323cf3f
S
1435 },
1436 'skip': 'This live event has ended.',
06b491eb 1437 },
067aa17e 1438 # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
da77d856
S
1439 {
1440 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
1441 'info_dict': {
1442 'id': 'FIl7x6_3R5Y',
eb6793ba 1443 'ext': 'webm',
da77d856
S
1444 'title': 'md5:7b81415841e02ecd4313668cde88737a',
1445 'description': 'md5:116377fd2963b81ec4ce64b542173306',
556dbe7f 1446 'duration': 220,
da77d856
S
1447 'upload_date': '20150625',
1448 'uploader_id': 'dorappi2000',
ec85ded8 1449 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
da77d856 1450 'uploader': 'dorappi2000',
eb6793ba 1451 'formats': 'mincount:31',
da77d856 1452 },
eb6793ba 1453 'skip': 'not actual anymore',
2ee8f5d8 1454 },
8a1a26ce
YCH
1455 # DASH manifest with segment_list
1456 {
1457 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
1458 'md5': '8ce563a1d667b599d21064e982ab9e31',
1459 'info_dict': {
1460 'id': 'CsmdDsKjzN8',
1461 'ext': 'mp4',
17ee98e1 1462 'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510
8a1a26ce
YCH
1463 'uploader': 'Airtek',
1464 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
1465 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
1466 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
1467 },
1468 'params': {
1469 'youtube_include_dash_manifest': True,
1470 'format': '135', # bestvideo
be49068d
S
1471 },
1472 'skip': 'This live event has ended.',
2ee8f5d8 1473 },
cf7e015f
S
1474 {
1475 # Multifeed videos (multiple cameras), URL is for Main Camera
545cc85d 1476 'url': 'https://www.youtube.com/watch?v=jvGDaLqkpTg',
cf7e015f 1477 'info_dict': {
545cc85d 1478 'id': 'jvGDaLqkpTg',
1479 'title': 'Tom Clancy Free Weekend Rainbow Whatever',
1480 'description': 'md5:e03b909557865076822aa169218d6a5d',
cf7e015f
S
1481 },
1482 'playlist': [{
1483 'info_dict': {
545cc85d 1484 'id': 'jvGDaLqkpTg',
cf7e015f 1485 'ext': 'mp4',
545cc85d 1486 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Main Camera)',
1487 'description': 'md5:e03b909557865076822aa169218d6a5d',
1488 'duration': 10643,
1489 'upload_date': '20161111',
1490 'uploader': 'Team PGP',
1491 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1492 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
1493 },
1494 }, {
1495 'info_dict': {
545cc85d 1496 'id': '3AKt1R1aDnw',
cf7e015f 1497 'ext': 'mp4',
545cc85d 1498 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 2)',
1499 'description': 'md5:e03b909557865076822aa169218d6a5d',
1500 'duration': 10991,
1501 'upload_date': '20161111',
1502 'uploader': 'Team PGP',
1503 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1504 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
1505 },
1506 }, {
1507 'info_dict': {
545cc85d 1508 'id': 'RtAMM00gpVc',
cf7e015f 1509 'ext': 'mp4',
545cc85d 1510 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 3)',
1511 'description': 'md5:e03b909557865076822aa169218d6a5d',
1512 'duration': 10995,
1513 'upload_date': '20161111',
1514 'uploader': 'Team PGP',
1515 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1516 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
1517 },
1518 }, {
1519 'info_dict': {
545cc85d 1520 'id': '6N2fdlP3C5U',
cf7e015f 1521 'ext': 'mp4',
545cc85d 1522 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 4)',
1523 'description': 'md5:e03b909557865076822aa169218d6a5d',
1524 'duration': 10990,
1525 'upload_date': '20161111',
1526 'uploader': 'Team PGP',
1527 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1528 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
1529 },
1530 }],
1531 'params': {
1532 'skip_download': True,
1533 },
65c2fde2 1534 'skip': 'Not multifeed anymore',
cbaed4bb 1535 },
f9f49d87 1536 {
067aa17e 1537 # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
f9f49d87
S
1538 'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
1539 'info_dict': {
1540 'id': 'gVfLd0zydlo',
1541 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
1542 },
1543 'playlist_count': 2,
be49068d 1544 'skip': 'Not multifeed anymore',
f9f49d87 1545 },
cbaed4bb 1546 {
2d3d2997 1547 'url': 'https://vid.plus/FlRa-iH7PGw',
cbaed4bb 1548 'only_matching': True,
0e49d9a6 1549 },
6d4fc66b 1550 {
2d3d2997 1551 'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
6d4fc66b
S
1552 'only_matching': True,
1553 },
0e49d9a6 1554 {
067aa17e 1555 # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
a8776b10 1556 # Also tests cut-off URL expansion in video description (see
067aa17e
S
1557 # https://github.com/ytdl-org/youtube-dl/issues/1892,
1558 # https://github.com/ytdl-org/youtube-dl/issues/8164)
0e49d9a6
LL
1559 'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
1560 'info_dict': {
1561 'id': 'lsguqyKfVQg',
1562 'ext': 'mp4',
1563 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
11f9be09 1564 'alt_title': 'Dark Walk',
0e49d9a6 1565 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
556dbe7f 1566 'duration': 133,
0e49d9a6
LL
1567 'upload_date': '20151119',
1568 'uploader_id': 'IronSoulElf',
ec85ded8 1569 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
0e49d9a6 1570 'uploader': 'IronSoulElf',
11f9be09 1571 'creator': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',
1572 'track': 'Dark Walk',
1573 'artist': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',
92bc97d3 1574 'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
976ae3ea 1575 'thumbnail': 'https://i.ytimg.com/vi_webp/lsguqyKfVQg/maxresdefault.webp',
1576 'categories': ['Film & Animation'],
1577 'view_count': int,
1578 'live_status': 'not_live',
1579 'channel_url': 'https://www.youtube.com/channel/UCTSRgz5jylBvFt_S7wnsqLQ',
1580 'channel_id': 'UCTSRgz5jylBvFt_S7wnsqLQ',
1581 'tags': 'count:13',
1582 'availability': 'public',
1583 'channel': 'IronSoulElf',
1584 'playable_in_embed': True,
1585 'like_count': int,
1586 'age_limit': 0,
6c73052c 1587 'channel_follower_count': int
0e49d9a6
LL
1588 },
1589 'params': {
1590 'skip_download': True,
1591 },
1592 },
61f92af1 1593 {
067aa17e 1594 # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
61f92af1
S
1595 'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
1596 'only_matching': True,
1597 },
313dfc45
LL
1598 {
1599 # Video with yt:stretch=17:0
1600 'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
1601 'info_dict': {
1602 'id': 'Q39EVAstoRM',
1603 'ext': 'mp4',
1604 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
1605 'description': 'md5:ee18a25c350637c8faff806845bddee9',
1606 'upload_date': '20151107',
1607 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
1608 'uploader': 'CH GAMER DROID',
1609 },
1610 'params': {
1611 'skip_download': True,
1612 },
be49068d 1613 'skip': 'This video does not exist.',
313dfc45 1614 },
201c1459 1615 {
1616 # Video with incomplete 'yt:stretch=16:'
1617 'url': 'https://www.youtube.com/watch?v=FRhJzUSJbGI',
1618 'only_matching': True,
1619 },
7caf9830
S
1620 {
1621 # Video licensed under Creative Commons
1622 'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
1623 'info_dict': {
1624 'id': 'M4gD1WSo5mA',
1625 'ext': 'mp4',
1626 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
1627 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
556dbe7f 1628 'duration': 721,
17322130 1629 'upload_date': '20150128',
7caf9830 1630 'uploader_id': 'BerkmanCenter',
ec85ded8 1631 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
556dbe7f 1632 'uploader': 'The Berkman Klein Center for Internet & Society',
7caf9830 1633 'license': 'Creative Commons Attribution license (reuse allowed)',
976ae3ea 1634 'channel_id': 'UCuLGmD72gJDBwmLw06X58SA',
1635 'channel_url': 'https://www.youtube.com/channel/UCuLGmD72gJDBwmLw06X58SA',
1636 'like_count': int,
1637 'age_limit': 0,
1638 'tags': ['Copyright (Legal Subject)', 'Law (Industry)', 'William W. Fisher (Author)'],
1639 'channel': 'The Berkman Klein Center for Internet & Society',
1640 'availability': 'public',
1641 'view_count': int,
1642 'categories': ['Education'],
1643 'thumbnail': 'https://i.ytimg.com/vi_webp/M4gD1WSo5mA/maxresdefault.webp',
1644 'live_status': 'not_live',
1645 'playable_in_embed': True,
12a1b225 1646 'comment_count': int,
6c73052c 1647 'channel_follower_count': int
7caf9830
S
1648 },
1649 'params': {
1650 'skip_download': True,
1651 },
1652 },
fd050249
S
1653 {
1654 # Channel-like uploader_url
1655 'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
1656 'info_dict': {
1657 'id': 'eQcmzGIKrzg',
1658 'ext': 'mp4',
1659 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
545cc85d 1660 'description': 'md5:13a2503d7b5904ef4b223aa101628f39',
556dbe7f 1661 'duration': 4060,
17322130 1662 'upload_date': '20151120',
eb6793ba 1663 'uploader': 'Bernie Sanders',
fd050249 1664 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
ec85ded8 1665 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
fd050249 1666 'license': 'Creative Commons Attribution license (reuse allowed)',
976ae3ea 1667 'playable_in_embed': True,
1668 'tags': 'count:12',
1669 'like_count': int,
1670 'channel_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
1671 'age_limit': 0,
1672 'availability': 'public',
1673 'categories': ['News & Politics'],
1674 'channel': 'Bernie Sanders',
1675 'thumbnail': 'https://i.ytimg.com/vi_webp/eQcmzGIKrzg/maxresdefault.webp',
1676 'view_count': int,
1677 'live_status': 'not_live',
1678 'channel_url': 'https://www.youtube.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
12a1b225 1679 'comment_count': int,
6c73052c 1680 'channel_follower_count': int
fd050249
S
1681 },
1682 'params': {
1683 'skip_download': True,
1684 },
1685 },
040ac686
S
1686 {
1687 'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
1688 'only_matching': True,
7f29cf54
S
1689 },
1690 {
067aa17e 1691 # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
7f29cf54
S
1692 'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
1693 'only_matching': True,
6496ccb4
S
1694 },
1695 {
1696 # Rental video preview
1697 'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
1698 'info_dict': {
1699 'id': 'uGpuVWrhIzE',
1700 'ext': 'mp4',
1701 'title': 'Piku - Trailer',
1702 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
1703 'upload_date': '20150811',
1704 'uploader': 'FlixMatrix',
1705 'uploader_id': 'FlixMatrixKaravan',
ec85ded8 1706 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
6496ccb4
S
1707 'license': 'Standard YouTube License',
1708 },
1709 'params': {
1710 'skip_download': True,
1711 },
eb6793ba 1712 'skip': 'This video is not available.',
022a5d66 1713 },
12afdc2a
S
1714 {
1715 # YouTube Red video with episode data
1716 'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
1717 'info_dict': {
1718 'id': 'iqKdEhx-dD4',
1719 'ext': 'mp4',
1720 'title': 'Isolation - Mind Field (Ep 1)',
545cc85d 1721 'description': 'md5:f540112edec5d09fc8cc752d3d4ba3cd',
556dbe7f 1722 'duration': 2085,
12afdc2a
S
1723 'upload_date': '20170118',
1724 'uploader': 'Vsauce',
1725 'uploader_id': 'Vsauce',
1726 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
12afdc2a
S
1727 'series': 'Mind Field',
1728 'season_number': 1,
1729 'episode_number': 1,
976ae3ea 1730 'thumbnail': 'https://i.ytimg.com/vi_webp/iqKdEhx-dD4/maxresdefault.webp',
1731 'tags': 'count:12',
1732 'view_count': int,
1733 'availability': 'public',
1734 'age_limit': 0,
1735 'channel': 'Vsauce',
1736 'episode': 'Episode 1',
1737 'categories': ['Entertainment'],
1738 'season': 'Season 1',
1739 'channel_id': 'UC6nSFpj9HTCZ5t-N3Rm3-HA',
1740 'channel_url': 'https://www.youtube.com/channel/UC6nSFpj9HTCZ5t-N3Rm3-HA',
1741 'like_count': int,
1742 'playable_in_embed': True,
1743 'live_status': 'not_live',
6c73052c 1744 'channel_follower_count': int
12afdc2a
S
1745 },
1746 'params': {
1747 'skip_download': True,
1748 },
1749 'expected_warnings': [
1750 'Skipping DASH manifest',
1751 ],
1752 },
c7121fa7
S
1753 {
1754 # The following content has been identified by the YouTube community
1755 # as inappropriate or offensive to some audiences.
1756 'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
1757 'info_dict': {
1758 'id': '6SJNVb0GnPI',
1759 'ext': 'mp4',
1760 'title': 'Race Differences in Intelligence',
1761 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
1762 'duration': 965,
1763 'upload_date': '20140124',
1764 'uploader': 'New Century Foundation',
1765 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
1766 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
c7121fa7
S
1767 },
1768 'params': {
1769 'skip_download': True,
1770 },
545cc85d 1771 'skip': 'This video has been removed for violating YouTube\'s policy on hate speech.',
c7121fa7 1772 },
022a5d66
S
1773 {
1774 # itag 212
1775 'url': '1t24XAntNCY',
1776 'only_matching': True,
fd5c4aab
S
1777 },
1778 {
1779 # geo restricted to JP
1780 'url': 'sJL6WA-aGkQ',
1781 'only_matching': True,
1782 },
cd5a74a2
S
1783 {
1784 'url': 'https://invidio.us/watch?v=BaW_jenozKc',
1785 'only_matching': True,
1786 },
bc2ca1bb 1787 {
1788 'url': 'https://redirect.invidious.io/watch?v=BaW_jenozKc',
1789 'only_matching': True,
1790 },
1791 {
1792 # from https://nitter.pussthecat.org/YouTube/status/1360363141947944964#m
1793 'url': 'https://redirect.invidious.io/Yh0AhrY9GjA',
1794 'only_matching': True,
1795 },
825cd268
RA
1796 {
1797 # DRM protected
1798 'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
1799 'only_matching': True,
4fe54c12
S
1800 },
1801 {
1802 # Video with unsupported adaptive stream type formats
1803 'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
1804 'info_dict': {
1805 'id': 'Z4Vy8R84T1U',
1806 'ext': 'mp4',
1807 'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
1808 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
1809 'duration': 433,
1810 'upload_date': '20130923',
1811 'uploader': 'Amelia Putri Harwita',
1812 'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',
1813 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',
1814 'formats': 'maxcount:10',
1815 },
1816 'params': {
1817 'skip_download': True,
1818 'youtube_include_dash_manifest': False,
1819 },
5429d6a9 1820 'skip': 'not actual anymore',
5caabd3c 1821 },
1822 {
822b9d9c 1823 # YouTube Music auto-generated description
5caabd3c 1824 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1825 'info_dict': {
1826 'id': 'MgNrAu2pzNs',
1827 'ext': 'mp4',
1828 'title': 'Voyeur Girl',
1829 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
1830 'upload_date': '20190312',
5429d6a9
S
1831 'uploader': 'Stephen - Topic',
1832 'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',
5caabd3c 1833 'artist': 'Stephen',
1834 'track': 'Voyeur Girl',
1835 'album': 'it\'s too much love to know my dear',
1836 'release_date': '20190313',
1837 'release_year': 2019,
976ae3ea 1838 'alt_title': 'Voyeur Girl',
1839 'view_count': int,
1840 'uploader_url': 'http://www.youtube.com/channel/UC-pWHpBjdGG69N9mM2auIAA',
1841 'playable_in_embed': True,
1842 'like_count': int,
1843 'categories': ['Music'],
1844 'channel_url': 'https://www.youtube.com/channel/UC-pWHpBjdGG69N9mM2auIAA',
1845 'channel': 'Stephen',
1846 'availability': 'public',
1847 'creator': 'Stephen',
1848 'duration': 169,
1849 'thumbnail': 'https://i.ytimg.com/vi_webp/MgNrAu2pzNs/maxresdefault.webp',
1850 'age_limit': 0,
1851 'channel_id': 'UC-pWHpBjdGG69N9mM2auIAA',
1852 'tags': 'count:11',
1853 'live_status': 'not_live',
6c73052c 1854 'channel_follower_count': int
5caabd3c 1855 },
1856 'params': {
1857 'skip_download': True,
1858 },
1859 },
66b48727
RA
1860 {
1861 'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',
1862 'only_matching': True,
1863 },
011e75e6
S
1864 {
1865 # invalid -> valid video id redirection
1866 'url': 'DJztXj2GPfl',
1867 'info_dict': {
1868 'id': 'DJztXj2GPfk',
1869 'ext': 'mp4',
1870 'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',
1871 'description': 'md5:bf577a41da97918e94fa9798d9228825',
1872 'upload_date': '20090125',
1873 'uploader': 'Prochorowka',
1874 'uploader_id': 'Prochorowka',
1875 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',
1876 'artist': 'Panjabi MC',
1877 'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',
1878 'album': 'Beware of the Boys (Mundian To Bach Ke)',
1879 },
1880 'params': {
1881 'skip_download': True,
1882 },
545cc85d 1883 'skip': 'Video unavailable',
ea74e00b
DP
1884 },
1885 {
1886 # empty description results in an empty string
1887 'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',
1888 'info_dict': {
1889 'id': 'x41yOUIvK2k',
1890 'ext': 'mp4',
1891 'title': 'IMG 3456',
1892 'description': '',
1893 'upload_date': '20170613',
1894 'uploader_id': 'ElevageOrVert',
1895 'uploader': 'ElevageOrVert',
976ae3ea 1896 'view_count': int,
1897 'thumbnail': 'https://i.ytimg.com/vi_webp/x41yOUIvK2k/maxresdefault.webp',
1898 'uploader_url': 'http://www.youtube.com/user/ElevageOrVert',
1899 'like_count': int,
1900 'channel_id': 'UCo03ZQPBW5U4UC3regpt1nw',
1901 'tags': [],
1902 'channel_url': 'https://www.youtube.com/channel/UCo03ZQPBW5U4UC3regpt1nw',
1903 'availability': 'public',
1904 'age_limit': 0,
1905 'categories': ['Pets & Animals'],
1906 'duration': 7,
1907 'playable_in_embed': True,
1908 'live_status': 'not_live',
1909 'channel': 'ElevageOrVert',
6c73052c 1910 'channel_follower_count': int
ea74e00b
DP
1911 },
1912 'params': {
1913 'skip_download': True,
1914 },
1915 },
a0566bbf 1916 {
29f7c58a 1917 # with '};' inside yt initial data (see [1])
1918 # see [2] for an example with '};' inside ytInitialPlayerResponse
1919 # 1. https://github.com/ytdl-org/youtube-dl/issues/27093
1920 # 2. https://github.com/ytdl-org/youtube-dl/issues/27216
a0566bbf 1921 'url': 'https://www.youtube.com/watch?v=CHqg6qOn4no',
1922 'info_dict': {
1923 'id': 'CHqg6qOn4no',
1924 'ext': 'mp4',
1925 'title': 'Part 77 Sort a list of simple types in c#',
1926 'description': 'md5:b8746fa52e10cdbf47997903f13b20dc',
1927 'upload_date': '20130831',
1928 'uploader_id': 'kudvenkat',
1929 'uploader': 'kudvenkat',
976ae3ea 1930 'channel_id': 'UCCTVrRB5KpIiK6V2GGVsR1Q',
1931 'like_count': int,
1932 'uploader_url': 'http://www.youtube.com/user/kudvenkat',
1933 'channel_url': 'https://www.youtube.com/channel/UCCTVrRB5KpIiK6V2GGVsR1Q',
1934 'live_status': 'not_live',
1935 'categories': ['Education'],
1936 'availability': 'public',
1937 'thumbnail': 'https://i.ytimg.com/vi/CHqg6qOn4no/sddefault.jpg',
1938 'tags': 'count:12',
1939 'playable_in_embed': True,
1940 'age_limit': 0,
1941 'view_count': int,
1942 'duration': 522,
1943 'channel': 'kudvenkat',
12a1b225 1944 'comment_count': int,
6c73052c 1945 'channel_follower_count': int
a0566bbf 1946 },
1947 'params': {
1948 'skip_download': True,
1949 },
1950 },
29f7c58a 1951 {
1952 # another example of '};' in ytInitialData
1953 'url': 'https://www.youtube.com/watch?v=gVfgbahppCY',
1954 'only_matching': True,
1955 },
1956 {
1957 'url': 'https://www.youtube.com/watch_popup?v=63RmMXCd_bQ',
1958 'only_matching': True,
1959 },
545cc85d 1960 {
cc2db878 1961 # https://github.com/ytdl-org/youtube-dl/pull/28094
1962 'url': 'OtqTfy26tG0',
1963 'info_dict': {
1964 'id': 'OtqTfy26tG0',
1965 'ext': 'mp4',
1966 'title': 'Burn Out',
1967 'description': 'md5:8d07b84dcbcbfb34bc12a56d968b6131',
1968 'upload_date': '20141120',
1969 'uploader': 'The Cinematic Orchestra - Topic',
1970 'uploader_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',
1971 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',
1972 'artist': 'The Cinematic Orchestra',
1973 'track': 'Burn Out',
1974 'album': 'Every Day',
976ae3ea 1975 'like_count': int,
1976 'live_status': 'not_live',
1977 'alt_title': 'Burn Out',
1978 'duration': 614,
1979 'age_limit': 0,
1980 'view_count': int,
1981 'channel_url': 'https://www.youtube.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',
1982 'creator': 'The Cinematic Orchestra',
1983 'channel': 'The Cinematic Orchestra',
1984 'tags': ['The Cinematic Orchestra', 'Every Day', 'Burn Out'],
1985 'channel_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',
1986 'availability': 'public',
1987 'thumbnail': 'https://i.ytimg.com/vi/OtqTfy26tG0/maxresdefault.jpg',
1988 'categories': ['Music'],
1989 'playable_in_embed': True,
6c73052c 1990 'channel_follower_count': int
cc2db878 1991 },
1992 'params': {
1993 'skip_download': True,
1994 },
545cc85d 1995 },
bc2ca1bb 1996 {
1997 # controversial video, only works with bpctr when authenticated with cookies
1998 'url': 'https://www.youtube.com/watch?v=nGC3D_FkCmg',
1999 'only_matching': True,
2000 },
a1a7907b 2001 {
2002 # controversial video, requires bpctr/contentCheckOk
2003 'url': 'https://www.youtube.com/watch?v=SZJvDhaSDnc',
2004 'info_dict': {
2005 'id': 'SZJvDhaSDnc',
2006 'ext': 'mp4',
2007 'title': 'San Diego teen commits suicide after bullying over embarrassing video',
2008 'channel_id': 'UC-SJ6nODDmufqBzPBwCvYvQ',
976ae3ea 2009 'uploader': 'CBS Mornings',
11f9be09 2010 'uploader_id': 'CBSThisMorning',
a1a7907b 2011 'upload_date': '20140716',
976ae3ea 2012 'description': 'md5:acde3a73d3f133fc97e837a9f76b53b7',
2013 'duration': 170,
2014 'categories': ['News & Politics'],
2015 'uploader_url': 'http://www.youtube.com/user/CBSThisMorning',
2016 'view_count': int,
2017 'channel': 'CBS Mornings',
2018 'tags': ['suicide', 'bullying', 'video', 'cbs', 'news'],
2019 'thumbnail': 'https://i.ytimg.com/vi/SZJvDhaSDnc/hqdefault.jpg',
2020 'age_limit': 18,
2021 'availability': 'needs_auth',
2022 'channel_url': 'https://www.youtube.com/channel/UC-SJ6nODDmufqBzPBwCvYvQ',
2023 'like_count': int,
2024 'live_status': 'not_live',
2025 'playable_in_embed': True,
6c73052c 2026 'channel_follower_count': int
a1a7907b 2027 }
2028 },
f7ad7160 2029 {
2030 # restricted location, https://github.com/ytdl-org/youtube-dl/issues/28685
2031 'url': 'cBvYw8_A0vQ',
2032 'info_dict': {
2033 'id': 'cBvYw8_A0vQ',
2034 'ext': 'mp4',
2035 'title': '4K Ueno Okachimachi Street Scenes 上野御徒町歩き',
2036 'description': 'md5:ea770e474b7cd6722b4c95b833c03630',
2037 'upload_date': '20201120',
2038 'uploader': 'Walk around Japan',
2039 'uploader_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',
2040 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',
976ae3ea 2041 'duration': 1456,
2042 'categories': ['Travel & Events'],
2043 'channel_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',
2044 'view_count': int,
2045 'channel': 'Walk around Japan',
2046 'tags': ['Ueno Tokyo', 'Okachimachi Tokyo', 'Ameyoko Street', 'Tokyo attraction', 'Travel in Tokyo'],
2047 'thumbnail': 'https://i.ytimg.com/vi_webp/cBvYw8_A0vQ/hqdefault.webp',
2048 'age_limit': 0,
2049 'availability': 'public',
2050 'channel_url': 'https://www.youtube.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',
2051 'live_status': 'not_live',
2052 'playable_in_embed': True,
6c73052c 2053 'channel_follower_count': int
f7ad7160 2054 },
2055 'params': {
2056 'skip_download': True,
2057 },
0fb983f6 2058 }, {
2059 # Has multiple audio streams
2060 'url': 'WaOKSUlf4TM',
2061 'only_matching': True
9297939e 2062 }, {
2063 # Requires Premium: has format 141 when requested using YTM url
2064 'url': 'https://music.youtube.com/watch?v=XclachpHxis',
2065 'only_matching': True
2066 }, {
120916da 2067 # multiple subtitles with same lang_code
2068 'url': 'https://www.youtube.com/watch?v=wsQiKKfKxug',
2069 'only_matching': True,
109dd3b2 2070 }, {
2071 # Force use android client fallback
2072 'url': 'https://www.youtube.com/watch?v=YOelRv7fMxY',
2073 'info_dict': {
2074 'id': 'YOelRv7fMxY',
11f9be09 2075 'title': 'DIGGING A SECRET TUNNEL Part 1',
109dd3b2 2076 'ext': '3gp',
2077 'upload_date': '20210624',
2078 'channel_id': 'UCp68_FLety0O-n9QU6phsgw',
2079 'uploader': 'colinfurze',
11f9be09 2080 'uploader_id': 'colinfurze',
109dd3b2 2081 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCp68_FLety0O-n9QU6phsgw',
976ae3ea 2082 'description': 'md5:5d5991195d599b56cd0c4148907eec50',
2083 'duration': 596,
2084 'categories': ['Entertainment'],
2085 'uploader_url': 'http://www.youtube.com/user/colinfurze',
2086 'view_count': int,
2087 'channel': 'colinfurze',
2088 'tags': ['Colin', 'furze', 'Terry', 'tunnel', 'underground', 'bunker'],
2089 'thumbnail': 'https://i.ytimg.com/vi/YOelRv7fMxY/maxresdefault.jpg',
2090 'age_limit': 0,
2091 'availability': 'public',
2092 'like_count': int,
2093 'live_status': 'not_live',
2094 'playable_in_embed': True,
6c73052c 2095 'channel_follower_count': int
109dd3b2 2096 },
2097 'params': {
2098 'format': '17', # 3gp format available on android
2099 'extractor_args': {'youtube': {'player_client': ['android']}},
2100 },
120916da 2101 },
109dd3b2 2102 {
2103 # Skip download of additional client configs (remix client config in this case)
2104 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
2105 'only_matching': True,
2106 'params': {
2107 'extractor_args': {'youtube': {'player_skip': ['configs']}},
2108 },
8fc54b12 2109 }, {
2110 # shorts
2111 'url': 'https://www.youtube.com/shorts/BGQWPY4IigY',
2112 'only_matching': True,
9222c381 2113 }, {
2114 'note': 'Storyboards',
2115 'url': 'https://www.youtube.com/watch?v=5KLPxDtMqe8',
2116 'info_dict': {
2117 'id': '5KLPxDtMqe8',
2118 'ext': 'mhtml',
2119 'format_id': 'sb0',
2120 'title': 'Your Brain is Plastic',
2121 'uploader_id': 'scishow',
2122 'description': 'md5:89cd86034bdb5466cd87c6ba206cd2bc',
2123 'upload_date': '20140324',
2124 'uploader': 'SciShow',
976ae3ea 2125 'like_count': int,
2126 'channel_id': 'UCZYTClx2T1of7BRZ86-8fow',
2127 'channel_url': 'https://www.youtube.com/channel/UCZYTClx2T1of7BRZ86-8fow',
2128 'view_count': int,
2129 'thumbnail': 'https://i.ytimg.com/vi/5KLPxDtMqe8/maxresdefault.jpg',
2130 'playable_in_embed': True,
2131 'tags': 'count:12',
2132 'uploader_url': 'http://www.youtube.com/user/scishow',
2133 'availability': 'public',
2134 'channel': 'SciShow',
2135 'live_status': 'not_live',
2136 'duration': 248,
2137 'categories': ['Education'],
2138 'age_limit': 0,
6c73052c 2139 'channel_follower_count': int
9222c381 2140 }, 'params': {'format': 'mhtml', 'skip_download': True}
992f9a73 2141 }, {
2142 # Ensure video upload_date is in UTC timezone (video was uploaded 1641170939)
2143 'url': 'https://www.youtube.com/watch?v=2NUZ8W2llS4',
2144 'info_dict': {
2145 'id': '2NUZ8W2llS4',
2146 'ext': 'mp4',
2147 'title': 'The NP that test your phone performance 🙂',
2148 'description': 'md5:144494b24d4f9dfacb97c1bbef5de84d',
2149 'uploader': 'Leon Nguyen',
2150 'uploader_id': 'VNSXIII',
2151 'uploader_url': 'http://www.youtube.com/user/VNSXIII',
2152 'channel_id': 'UCRqNBSOHgilHfAczlUmlWHA',
2153 'channel_url': 'https://www.youtube.com/channel/UCRqNBSOHgilHfAczlUmlWHA',
2154 'duration': 21,
2155 'view_count': int,
2156 'age_limit': 0,
2157 'categories': ['Gaming'],
2158 'tags': 'count:23',
2159 'playable_in_embed': True,
2160 'live_status': 'not_live',
2161 'upload_date': '20220103',
2162 'like_count': int,
2163 'availability': 'public',
2164 'channel': 'Leon Nguyen',
2165 'thumbnail': 'https://i.ytimg.com/vi_webp/2NUZ8W2llS4/maxresdefault.webp',
12a1b225 2166 'comment_count': int,
992f9a73 2167 'channel_follower_count': int
2168 }
1ff88b7a 2169 }, {
2170 # Same video as above, but with --compat-opt no-youtube-prefer-utc-upload-date
2171 'url': 'https://www.youtube.com/watch?v=2NUZ8W2llS4',
2172 'info_dict': {
2173 'id': '2NUZ8W2llS4',
2174 'ext': 'mp4',
2175 'title': 'The NP that test your phone performance 🙂',
2176 'description': 'md5:144494b24d4f9dfacb97c1bbef5de84d',
2177 'uploader': 'Leon Nguyen',
2178 'uploader_id': 'VNSXIII',
2179 'uploader_url': 'http://www.youtube.com/user/VNSXIII',
2180 'channel_id': 'UCRqNBSOHgilHfAczlUmlWHA',
2181 'channel_url': 'https://www.youtube.com/channel/UCRqNBSOHgilHfAczlUmlWHA',
2182 'duration': 21,
2183 'view_count': int,
2184 'age_limit': 0,
2185 'categories': ['Gaming'],
2186 'tags': 'count:23',
2187 'playable_in_embed': True,
2188 'live_status': 'not_live',
2189 'upload_date': '20220102',
2190 'like_count': int,
2191 'availability': 'public',
2192 'channel': 'Leon Nguyen',
2193 'thumbnail': 'https://i.ytimg.com/vi_webp/2NUZ8W2llS4/maxresdefault.webp',
2194 'comment_count': int,
2195 'channel_follower_count': int
2196 },
2197 'params': {'compat_opts': ['no-youtube-prefer-utc-upload-date']}
992f9a73 2198 }, {
2199 # date text is premiered video, ensure upload date in UTC (published 1641172509)
2200 'url': 'https://www.youtube.com/watch?v=mzZzzBU6lrM',
2201 'info_dict': {
2202 'id': 'mzZzzBU6lrM',
2203 'ext': 'mp4',
2204 'title': 'I Met GeorgeNotFound In Real Life...',
2205 'description': 'md5:cca98a355c7184e750f711f3a1b22c84',
2206 'uploader': 'Quackity',
2207 'uploader_id': 'QuackityHQ',
2208 'uploader_url': 'http://www.youtube.com/user/QuackityHQ',
2209 'channel_id': 'UC_8NknAFiyhOUaZqHR3lq3Q',
2210 'channel_url': 'https://www.youtube.com/channel/UC_8NknAFiyhOUaZqHR3lq3Q',
2211 'duration': 955,
2212 'view_count': int,
2213 'age_limit': 0,
2214 'categories': ['Entertainment'],
2215 'tags': 'count:26',
2216 'playable_in_embed': True,
2217 'live_status': 'not_live',
2218 'release_timestamp': 1641172509,
2219 'release_date': '20220103',
2220 'upload_date': '20220103',
2221 'like_count': int,
2222 'availability': 'public',
2223 'channel': 'Quackity',
2224 'thumbnail': 'https://i.ytimg.com/vi/mzZzzBU6lrM/maxresdefault.jpg',
2225 'channel_follower_count': int
2226 }
2227 },
2228 { # continuous livestream. Microformat upload date should be preferred.
2229 # Upload date was 2021-06-19 (not UTC), while stream start is 2021-11-27
2230 'url': 'https://www.youtube.com/watch?v=kgx4WGK0oNU',
2231 'info_dict': {
2232 'id': 'kgx4WGK0oNU',
2233 'title': r're:jazz\/lofi hip hop radio🌱chill beats to relax\/study to \[LIVE 24\/7\] \d{4}-\d{2}-\d{2} \d{2}:\d{2}',
2234 'ext': 'mp4',
2235 'channel_id': 'UC84whx2xxsiA1gXHXXqKGOA',
2236 'availability': 'public',
2237 'age_limit': 0,
2238 'release_timestamp': 1637975704,
2239 'upload_date': '20210619',
2240 'channel_url': 'https://www.youtube.com/channel/UC84whx2xxsiA1gXHXXqKGOA',
2241 'live_status': 'is_live',
2242 'thumbnail': 'https://i.ytimg.com/vi/kgx4WGK0oNU/maxresdefault.jpg',
2243 'uploader': '阿鲍Abao',
2244 'uploader_url': 'http://www.youtube.com/channel/UC84whx2xxsiA1gXHXXqKGOA',
2245 'channel': 'Abao in Tokyo',
2246 'channel_follower_count': int,
2247 'release_date': '20211127',
2248 'tags': 'count:39',
2249 'categories': ['People & Blogs'],
2250 'like_count': int,
2251 'uploader_id': 'UC84whx2xxsiA1gXHXXqKGOA',
2252 'view_count': int,
2253 'playable_in_embed': True,
2254 'description': 'md5:2ef1d002cad520f65825346e2084e49d',
2255 },
2256 'params': {'skip_download': True}
6e634cbe 2257 }, {
2258 # Story. Requires specific player params to work.
ee27297f 2259 'url': 'https://www.youtube.com/watch?v=vv8qTUWmulI',
6e634cbe 2260 'info_dict': {
ee27297f 2261 'id': 'vv8qTUWmulI',
6e634cbe 2262 'ext': 'mp4',
ee27297f 2263 'availability': 'unlisted',
2264 'view_count': int,
2265 'channel_id': 'UCzIZ8HrzDgc-pNQDUG6avBA',
2266 'upload_date': '20220526',
2267 'categories': ['Education'],
2268 'title': 'Story',
2269 'channel': 'IT\'S HISTORY',
2270 'description': '',
2271 'uploader_id': 'BlastfromthePast',
2272 'duration': 12,
2273 'uploader': 'IT\'S HISTORY',
6e634cbe 2274 'playable_in_embed': True,
6e634cbe 2275 'age_limit': 0,
6e634cbe 2276 'live_status': 'not_live',
ee27297f 2277 'tags': [],
2278 'thumbnail': 'https://i.ytimg.com/vi_webp/vv8qTUWmulI/maxresdefault.webp',
2279 'uploader_url': 'http://www.youtube.com/user/BlastfromthePast',
2280 'channel_url': 'https://www.youtube.com/channel/UCzIZ8HrzDgc-pNQDUG6avBA',
12a1b225
A
2281 },
2282 'skip': 'stories get removed after some period of time',
ee27297f 2283 }, {
2284 'url': 'https://www.youtube.com/watch?v=tjjjtzRLHvA',
2285 'info_dict': {
2286 'id': 'tjjjtzRLHvA',
2287 'ext': 'mp4',
2288 'title': 'ハッシュタグ無し };if window.ytcsi',
2289 'upload_date': '20220323',
2290 'like_count': int,
2291 'availability': 'unlisted',
2292 'channel': 'nao20010128nao',
2293 'thumbnail': 'https://i.ytimg.com/vi_webp/tjjjtzRLHvA/maxresdefault.webp',
2294 'age_limit': 0,
2295 'uploader': 'nao20010128nao',
2296 'uploader_id': 'nao20010128nao',
2297 'categories': ['Music'],
6e634cbe 2298 'view_count': int,
2299 'description': '',
ee27297f 2300 'channel_url': 'https://www.youtube.com/channel/UCdqltm_7iv1Vs6kp6Syke5A',
2301 'channel_id': 'UCdqltm_7iv1Vs6kp6Syke5A',
2302 'live_status': 'not_live',
2303 'playable_in_embed': True,
2304 'channel_follower_count': int,
2305 'duration': 6,
2306 'tags': [],
2307 'uploader_url': 'http://www.youtube.com/user/nao20010128nao',
6e634cbe 2308 }
a4166234 2309 }, {
2310 'note': '6 channel audio',
2311 'url': 'https://www.youtube.com/watch?v=zgdo7-RRjgo',
2312 'only_matching': True,
6e634cbe 2313 }
2eb88d95
PH
2314 ]
2315
f2e8dbcc 2316 _WEBPAGE_TESTS = [
2317 # YouTube <object> embed
2318 {
2319 'url': 'http://www.improbable.com/2017/04/03/untrained-modern-youths-and-ancient-masters-in-selfie-portraits/',
2320 'md5': '873c81d308b979f0e23ee7e620b312a3',
2321 'info_dict': {
2322 'id': 'msN87y-iEx0',
2323 'ext': 'mp4',
2324 'title': 'Feynman: Mirrors FUN TO IMAGINE 6',
2325 'upload_date': '20080526',
2326 'description': 'md5:873c81d308b979f0e23ee7e620b312a3',
2327 'uploader': 'Christopher Sykes',
2328 'uploader_id': 'ChristopherJSykes',
2329 'age_limit': 0,
2330 'tags': ['feynman', 'mirror', 'science', 'physics', 'imagination', 'fun', 'cool', 'puzzle'],
2331 'channel_id': 'UCCeo--lls1vna5YJABWAcVA',
2332 'playable_in_embed': True,
2333 'thumbnail': 'https://i.ytimg.com/vi/msN87y-iEx0/hqdefault.jpg',
2334 'like_count': int,
2335 'comment_count': int,
2336 'channel': 'Christopher Sykes',
2337 'live_status': 'not_live',
2338 'channel_url': 'https://www.youtube.com/channel/UCCeo--lls1vna5YJABWAcVA',
2339 'availability': 'public',
2340 'duration': 195,
2341 'view_count': int,
2342 'categories': ['Science & Technology'],
2343 'channel_follower_count': int,
2344 'uploader_url': 'http://www.youtube.com/user/ChristopherJSykes',
2345 },
2346 'params': {
2347 'skip_download': True,
2348 }
2349 },
2350 ]
2351
@classmethod
def suitable(cls, url):
    """Reject URLs carrying a non-empty ``list`` query parameter.

    Any other URL is judged by the parent class' ``suitable``.
    """
    from ..utils import parse_qs

    list_values = parse_qs(url).get('list', [None])
    return False if list_values[0] else super().suitable(url)
201c1459 2360
def __init__(self, *args, **kwargs):
    """Forward all arguments to the parent and create two empty caches.

    ``_code_cache`` is filled by ``_load_player`` (player id -> JS source)
    and ``_player_cache`` by ``_cached`` (cache id -> result or exception).
    """
    super().__init__(*args, **kwargs)
    self._code_cache, self._player_cache = {}, {}
e0df6211 2365
adbc4ec4 2366 def _prepare_live_from_start_formats(self, formats, video_id, live_start_time, url, webpage_url, smuggled_data):
adbc4ec4
THD
2367 lock = threading.Lock()
2368
2369 is_live = True
185bf310 2370 start_time = time.time()
adbc4ec4
THD
2371 formats = [f for f in formats if f.get('is_from_start')]
2372
185bf310 2373 def refetch_manifest(format_id, delay):
2374 nonlocal formats, start_time, is_live
2375 if time.time() <= start_time + delay:
adbc4ec4
THD
2376 return
2377
2378 _, _, prs, player_url = self._download_player_responses(url, smuggled_data, video_id, webpage_url)
2379 video_details = traverse_obj(
2380 prs, (..., 'videoDetails'), expected_type=dict, default=[])
2381 microformats = traverse_obj(
2382 prs, (..., 'microformat', 'playerMicroformatRenderer'),
2383 expected_type=dict, default=[])
c646d76f 2384 _, is_live, _, formats, _ = self._list_formats(video_id, microformats, video_details, prs, player_url)
185bf310 2385 start_time = time.time()
adbc4ec4 2386
185bf310 2387 def mpd_feed(format_id, delay):
adbc4ec4
THD
2388 """
2389 @returns (manifest_url, manifest_stream_number, is_live) or None
2390 """
2391 with lock:
185bf310 2392 refetch_manifest(format_id, delay)
adbc4ec4
THD
2393
2394 f = next((f for f in formats if f['format_id'] == format_id), None)
2395 if not f:
185bf310 2396 if not is_live:
2397 self.to_screen(f'{video_id}: Video is no longer live')
2398 else:
2399 self.report_warning(
2400 f'Cannot find refreshed manifest for format {format_id}{bug_reports_message()}')
adbc4ec4
THD
2401 return None
2402 return f['manifest_url'], f['manifest_stream_number'], is_live
2403
2404 for f in formats:
a539f065 2405 f['is_live'] = True
adbc4ec4
THD
2406 f['protocol'] = 'http_dash_segments_generator'
2407 f['fragments'] = functools.partial(
2408 self._live_dash_fragments, f['format_id'], live_start_time, mpd_feed)
2409
2410 def _live_dash_fragments(self, format_id, live_start_time, mpd_feed, ctx):
2411 FETCH_SPAN, MAX_DURATION = 5, 432000
2412
2413 mpd_url, stream_number, is_live = None, None, True
2414
2415 begin_index = 0
2416 download_start_time = ctx.get('start') or time.time()
2417
2418 lack_early_segments = download_start_time - (live_start_time or download_start_time) > MAX_DURATION
2419 if lack_early_segments:
2420 self.report_warning(bug_reports_message(
2421 'Starting download from the last 120 hours of the live stream since '
2422 'YouTube does not have data before that. If you think this is wrong,'), only_once=True)
2423 lack_early_segments = True
2424
2425 known_idx, no_fragment_score, last_segment_url = begin_index, 0, None
2426 fragments, fragment_base_url = None, None
2427
a539f065 2428 def _extract_sequence_from_mpd(refresh_sequence, immediate):
adbc4ec4
THD
2429 nonlocal mpd_url, stream_number, is_live, no_fragment_score, fragments, fragment_base_url
2430 # Obtain from MPD's maximum seq value
2431 old_mpd_url = mpd_url
185bf310 2432 last_error = ctx.pop('last_error', None)
14f25df2 2433 expire_fast = immediate or last_error and isinstance(last_error, urllib.error.HTTPError) and last_error.code == 403
185bf310 2434 mpd_url, stream_number, is_live = (mpd_feed(format_id, 5 if expire_fast else 18000)
2435 or (mpd_url, stream_number, False))
2436 if not refresh_sequence:
2437 if expire_fast and not is_live:
2438 return False, last_seq
2439 elif old_mpd_url == mpd_url:
2440 return True, last_seq
adbc4ec4
THD
2441 try:
2442 fmts, _ = self._extract_mpd_formats_and_subtitles(
2443 mpd_url, None, note=False, errnote=False, fatal=False)
2444 except ExtractorError:
2445 fmts = None
2446 if not fmts:
a539f065 2447 no_fragment_score += 2
adbc4ec4
THD
2448 return False, last_seq
2449 fmt_info = next(x for x in fmts if x['manifest_stream_number'] == stream_number)
2450 fragments = fmt_info['fragments']
2451 fragment_base_url = fmt_info['fragment_base_url']
2452 assert fragment_base_url
2453
2454 _last_seq = int(re.search(r'(?:/|^)sq/(\d+)', fragments[-1]['path']).group(1))
2455 return True, _last_seq
2456
2457 while is_live:
2458 fetch_time = time.time()
2459 if no_fragment_score > 30:
2460 return
2461 if last_segment_url:
2462 # Obtain from "X-Head-Seqnum" header value from each segment
2463 try:
2464 urlh = self._request_webpage(
2465 last_segment_url, None, note=False, errnote=False, fatal=False)
2466 except ExtractorError:
2467 urlh = None
2468 last_seq = try_get(urlh, lambda x: int_or_none(x.headers['X-Head-Seqnum']))
2469 if last_seq is None:
a539f065 2470 no_fragment_score += 2
adbc4ec4
THD
2471 last_segment_url = None
2472 continue
2473 else:
a539f065
LNO
2474 should_continue, last_seq = _extract_sequence_from_mpd(True, no_fragment_score > 15)
2475 no_fragment_score += 2
185bf310 2476 if not should_continue:
adbc4ec4
THD
2477 continue
2478
2479 if known_idx > last_seq:
2480 last_segment_url = None
2481 continue
2482
2483 last_seq += 1
2484
2485 if begin_index < 0 and known_idx < 0:
2486 # skip from the start when it's negative value
2487 known_idx = last_seq + begin_index
2488 if lack_early_segments:
2489 known_idx = max(known_idx, last_seq - int(MAX_DURATION // fragments[-1]['duration']))
2490 try:
2491 for idx in range(known_idx, last_seq):
2492 # do not update sequence here or you'll get skipped some part of it
a539f065 2493 should_continue, _ = _extract_sequence_from_mpd(False, False)
185bf310 2494 if not should_continue:
adbc4ec4
THD
2495 known_idx = idx - 1
2496 raise ExtractorError('breaking out of outer loop')
2497 last_segment_url = urljoin(fragment_base_url, 'sq/%d' % idx)
2498 yield {
2499 'url': last_segment_url,
36195c44 2500 'fragment_count': last_seq,
adbc4ec4
THD
2501 }
2502 if known_idx == last_seq:
2503 no_fragment_score += 5
2504 else:
2505 no_fragment_score = 0
2506 known_idx = last_seq
2507 except ExtractorError:
2508 continue
2509
2510 time.sleep(max(0, FETCH_SPAN + fetch_time - time.time()))
2511
def _extract_player_url(self, *ytcfgs, webpage=None):
    """Return the player JS URL found in the given ytcfgs, or None.

    The first string found under ``PLAYER_JS_URL`` or under a
    ``WEB_PLAYER_CONTEXT_CONFIGS`` entry's ``jsUrl`` is joined onto
    https://www.youtube.com. ``webpage`` is accepted but not used here.
    """
    found = traverse_obj(
        ytcfgs, (..., 'PLAYER_JS_URL'), (..., 'WEB_PLAYER_CONTEXT_CONFIGS', ..., 'jsUrl'),
        get_all=False, expected_type=str)
    return urljoin('https://www.youtube.com', found) if found else None
109dd3b2 2519
b6de707d 2520 def _download_player_url(self, video_id, fatal=False):
2521 res = self._download_webpage(
2522 'https://www.youtube.com/iframe_api',
2523 note='Downloading iframe API JS', video_id=video_id, fatal=fatal)
2524 if res:
2525 player_version = self._search_regex(
2526 r'player\\?/([0-9a-fA-F]{8})\\?/', res, 'player version', fatal=fatal)
2527 if player_version:
2528 return f'https://www.youtube.com/s/player/{player_version}/player_ias.vflset/en_US/base.js'
2529
60064c53
PH
2530 def _signature_cache_id(self, example_sig):
2531 """ Return a string representation of a signature """
14f25df2 2532 return '.'.join(str(len(part)) for part in example_sig.split('.'))
60064c53 2533
e40c758c
S
@classmethod
def _extract_player_info(cls, player_url):
    """Return the ``id`` group of the first ``_PLAYER_INFO_RE`` pattern matching the URL.

    Raises ExtractorError when none of the patterns matches.
    """
    attempts = (re.search(pattern, player_url) for pattern in cls._PLAYER_INFO_RE)
    first_hit = next((hit for hit in attempts if hit), None)
    if first_hit is None:
        raise ExtractorError('Cannot identify player %r' % player_url)
    return first_hit.group('id')
e40c758c 2543
404f611f 2544 def _load_player(self, video_id, player_url, fatal=True):
109dd3b2 2545 player_id = self._extract_player_info(player_url)
2546 if player_id not in self._code_cache:
1276a43a 2547 code = self._download_webpage(
109dd3b2 2548 player_url, video_id, fatal=fatal,
2549 note='Downloading player ' + player_id,
2550 errnote='Download of %s failed' % player_url)
1276a43a 2551 if code:
2552 self._code_cache[player_id] = code
404f611f 2553 return self._code_cache.get(player_id)
109dd3b2 2554
e40c758c 2555 def _extract_signature_function(self, video_id, player_url, example_sig):
545cc85d 2556 player_id = self._extract_player_info(player_url)
e0df6211 2557
c4417ddb 2558 # Read from filesystem cache
86e5f3ed 2559 func_id = f'js_{player_id}_{self._signature_cache_id(example_sig)}'
c4417ddb 2560 assert os.path.basename(func_id) == func_id
a0e07d31 2561
ae61d108 2562 self.write_debug(f'Extracting signature function {func_id}')
580ce007 2563 cache_spec, code = self.cache.load('youtube-sigfuncs', func_id), None
83799698 2564
580ce007 2565 if not cache_spec:
2566 code = self._load_player(video_id, player_url)
404f611f 2567 if code:
109dd3b2 2568 res = self._parse_sig_js(code)
ac668111 2569 test_string = ''.join(map(chr, range(len(example_sig))))
580ce007 2570 cache_spec = [ord(c) for c in res(test_string)]
9809740b 2571 self.cache.store('youtube-sigfuncs', func_id, cache_spec)
580ce007 2572
2573 return lambda s: ''.join(s[i] for i in cache_spec)
83799698 2574
60064c53 2575 def _print_sig_code(self, func, example_sig):
404f611f 2576 if not self.get_param('youtube_print_sig_code'):
2577 return
2578
edf3e38e
PH
2579 def gen_sig_code(idxs):
2580 def _genslice(start, end, step):
78caa52a 2581 starts = '' if start == 0 else str(start)
8bcc8756 2582 ends = (':%d' % (end + step)) if end + step >= 0 else ':'
69ea8ca4 2583 steps = '' if step == 1 else (':%d' % step)
86e5f3ed 2584 return f's[{starts}{ends}{steps}]'
edf3e38e
PH
2585
2586 step = None
7af808a5
PH
2587 # Quelch pyflakes warnings - start will be set when step is set
2588 start = '(Never used)'
edf3e38e
PH
2589 for i, prev in zip(idxs[1:], idxs[:-1]):
2590 if step is not None:
2591 if i - prev == step:
2592 continue
2593 yield _genslice(start, prev, step)
2594 step = None
2595 continue
2596 if i - prev in [-1, 1]:
2597 step = i - prev
2598 start = prev
2599 continue
2600 else:
78caa52a 2601 yield 's[%d]' % prev
edf3e38e 2602 if step is None:
78caa52a 2603 yield 's[%d]' % i
edf3e38e
PH
2604 else:
2605 yield _genslice(start, i, step)
2606
ac668111 2607 test_string = ''.join(map(chr, range(len(example_sig))))
c705320f 2608 cache_res = func(test_string)
edf3e38e 2609 cache_spec = [ord(c) for c in cache_res]
78caa52a 2610 expr_code = ' + '.join(gen_sig_code(cache_spec))
60064c53 2611 signature_id_tuple = '(%s)' % (
14f25df2 2612 ', '.join(str(len(p)) for p in example_sig.split('.')))
69ea8ca4 2613 code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
78caa52a 2614 ' return %s\n') % (signature_id_tuple, expr_code)
69ea8ca4 2615 self.to_screen('Extracted signature function:\n' + code)
edf3e38e 2616
e0df6211
PH
def _parse_sig_js(self, jscode):
    """Find the signature function in the player JS and wrap it for Python.

    The function name is searched with the patterns below, extracted with
    JSInterpreter, and returned as a one-argument callable taking the
    signature string.
    """
    name_patterns = (
        r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bm=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(h\.s\)\)',
        r'\bc&&\(c=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(c\)\)',
        r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\);[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\)',
        r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
        r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
        # Obsolete patterns
        r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
        r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
    )
    funcname = self._search_regex(
        name_patterns, jscode, 'Initial JS player signature function name', group='sig')

    interpreter = JSInterpreter(jscode)
    js_function = interpreter.extract_function(funcname)
    # The extracted function takes its arguments as a list
    return lambda s: js_function([s])
2640
580ce007 2641 def _cached(self, func, *cache_id):
2642 def inner(*args, **kwargs):
2643 if cache_id not in self._player_cache:
2644 try:
2645 self._player_cache[cache_id] = func(*args, **kwargs)
2646 except ExtractorError as e:
2647 self._player_cache[cache_id] = e
2648 except Exception as e:
2649 self._player_cache[cache_id] = ExtractorError(traceback.format_exc(), cause=e)
2650
2651 ret = self._player_cache[cache_id]
2652 if isinstance(ret, Exception):
2653 raise ret
2654 return ret
2655 return inner
2656
545cc85d 2657 def _decrypt_signature(self, s, video_id, player_url):
257a2501 2658 """Turn the encrypted s field into a working signature"""
580ce007 2659 extract_sig = self._cached(
2660 self._extract_signature_function, 'sig', player_url, self._signature_cache_id(s))
2661 func = extract_sig(video_id, player_url, s)
2662 self._print_sig_code(func, s)
2663 return func(s)
404f611f 2664
def _decrypt_nsig(self, s, video_id, player_url):
    """Turn the encrypted n field into a working signature"""
    if player_url is None:
        raise ExtractorError('Cannot decrypt nsig without player_url')
    player_url = urljoin('https://www.youtube.com', player_url)

    try:
        interpreter, player_id, func_code = self._extract_n_function_code(video_id, player_url)
    except ExtractorError as err:
        raise ExtractorError('Unable to extract nsig function code', cause=err)

    if self.get_param('youtube_print_sig_code'):
        self.to_screen(f'Extracted nsig function from {player_id}:\n{func_code[1]}\n')

    try:
        cached_builder = self._cached(self._extract_n_function_from_code, 'nsig func', player_url)
        decrypted = cached_builder(interpreter, func_code)(s)
    except JSInterpreter.Exception as native_error:
        # Native interpretation failed; retry with PhantomJS if it can be set up
        try:
            interpreter = PhantomJSwrapper(self, timeout=5000)
        except ExtractorError:
            raise native_error
        self.report_warning(
            f'Native nsig extraction failed: Trying with PhantomJS\n'
            f' n = {s} ; player = {player_url}', video_id)
        self.write_debug(native_error)

        args, func_body = func_code
        script = f'console.log(function({", ".join(args)}) {{ {func_body} }}({s!r}));'
        decrypted = interpreter.execute(
            script, video_id=video_id, note='Executing signature code').strip()

    self.write_debug(f'Decrypted nsig {s} => {decrypted}')
    return decrypted
2698
90a1df30 2699 def _extract_n_function_name(self, jscode):
2700 funcname, idx = self._search_regex(
2701 r'\.get\("n"\)\)&&\(b=(?P<nfunc>[a-zA-Z0-9$]+)(?:\[(?P<idx>\d+)\])?\([a-zA-Z0-9]\)',
2702 jscode, 'Initial JS player n function name', group=('nfunc', 'idx'))
2703 if not idx:
2704 return funcname
2705
2706 return json.loads(js_to_json(self._search_regex(
2707 rf'var {re.escape(funcname)}\s*=\s*(\[.+?\]);', jscode,
2708 f'Initial JS player n function list ({funcname}.{idx})')))[int(idx)]
2709
def _extract_n_function_code(self, video_id, player_url):
    """Return (JSInterpreter, player_id, func_code) for the n-parameter function.

    ``func_code`` is loaded from the 'youtube-nsig' cache when present.
    Otherwise the player JS is loaded, the function is located by name and
    its code is extracted (by regex first, by JSInterpreter as fallback),
    then stored in the cache.
    """
    player_id = self._extract_player_info(player_url)
    func_code = self.cache.load('youtube-nsig', player_id, min_ver='2022.09.1')
    jscode = func_code or self._load_player(video_id, player_url)
    jsi = JSInterpreter(jscode)

    if func_code:
        return jsi, player_id, func_code

    func_name = self._extract_n_function_name(jscode)

    # For redundancy
    # The name is escaped because it may contain "$", which would otherwise
    # act as a regex anchor and make this pattern fail to match.
    func_code = self._search_regex(
        r'''(?xs)%s\s*=\s*function\s*\((?P<var>[\w$]+)\)\s*
            # NB: The end of the regex is intentionally kept strict
            {(?P<code>.+?}\s*return\ [\w$]+.join\(""\))};''' % re.escape(func_name),
        jscode, 'nsig function', group=('var', 'code'), default=None)
    if func_code:
        # Normalise to (list of argument names, function body)
        func_code = ([func_code[0]], func_code[1])
    else:
        self.write_debug('Extracting nsig function with jsinterp')
        func_code = jsi.extract_function_code(func_name)

    self.cache.store('youtube-nsig', player_id, func_code)
    return jsi, player_id, func_code
2735
2736 def _extract_n_function_from_code(self, jsi, func_code):
8f53dc44 2737 func = jsi.extract_function_from_code(*func_code)
f6ca640b 2738
580ce007 2739 def extract_nsig(s):
25836db6 2740 try:
2741 ret = func([s])
2742 except JSInterpreter.Exception:
2743 raise
2744 except Exception as e:
2745 raise JSInterpreter.Exception(traceback.format_exc(), cause=e)
2746
f6ca640b 2747 if ret.startswith('enhanced_except_'):
25836db6 2748 raise JSInterpreter.Exception('Signature function returned an exception')
f6ca640b 2749 return ret
580ce007 2750
2751 return extract_nsig
e0df6211 2752
109dd3b2 2753 def _extract_signature_timestamp(self, video_id, player_url, ytcfg=None, fatal=False):
2754 """
2755 Extract signatureTimestamp (sts)
2756 Required to tell API what sig/player version is in use.
2757 """
2758 sts = None
2759 if isinstance(ytcfg, dict):
2760 sts = int_or_none(ytcfg.get('STS'))
2761
2762 if not sts:
2763 # Attempt to extract from player
2764 if player_url is None:
2765 error_msg = 'Cannot extract signature timestamp without player_url.'
2766 if fatal:
2767 raise ExtractorError(error_msg)
2768 self.report_warning(error_msg)
2769 return
404f611f 2770 code = self._load_player(video_id, player_url, fatal=fatal)
2771 if code:
109dd3b2 2772 sts = int_or_none(self._search_regex(
2773 r'(?:signatureTimestamp|sts)\s*:\s*(?P<sts>[0-9]{5})', code,
2774 'JS player signature timestamp', group='sts', fatal=fatal))
2775 return sts
2776
def _mark_watched(self, video_id, player_responses):
    """Request the playback and watchtime tracking URLs of the video.

    Both URLs come from the player responses' ``playbackTracking`` data.
    If either one is missing, a warning is issued and the method returns
    without requesting the remaining ones.
    """
    # cpn generation algorithm is reverse engineered from base.js.
    # In fact it works even with dummy cpn.
    CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'

    tracking_keys = ('videostatsPlaybackUrl', 'videostatsWatchtimeUrl')
    # The index doubles as the "fully watched" flag (0 = playback, 1 = watchtime)
    for is_full, key in enumerate(tracking_keys):
        label = 'fully ' if is_full else ''
        url = get_first(player_responses, ('playbackTracking', key, 'baseUrl'),
                        expected_type=url_or_none)
        if not url:
            self.report_warning(f'Unable to mark {label}watched')
            return
        parsed_url = urllib.parse.urlparse(url)
        qs = urllib.parse.parse_qs(parsed_url.query)

        cpn = ''.join(CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(0, 16))

        # # more consistent results setting it to right before the end
        length_values = qs.get('len') or ['1.5']
        video_length = [str(float(length_values[0]) - 1)]

        qs['ver'] = ['2']
        qs['cpn'] = [cpn]
        qs['cmt'] = video_length
        qs['el'] = 'detailpage'  # otherwise defaults to "shorts"

        if is_full:
            # these seem to mark watchtime "history" in the real world
            # they're required, so send in a single value
            qs['st'] = video_length
            qs['et'] = video_length

        new_query = urllib.parse.urlencode(qs, True)
        url = urllib.parse.urlunparse(parsed_url._replace(query=new_query))

        self._download_webpage(
            url, video_id, f'Marking {label}watched',
            'Unable to mark watched', fatal=False)
d77ab8e2 2817
@classmethod
def _extract_from_webpage(cls, url, webpage):
    """Yield url_results for YouTube videos referenced by a webpage.

    An "alternate" link to a YouTube watch URL is yielded on its own and
    extraction is then stopped. Otherwise the parent class' results are
    yielded, followed by lazyYT and "YouTube Video Importer" embeds.
    """
    # Invidious Instances
    # https://github.com/yt-dlp/yt-dlp/issues/195
    # https://github.com/iv-org/invidious/pull/1730
    alternate = re.search(
        r'<link rel="alternate" href="(?P<url>https://www\.youtube\.com/watch\?v=[0-9A-Za-z_-]{11})"',
        webpage)
    if alternate:
        yield cls.url_result(alternate.group('url'), cls)
        raise cls.StopExtraction()

    yield from super()._extract_from_webpage(url, webpage)

    # lazyYT YouTube embed
    lazy_ids = re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage)
    for raw_id in lazy_ids:
        yield cls.url_result(unescapeHTML(raw_id), cls, raw_id)

    # Wordpress "YouTube Video Importer" plugin
    importer_hits = re.findall(r'''(?x)<div[^>]+
        class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
        data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
    # Each hit is (quote 1, quote 2, video id)
    for _, _, importer_id in importer_hits:
        yield cls.url_result(importer_id, cls, importer_id)
66c9fa36 2841
97665381
PH
@classmethod
def extract_id(cls, url):
    """Return the video id for ``url``; raise ExtractorError if there is none."""
    video_id = cls.get_temp_id(url)
    if video_id:
        return video_id
    raise ExtractorError(f'Invalid URL: {url}')
c5e8d7af 2848
def _extract_chapters_from_json(self, data, duration):
    """Extract chapters from the chaptered player bar found in ``data``."""
    chapters_path = (
        'playerOverlays', 'playerOverlayRenderer', 'decoratedPlayerBarRenderer',
        'decoratedPlayerBarRenderer', 'playerBar', 'chapteredPlayerBarRenderer', 'chapters',
    )
    chapter_list = traverse_obj(data, chapters_path, expected_type=list)

    def start_seconds(chapter):
        # timeRangeStartMillis is scaled down by 1000
        start_millis = traverse_obj(chapter, ('chapterRenderer', 'timeRangeStartMillis'))
        return float_or_none(start_millis, scale=1000)

    def title_of(chapter):
        return traverse_obj(
            chapter, ('chapterRenderer', 'title', 'simpleText'), expected_type=str)

    return self._extract_chapters(
        chapter_list, chapter_time=start_seconds, chapter_title=title_of, duration=duration)
2863
def _extract_chapters_from_engagement_panel(self, data, duration):
    """Return the chapters of the first engagement panel that yields any.

    Every macro markers list under ``engagementPanels`` is tried in order;
    an empty list is returned when none produces chapters.
    """
    content_list = traverse_obj(
        data,
        ('engagementPanels', ..., 'engagementPanelSectionListRenderer', 'content', 'macroMarkersListRenderer', 'contents'),
        expected_type=list, default=[])

    def chapter_time(chapter):
        return parse_duration(self._get_text(chapter, 'timeDescription'))

    def chapter_title(chapter):
        return self._get_text(chapter, 'title')

    for contents in content_list:
        markers = traverse_obj(contents, (..., 'macroMarkersListItemRenderer'))
        chapters = self._extract_chapters(markers, chapter_time, chapter_title, duration)
        if chapters:
            return chapters
    return []
7c365c21 2876
1890fc63 2877 def _extract_chapters_from_description(self, description, duration):
2878 return self._extract_chapters(
2879 re.findall(r'(?m)^((?:\d+:)?\d{1,2}:\d{2})\b\W*\s(.+?)\s*$', description or ''),
2880 chapter_time=lambda x: parse_duration(x[0]), chapter_title=lambda x: x[1],
2881 duration=duration, strict=False)
84213ea8 2882
1890fc63 2883 def _extract_chapters(self, chapter_list, chapter_time, chapter_title, duration, strict=True):
2884 if not duration:
2885 return
2886 chapter_list = [{
2887 'start_time': chapter_time(chapter),
2888 'title': chapter_title(chapter),
2889 } for chapter in chapter_list or []]
2890 if not strict:
2891 chapter_list.sort(key=lambda c: c['start_time'] or 0)
2892
a3976e07 2893 chapters = [{'start_time': 0}]
1890fc63 2894 for idx, chapter in enumerate(chapter_list):
a3976e07 2895 if chapter['start_time'] is None:
1890fc63 2896 self.report_warning(f'Incomplete chapter {idx}')
2897 elif chapters[-1]['start_time'] <= chapter['start_time'] <= duration:
1890fc63 2898 chapters.append(chapter)
2899 else:
2900 self.report_warning(f'Invalid start time for chapter "{chapter["title"]}"')
a3976e07 2901 return chapters[1:]
84213ea8 2902
a1c5d2ca
M
2903 def _extract_comment(self, comment_renderer, parent=None):
2904 comment_id = comment_renderer.get('commentId')
2905 if not comment_id:
2906 return
fe93e2c4 2907
052e1350 2908 text = self._get_text(comment_renderer, 'contentText')
fe93e2c4 2909
49bd8c66 2910 # note: timestamp is an estimate calculated from the current time and time_text
f3aa3c3f 2911 timestamp, time_text = self._extract_time_text(comment_renderer, 'publishedTimeText')
052e1350 2912 author = self._get_text(comment_renderer, 'authorText')
a1c5d2ca 2913 author_id = try_get(comment_renderer,
14f25df2 2914 lambda x: x['authorEndpoint']['browseEndpoint']['browseId'], str)
fe93e2c4 2915
49bd8c66 2916 votes = parse_count(try_get(comment_renderer, (lambda x: x['voteCount']['simpleText'],
14f25df2 2917 lambda x: x['likeCount']), str)) or 0
a1c5d2ca 2918 author_thumbnail = try_get(comment_renderer,
14f25df2 2919 lambda x: x['authorThumbnail']['thumbnails'][-1]['url'], str)
a1c5d2ca
M
2920
2921 author_is_uploader = try_get(comment_renderer, lambda x: x['authorIsChannelOwner'], bool)
97524332 2922 is_favorited = 'creatorHeart' in (try_get(
2923 comment_renderer, lambda x: x['actionButtons']['commentActionButtonsRenderer'], dict) or {})
a1c5d2ca
M
2924 return {
2925 'id': comment_id,
2926 'text': text,
d92f5d5a 2927 'timestamp': timestamp,
a1c5d2ca
M
2928 'time_text': time_text,
2929 'like_count': votes,
97524332 2930 'is_favorited': is_favorited,
a1c5d2ca
M
2931 'author': author,
2932 'author_id': author_id,
2933 'author_thumbnail': author_thumbnail,
2934 'author_is_uploader': author_is_uploader,
2935 'parent': parent or 'root'
2936 }
2937
46383212 2938 def _comment_entries(self, root_continuation_data, ytcfg, video_id, parent=None, tracker=None):
2939
2940 get_single_config_arg = lambda c: self._configuration_arg(c, [''])[0]
2d6659b9 2941
2942 def extract_header(contents):
2d6659b9 2943 _continuation = None
2944 for content in contents:
46383212 2945 comments_header_renderer = traverse_obj(content, 'commentsHeaderRenderer')
f0d785d3 2946 expected_comment_count = self._get_count(
2947 comments_header_renderer, 'countText', 'commentsCount')
fe93e2c4 2948
2d6659b9 2949 if expected_comment_count:
46383212 2950 tracker['est_total'] = expected_comment_count
2951 self.to_screen(f'Downloading ~{expected_comment_count} comments')
2952 comment_sort_index = int(get_single_config_arg('comment_sort') != 'top') # 1 = new, 0 = top
2d6659b9 2953
2954 sort_menu_item = try_get(
2955 comments_header_renderer,
2956 lambda x: x['sortMenu']['sortFilterSubMenuRenderer']['subMenuItems'][comment_sort_index], dict) or {}
2957 sort_continuation_ep = sort_menu_item.get('serviceEndpoint') or {}
2958
2959 _continuation = self._extract_continuation_ep_data(sort_continuation_ep) or self._extract_continuation(sort_menu_item)
2960 if not _continuation:
2961 continue
2962
46383212 2963 sort_text = str_or_none(sort_menu_item.get('title'))
2964 if not sort_text:
2d6659b9 2965 sort_text = 'top comments' if comment_sort_index == 0 else 'newest first'
46383212 2966 self.to_screen('Sorting comments by %s' % sort_text.lower())
2d6659b9 2967 break
a2160aa4 2968 return _continuation
a1c5d2ca 2969
2d6659b9 2970 def extract_thread(contents):
a1c5d2ca 2971 if not parent:
46383212 2972 tracker['current_page_thread'] = 0
a1c5d2ca 2973 for content in contents:
46383212 2974 if not parent and tracker['total_parent_comments'] >= max_parents:
2975 yield
a1c5d2ca 2976 comment_thread_renderer = try_get(content, lambda x: x['commentThreadRenderer'])
46383212 2977 comment_renderer = get_first(
2978 (comment_thread_renderer, content), [['commentRenderer', ('comment', 'commentRenderer')]],
2979 expected_type=dict, default={})
a1c5d2ca 2980
a1c5d2ca
M
2981 comment = self._extract_comment(comment_renderer, parent)
2982 if not comment:
2983 continue
46383212 2984
2985 tracker['running_total'] += 1
2986 tracker['total_reply_comments' if parent else 'total_parent_comments'] += 1
a1c5d2ca 2987 yield comment
46383212 2988
a1c5d2ca
M
2989 # Attempt to get the replies
2990 comment_replies_renderer = try_get(
2991 comment_thread_renderer, lambda x: x['replies']['commentRepliesRenderer'], dict)
2992
2993 if comment_replies_renderer:
46383212 2994 tracker['current_page_thread'] += 1
a1c5d2ca 2995 comment_entries_iter = self._comment_entries(
99e9e001 2996 comment_replies_renderer, ytcfg, video_id,
46383212 2997 parent=comment.get('id'), tracker=tracker)
86e5f3ed 2998 yield from itertools.islice(comment_entries_iter, min(
2999 max_replies_per_thread, max(0, max_replies - tracker['total_reply_comments'])))
a1c5d2ca 3000
46383212 3001 # Keeps track of counts across recursive calls
3002 if not tracker:
3003 tracker = dict(
3004 running_total=0,
3005 est_total=0,
3006 current_page_thread=0,
3007 total_parent_comments=0,
3008 total_reply_comments=0)
3009
3010 # TODO: Deprecated
2d6659b9 3011 # YouTube comments have a max depth of 2
46383212 3012 max_depth = int_or_none(get_single_config_arg('max_comment_depth'))
3013 if max_depth:
da4db748 3014 self._downloader.deprecated_feature('[youtube] max_comment_depth extractor argument is deprecated. '
3015 'Set max replies in the max-comments extractor argument instead')
2d6659b9 3016 if max_depth == 1 and parent:
3017 return
a1c5d2ca 3018
46383212 3019 max_comments, max_parents, max_replies, max_replies_per_thread, *_ = map(
3020 lambda p: int_or_none(p, default=sys.maxsize), self._configuration_arg('max_comments', ) + [''] * 4)
2d6659b9 3021
46383212 3022 continuation = self._extract_continuation(root_continuation_data)
aae16f6e 3023
46383212 3024 response = None
6e634cbe 3025 is_forced_continuation = False
2d6659b9 3026 is_first_continuation = parent is None
6e634cbe 3027 if is_first_continuation and not continuation:
3028 # Sometimes you can get comments by generating the continuation yourself,
3029 # even if YouTube initially reports them being disabled - e.g. stories comments.
3030 # Note: if the comment section is actually disabled, YouTube may return a response with
3031 # required check_get_keys missing. So we will disable that check initially in this case.
3032 continuation = self._build_api_continuation_query(self._generate_comment_continuation(video_id))
3033 is_forced_continuation = True
a1c5d2ca
M
3034
3035 for page_num in itertools.count(0):
3036 if not continuation:
3037 break
46383212 3038 headers = self.generate_api_headers(ytcfg=ytcfg, visitor_data=self._extract_visitor_data(response))
3039 comment_prog_str = f"({tracker['running_total']}/{tracker['est_total']})"
2d6659b9 3040 if page_num == 0:
3041 if is_first_continuation:
3042 note_prefix = 'Downloading comment section API JSON'
a1c5d2ca 3043 else:
2d6659b9 3044 note_prefix = ' Downloading comment API JSON reply thread %d %s' % (
46383212 3045 tracker['current_page_thread'], comment_prog_str)
2d6659b9 3046 else:
3047 note_prefix = '%sDownloading comment%s API JSON page %d %s' % (
3048 ' ' if parent else '', ' replies' if parent else '',
3049 page_num, comment_prog_str)
3050
3051 response = self._extract_response(
fe93e2c4 3052 item_id=None, query=continuation,
2d6659b9 3053 ep='next', ytcfg=ytcfg, headers=headers, note=note_prefix,
6e634cbe 3054 check_get_keys='onResponseReceivedEndpoints' if not is_forced_continuation else None)
3055 is_forced_continuation = False
46383212 3056 continuation_contents = traverse_obj(
3057 response, 'onResponseReceivedEndpoints', expected_type=list, default=[])
a1c5d2ca 3058
2d6659b9 3059 continuation = None
46383212 3060 for continuation_section in continuation_contents:
3061 continuation_items = traverse_obj(
3062 continuation_section,
3063 (('reloadContinuationItemsCommand', 'appendContinuationItemsAction'), 'continuationItems'),
3064 get_all=False, expected_type=list) or []
3065 if is_first_continuation:
3066 continuation = extract_header(continuation_items)
3067 is_first_continuation = False
2d6659b9 3068 if continuation:
a1c5d2ca 3069 break
46383212 3070 continue
a1c5d2ca 3071
46383212 3072 for entry in extract_thread(continuation_items):
3073 if not entry:
3074 return
3075 yield entry
3076 continuation = self._extract_continuation({'contents': continuation_items})
3077 if continuation:
2d6659b9 3078 break
a1c5d2ca 3079
6e634cbe 3080 message = self._get_text(root_continuation_data, ('contents', ..., 'messageRenderer', 'text'), max_runs=1)
3081 if message and not parent and tracker['running_total'] == 0:
3082 self.report_warning(f'Youtube said: {message}', video_id=video_id, only_once=True)
3083
3084 @staticmethod
3085 def _generate_comment_continuation(video_id):
3086 """
3087 Generates initial comment section continuation token from given video id
3088 """
3089 token = f'\x12\r\x12\x0b{video_id}\x18\x062\'"\x11"\x0b{video_id}0\x00x\x020\x00B\x10comments-section'
3090 return base64.b64encode(token.encode()).decode()
3091
def _get_comments(self, ytcfg, video_id, contents, webpage):
    """
    Entry point for comment extraction.

    Locates the `comment-item-section` renderer inside `contents` and lazily
    yields the comments produced for it, capped at the first value of the
    `max_comments` extractor argument (unlimited when it is not a number).
    """
    def iter_comments():
        section = None
        for candidate in traverse_obj(contents, (..., 'itemSectionRenderer'), default={}):
            if candidate.get('sectionIdentifier') == 'comment-item-section':
                section = candidate
                break
        yield from self._comment_entries(section, ytcfg, video_id)

    limit = int_or_none(self._configuration_arg('max_comments', [''])[0])
    return itertools.islice(iter_comments(), 0, limit)
a1c5d2ca 3102
109dd3b2 3103 @staticmethod
99e9e001 3104 def _get_checkok_params():
3105 return {'contentCheckOk': True, 'racyCheckOk': True}
3106
3107 @classmethod
3108 def _generate_player_context(cls, sts=None):
109dd3b2 3109 context = {
3110 'html5Preference': 'HTML5_PREF_WANTS',
3111 }
3112 if sts is not None:
3113 context['signatureTimestamp'] = sts
3114 return {
3115 'playbackContext': {
3116 'contentPlaybackContext': context
a1a7907b 3117 },
99e9e001 3118 **cls._get_checkok_params()
109dd3b2 3119 }
3120
e7e94f2a
D
@staticmethod
def _is_agegated(player_response):
    """
    Tell whether `player_response` reports the video as age-gated.

    True when `playabilityStatus` carries a `desktopLegacyAgeGateReason`, or
    when its `status`/`reason` contains one of the known age-gate markers.
    Matching is case-insensitive: the status markers below are written in
    lower case, while the API reports statuses in upper case
    (e.g. AGE_VERIFICATION_REQUIRED).
    """
    if traverse_obj(player_response, ('playabilityStatus', 'desktopLegacyAgeGateReason')):
        return True

    reasons = traverse_obj(player_response, ('playabilityStatus', ('status', 'reason')), default=[])
    AGE_GATE_REASONS = (
        'confirm your age', 'age-restricted', 'inappropriate',  # reason
        'age_verification_required', 'age_check_required',  # status
    )
    return any(
        expected in reason.lower()
        for reason in reasons if isinstance(reason, str)  # ignore malformed (non-text) values
        for expected in AGE_GATE_REASONS)
3132
@staticmethod
def _is_unplayable(player_response):
    """Tell whether the response's `playabilityStatus.status` is exactly 'UNPLAYABLE'."""
    status = traverse_obj(player_response, ('playabilityStatus', 'status'))
    return status == 'UNPLAYABLE'
9275f62c 3136
# Opaque parameter blob: sent as `params` in player API queries (for stories and
# the android client) and as the `pp` query parameter of a story's watch page
_STORY_PLAYER_PARAMS = '8AEB'
3138
def _extract_player_response(self, client, video_id, master_ytcfg, player_ytcfg, player_url, initial_pr, smuggled_data):
    """
    Download the `player` API response of `video_id` for one innertube client.

    The signature timestamp is looked up (non-fatally) only when `player_url`
    is given. The request itself is fatal.

    @returns  The player response, or None if it is empty.
    """
    session_index = self._extract_session_index(player_ytcfg, master_ytcfg)
    syncid = self._extract_account_syncid(player_ytcfg, master_ytcfg, initial_pr)
    sts = None
    if player_url:
        sts = self._extract_signature_timestamp(video_id, player_url, master_ytcfg, fatal=False)
    headers = self.generate_api_headers(
        ytcfg=player_ytcfg, account_syncid=syncid, session_index=session_index, default_client=client)

    yt_query = {'videoId': video_id}
    # Stories and the android client are queried with the story player params
    if smuggled_data.get('is_story') or _split_innertube_client(client)[0] == 'android':
        yt_query['params'] = self._STORY_PLAYER_PARAMS
    yt_query.update(self._generate_player_context(sts))

    client_label = client.replace('_', ' ').strip()
    response = self._extract_response(
        item_id=video_id, ep='player', query=yt_query,
        ytcfg=player_ytcfg, headers=headers, fatal=True,
        default_client=client,
        note=f'Downloading {client_label} player API JSON')
    return response or None
3160
def _get_requested_clients(self, url, smuggled_data):
    """
    Resolve the `player_client` extractor argument into an ordered, de-duplicated client list.

    Recognised values are any client of INNERTUBE_CLIENTS not starting with `_`,
    `default` (android + web) and `all` (every allowed client, highest priority
    first). Unknown values are skipped with a warning. When nothing valid was
    requested the default clients are used. For music URLs, the `<client>_music`
    variant of each selected client is appended if it exists.
    """
    requested_clients = []
    default = ['android', 'web']
    allowed_clients = sorted(
        (client for client in INNERTUBE_CLIENTS if not client.startswith('_')),
        key=lambda client: INNERTUBE_CLIENTS[client]['priority'], reverse=True)
    for client in self._configuration_arg('player_client'):
        if client in allowed_clients:
            requested_clients.append(client)
        elif client == 'default':
            requested_clients.extend(default)
        elif client == 'all':
            requested_clients.extend(allowed_clients)
        else:
            self.report_warning(f'Skipping unsupported client {client}')
    if not requested_clients:
        # Copy, so that the extension below never aliases `default`
        requested_clients = list(default)

    if smuggled_data.get('is_music_url') or self.is_music_url(url):
        # Build the additions first: extending a list from a generator that
        # iterates the same list would also visit the freshly appended names
        music_clients = [
            f'{client}_music' for client in requested_clients
            if f'{client}_music' in INNERTUBE_CLIENTS]
        requested_clients.extend(music_clients)

    return orderedSet(requested_clients)
cf7e015f 3184
def _extract_player_responses(self, clients, video_id, webpage, master_ytcfg, smuggled_data):
    """
    Collect player responses for `video_id` from each of the given clients.

    Clients are tried in the given order. Extra clients may be queued on the
    fly when a response is unplayable or age-gated. A failing client is not
    fatal as long as at least one response was collected; otherwise the last
    error is re-raised.

    @returns  (list of player responses, player URL or None)
    """
    initial_pr = None
    if webpage:
        initial_pr = self._search_json(
            self._YT_INITIAL_PLAYER_RESPONSE_RE, webpage, 'initial player response', video_id, fatal=False)

    seen_clients = set(clients)
    # Reversed so that list.pop() hands the clients out in their original order
    pending = list(reversed(clients))
    prs = []

    def append_client(*client_names):
        """ Append the first client name that exists but not already used """
        for name in client_names:
            resolved = _split_innertube_client(name)[0]
            if resolved not in INNERTUBE_CLIENTS:
                continue
            if resolved not in seen_clients:
                pending.append(name)
                seen_clients.add(resolved)
            return

    # Android player_response does not have microFormats which are needed for
    # extraction of some data. So we return the initial_pr with formats
    # stripped out even if not requested by the user
    # See: https://github.com/yt-dlp/yt-dlp/issues/501
    if initial_pr:
        prs.append({**initial_pr, 'streamingData': None})

    last_error = None
    tried_iframe_fallback = False
    player_url = None
    while pending:
        client, base_client, variant = _split_innertube_client(pending.pop())
        player_skip = self._configuration_arg('player_skip')

        player_ytcfg = master_ytcfg if client == 'web' else {}
        if client != 'web' and 'configs' not in player_skip:
            player_ytcfg = self._download_ytcfg(client, video_id) or player_ytcfg

        player_url = player_url or self._extract_player_url(master_ytcfg, player_ytcfg, webpage=webpage)
        require_js_player = self._get_default_ytcfg(client).get('REQUIRE_JS_PLAYER')
        if 'js' in player_skip:
            require_js_player = False
            player_url = None

        # The iframe fallback is attempted at most once per video
        if require_js_player and not player_url and not tried_iframe_fallback:
            player_url = self._download_player_url(video_id)
            tried_iframe_fallback = True

        try:
            if client == 'web' and initial_pr:
                pr = initial_pr
            else:
                pr = self._extract_player_response(
                    client, video_id, player_ytcfg or master_ytcfg, player_ytcfg,
                    player_url if require_js_player else None, initial_pr, smuggled_data)
        except ExtractorError as e:
            # Only the most recent error is kept; earlier ones become warnings
            if last_error:
                self.report_warning(last_error)
            last_error = e
            continue

        if pr:
            # YouTube may return a different video player response than expected.
            # See: https://github.com/TeamNewPipe/NewPipe/issues/8713
            pr_video_id = traverse_obj(pr, ('videoDetails', 'videoId'))
            if pr_video_id and pr_video_id != video_id:
                self.report_warning(
                    f'Skipping player response from {client} client (got player response for video "{pr_video_id}" instead of "{video_id}")' + bug_reports_message())
            else:
                prs.append(pr)

        # creator clients can bypass AGE_VERIFICATION_REQUIRED if logged in
        if variant == 'embedded' and self._is_unplayable(pr) and self.is_authenticated:
            append_client(f'{base_client}_creator')
        elif self._is_agegated(pr):
            if variant == 'tv_embedded':
                append_client(f'{base_client}_embedded')
            elif not variant:
                append_client(f'tv_embedded.{base_client}', f'{base_client}_embedded')

    if last_error:
        if not prs:
            raise last_error
        self.report_warning(last_error)
    return prs, player_url
11f9be09 3266
def _extract_formats_and_subtitles(self, streaming_data, video_id, player_url, is_live, duration):
    """
    Generate the formats found in `streaming_data`, followed by the subtitles.

    Yields one dict per usable https format, then one per HLS/DASH manifest
    format (unless skipped through the `skip` extractor argument or the
    `youtube_include_*_manifest` params). The LAST item yielded is not a
    format but the dict of subtitles collected from the manifests.

    @param streaming_data  List of `streamingData` dicts from the player responses
    @param player_url      JS player URL; formats needing signature decryption
                           are dropped when it is missing
    @param duration        Video duration in seconds, used to spot damaged
                           formats; the check is skipped when it is None
    """
    itags, stream_ids = {}, []
    itag_qualities, res_qualities = {}, {0: None}
    q = qualities([
        # Normally tiny is the smallest video-only formats. But
        # audio-only formats with unknown quality may get tagged as tiny
        'tiny',
        'audio_quality_ultralow', 'audio_quality_low', 'audio_quality_medium', 'audio_quality_high',  # Audio only formats
        'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres'
    ])
    streaming_formats = traverse_obj(streaming_data, (..., ('formats', 'adaptiveFormats'), ...), default=[])

    for fmt in streaming_formats:
        if fmt.get('targetDurationSec'):
            continue

        itag = str_or_none(fmt.get('itag'))
        audio_track = fmt.get('audioTrack') or {}
        stream_id = '%s.%s' % (itag or '', audio_track.get('id', ''))
        if stream_id in stream_ids:
            continue

        quality = fmt.get('quality')
        height = int_or_none(fmt.get('height'))
        if quality == 'tiny' or not quality:
            # `or ''` also covers an explicit null audioQuality
            quality = (fmt.get('audioQuality') or '').lower() or quality
        # The 3gp format (17) in android client has a quality of "small",
        # but is actually worse than other formats
        if itag == '17':
            quality = 'tiny'
        if quality:
            if itag:
                itag_qualities[itag] = quality
            if height:
                res_qualities[height] = quality
        # FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment
        # (adding `&sq=0` to the URL) and parsing emsg box to determine the
        # number of fragment that would subsequently requested with (`&sq=N`)
        if fmt.get('type') == 'FORMAT_STREAM_TYPE_OTF':
            continue

        fmt_url = fmt.get('url')
        if not fmt_url:
            sc = urllib.parse.parse_qs(fmt.get('signatureCipher'))
            fmt_url = url_or_none(try_get(sc, lambda x: x['url'][0]))
            encrypted_sig = try_get(sc, lambda x: x['s'][0])
            if not all((sc, fmt_url, player_url, encrypted_sig)):
                continue
            try:
                fmt_url += '&%s=%s' % (
                    traverse_obj(sc, ('sp', -1)) or 'signature',
                    self._decrypt_signature(encrypted_sig, video_id, player_url)
                )
            except ExtractorError as e:
                self.report_warning('Signature extraction failed: Some formats may be missing',
                                    video_id=video_id, only_once=True)
                self.write_debug(e, only_once=True)
                continue

        query = parse_qs(fmt_url)
        throttled = False
        if query.get('n'):
            try:
                decrypt_nsig = self._cached(self._decrypt_nsig, 'nsig', query['n'][0])
                fmt_url = update_url_query(fmt_url, {
                    'n': decrypt_nsig(query['n'][0], video_id, player_url)
                })
            except ExtractorError as e:
                phantomjs_hint = ''
                if isinstance(e, JSInterpreter.Exception):
                    phantomjs_hint = (f' Install {self._downloader._format_err("PhantomJS", self._downloader.Styles.EMPHASIS)} '
                                      f'to workaround the issue. {PhantomJSwrapper.INSTALL_HINT}\n')
                self.report_warning(
                    f'nsig extraction failed: You may experience throttling for some formats\n{phantomjs_hint}'
                    f' n = {query["n"][0]} ; player = {player_url}', video_id=video_id, only_once=True)
                self.write_debug(e, only_once=True)
                throttled = True

        if itag:
            itags[itag] = 'https'
            stream_ids.append(stream_id)

        tbr = float_or_none(fmt.get('averageBitrate') or fmt.get('bitrate'), 1000)
        language_preference = (
            10 if audio_track.get('audioIsDefault')
            else -10 if 'descriptive' in (audio_track.get('displayName') or '').lower()
            else -1)
        # Some formats may have much smaller duration than others (possibly damaged during encoding)
        # E.g. 2-nOtRESiUc Ref: https://github.com/yt-dlp/yt-dlp/issues/2823
        # Make sure to avoid false positives with small duration differences.
        # E.g. __2ABJjxzNo, ySuUZEjARPY
        is_damaged = try_get(fmt, lambda x: float(x['approxDurationMs']) / duration < 500)
        if is_damaged:
            self.report_warning(
                f'{video_id}: Some formats are possibly damaged. They will be deprioritized', only_once=True)
        dct = {
            'asr': int_or_none(fmt.get('audioSampleRate')),
            'filesize': int_or_none(fmt.get('contentLength')),
            'format_id': itag,
            'format_note': join_nonempty(
                '%s%s' % (audio_track.get('displayName') or '',
                          ' (default)' if language_preference > 0 else ''),
                # `quality` is None when the format reports no quality at all
                fmt.get('qualityLabel') or (quality or '').replace('audio_quality_', ''),
                try_get(fmt, lambda x: x['projectionType'].replace('RECTANGULAR', '').lower()),
                try_get(fmt, lambda x: x['spatialAudioType'].replace('SPATIAL_AUDIO_TYPE_', '').lower()),
                throttled and 'THROTTLED', is_damaged and 'DAMAGED', delim=', '),
            # Format 22 is likely to be damaged. See https://github.com/yt-dlp/yt-dlp/issues/3372
            'source_preference': -10 if throttled else -5 if itag == '22' else -1,
            'fps': int_or_none(fmt.get('fps')) or None,
            'audio_channels': fmt.get('audioChannels'),
            'height': height,
            'quality': q(quality),
            'has_drm': bool(fmt.get('drmFamilies')),
            'tbr': tbr,
            'url': fmt_url,
            'width': int_or_none(fmt.get('width')),
            'language': join_nonempty(audio_track.get('id', '').split('.')[0],
                                      'desc' if language_preference < -1 else ''),
            'language_preference': language_preference,
            # Strictly de-prioritize damaged and 3gp formats
            'preference': -10 if is_damaged else -2 if itag == '17' else None,
        }
        mime_mobj = re.match(
            r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', fmt.get('mimeType') or '')
        if mime_mobj:
            dct['ext'] = mimetype2ext(mime_mobj.group(1))
            dct.update(parse_codecs(mime_mobj.group(2)))
        no_audio = dct.get('acodec') == 'none'
        no_video = dct.get('vcodec') == 'none'
        if no_audio:
            dct['vbr'] = tbr
        if no_video:
            dct['abr'] = tbr
        if no_audio or no_video:
            dct['downloader_options'] = {
                # Youtube throttles chunks >~10M
                'http_chunk_size': 10485760,
            }
            if dct.get('ext'):
                dct['container'] = dct['ext'] + '_dash'
        yield dct

    live_from_start = is_live and self.get_param('live_from_start')
    # Work on a private set rather than mutating whatever _configuration_arg returned
    skip_manifests = set(self._configuration_arg('skip'))
    if not self.get_param('youtube_include_hls_manifest', True):
        skip_manifests.add('hls')
    if not self.get_param('youtube_include_dash_manifest', True):
        skip_manifests.add('dash')
    get_dash = 'dash' not in skip_manifests and (
        not is_live or live_from_start or self._configuration_arg('include_live_dash'))
    get_hls = not live_from_start and 'hls' not in skip_manifests

    def process_manifest_format(f, proto, itag):
        """Assign a unique format_id and a quality to manifest format `f`; False if it is a duplicate"""
        if itag in itags:
            if itags[itag] == proto or f'{itag}-{proto}' in itags:
                return False
            itag = f'{itag}-{proto}'
        if itag:
            f['format_id'] = itag
            itags[itag] = proto

        f['quality'] = q(itag_qualities.get(try_get(f, lambda f: f['format_id'].split('-')[0]), -1))
        if f['quality'] == -1 and f.get('height'):
            # Fall back to the quality of the https format with the closest height
            f['quality'] = q(res_qualities[min(res_qualities, key=lambda x: abs(x - f['height']))])
        return True

    subtitles = {}
    for sd in streaming_data:
        hls_manifest_url = get_hls and sd.get('hlsManifestUrl')
        if hls_manifest_url:
            fmts, subs = self._extract_m3u8_formats_and_subtitles(hls_manifest_url, video_id, 'mp4', fatal=False, live=is_live)
            subtitles = self._merge_subtitles(subs, subtitles)
            for f in fmts:
                if process_manifest_format(f, 'hls', self._search_regex(
                        r'/itag/(\d+)', f['url'], 'itag', default=None)):
                    yield f

        dash_manifest_url = get_dash and sd.get('dashManifestUrl')
        if dash_manifest_url:
            formats, subs = self._extract_mpd_formats_and_subtitles(dash_manifest_url, video_id, fatal=False)
            subtitles = self._merge_subtitles(subs, subtitles)  # Prioritize HLS subs over DASH
            for f in formats:
                if process_manifest_format(f, 'dash', f['format_id']):
                    f['filesize'] = int_or_none(self._search_regex(
                        r'/clen/(\d+)', f.get('fragment_base_url') or f['url'], 'file size', default=None))
                    if live_from_start:
                        f['is_from_start'] = True

                    yield f
    # Callers unpack this trailing item separately from the formats
    yield subtitles
11f9be09 3457
720c3099 3458 def _extract_storyboard(self, player_responses, duration):
3459 spec = get_first(
3460 player_responses, ('storyboards', 'playerStoryboardSpecRenderer', 'spec'), default='').split('|')[::-1]
596379e2 3461 base_url = url_or_none(urljoin('https://i.ytimg.com/', spec.pop() or None))
3462 if not base_url:
720c3099 3463 return
720c3099 3464 L = len(spec) - 1
3465 for i, args in enumerate(spec):
3466 args = args.split('#')
3467 counts = list(map(int_or_none, args[:5]))
3468 if len(args) != 8 or not all(counts):
3469 self.report_warning(f'Malformed storyboard {i}: {"#".join(args)}{bug_reports_message()}')
3470 continue
3471 width, height, frame_count, cols, rows = counts
3472 N, sigh = args[6:]
3473
3474 url = base_url.replace('$L', str(L - i)).replace('$N', N) + f'&sigh={sigh}'
3475 fragment_count = frame_count / (cols * rows)
3476 fragment_duration = duration / fragment_count
3477 yield {
3478 'format_id': f'sb{i}',
3479 'format_note': 'storyboard',
3480 'ext': 'mhtml',
3481 'protocol': 'mhtml',
3482 'acodec': 'none',
3483 'vcodec': 'none',
3484 'url': url,
3485 'width': width,
3486 'height': height,
45e8a04e 3487 'fps': frame_count / duration,
3488 'rows': rows,
3489 'columns': cols,
720c3099 3490 'fragments': [{
b3edc806 3491 'url': url.replace('$M', str(j)),
720c3099 3492 'duration': min(fragment_duration, duration - (j * fragment_duration)),
3493 } for j in range(math.ceil(fragment_count))],
3494 }
3495
def _download_player_responses(self, url, smuggled_data, video_id, webpage_url):
    """
    Fetch the watch page (unless skipped) and the player responses of a video.

    The webpage download is non-fatal and is skipped entirely when `webpage`
    is listed in the `player_skip` extractor argument.

    @returns  (webpage or None, master ytcfg, player responses, player URL)
    """
    skip_webpage = 'webpage' in self._configuration_arg('player_skip')
    webpage = None
    if not skip_webpage:
        webpage_query = dict(bpctr='9999999999', has_verified='1')
        if smuggled_data.get('is_story'):
            webpage_query['pp'] = self._STORY_PLAYER_PARAMS
        webpage = self._download_webpage(
            webpage_url, video_id, fatal=False, query=webpage_query)

    # Fall back to the built-in config when the page yields none
    master_ytcfg = self.extract_ytcfg(video_id, webpage)
    if not master_ytcfg:
        master_ytcfg = self._get_default_ytcfg()

    requested_clients = self._get_requested_clients(url, smuggled_data)
    player_responses, player_url = self._extract_player_responses(
        requested_clients, video_id, webpage, master_ytcfg, smuggled_data)

    return webpage, master_ytcfg, player_responses, player_url
3512
def _list_formats(self, video_id, microformats, video_details, player_responses, player_url, duration=None):
    """
    Work out the live state of the video and extract its formats.

    `isLive` from the video details takes precedence; `isLiveNow` from the
    live broadcast details is only consulted when the former is absent.

    @returns  (live broadcast details, is_live, streaming data, formats, subtitles)
    """
    live_broadcast_details = traverse_obj(microformats, (..., 'liveBroadcastDetails'))

    is_live = get_first(video_details, 'isLive')
    if is_live is None:
        is_live = get_first(live_broadcast_details, 'isLiveNow')

    streaming_data = traverse_obj(player_responses, (..., 'streamingData'), default=[])
    # The generator yields every format first and the subtitles dict last
    extracted = self._extract_formats_and_subtitles(
        streaming_data, video_id, player_url, is_live, duration)
    *formats, subtitles = extracted

    return (live_broadcast_details, is_live, streaming_data, formats, subtitles)
adbc4ec4
THD
3523
3524 def _real_extract(self, url):
3525 url, smuggled_data = unsmuggle_url(url, {})
3526 video_id = self._match_id(url)
3527
3528 base_url = self.http_scheme() + '//www.youtube.com/'
3529 webpage_url = base_url + 'watch?v=' + video_id
3530
3531 webpage, master_ytcfg, player_responses, player_url = self._download_player_responses(url, smuggled_data, video_id, webpage_url)
3532
11f9be09 3533 playability_statuses = traverse_obj(
3534 player_responses, (..., 'playabilityStatus'), expected_type=dict, default=[])
3535
3536 trailer_video_id = get_first(
3537 playability_statuses,
3538 ('errorScreen', 'playerLegacyDesktopYpcTrailerRenderer', 'trailerVideoId'),
3539 expected_type=str)
3540 if trailer_video_id:
3541 return self.url_result(
3542 trailer_video_id, self.ie_key(), trailer_video_id)
3543
3544 search_meta = ((lambda x: self._html_search_meta(x, webpage, default=None))
3545 if webpage else (lambda x: None))
3546
3547 video_details = traverse_obj(
3548 player_responses, (..., 'videoDetails'), expected_type=dict, default=[])
3549 microformats = traverse_obj(
3550 player_responses, (..., 'microformat', 'playerMicroformatRenderer'),
3551 expected_type=dict, default=[])
3552 video_title = (
3553 get_first(video_details, 'title')
3554 or self._get_text(microformats, (..., 'title'))
3555 or search_meta(['og:title', 'twitter:title', 'title']))
3556 video_description = get_first(video_details, 'shortDescription')
3557
d89257f3 3558 multifeed_metadata_list = get_first(
3559 player_responses,
3560 ('multicamera', 'playerLegacyMulticameraRenderer', 'metadataList'),
3561 expected_type=str)
3562 if multifeed_metadata_list and not smuggled_data.get('force_singlefeed'):
3563 if self.get_param('noplaylist'):
11f9be09 3564 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
d89257f3 3565 else:
3566 entries = []
3567 feed_ids = []
3568 for feed in multifeed_metadata_list.split(','):
3569 # Unquote should take place before split on comma (,) since textual
3570 # fields may contain comma as well (see
3571 # https://github.com/ytdl-org/youtube-dl/issues/8536)
14f25df2 3572 feed_data = urllib.parse.parse_qs(
ac668111 3573 urllib.parse.unquote_plus(feed))
d89257f3 3574
3575 def feed_entry(name):
3576 return try_get(
14f25df2 3577 feed_data, lambda x: x[name][0], str)
d89257f3 3578
3579 feed_id = feed_entry('id')
3580 if not feed_id:
3581 continue
3582 feed_title = feed_entry('title')
3583 title = video_title
3584 if feed_title:
3585 title += ' (%s)' % feed_title
3586 entries.append({
3587 '_type': 'url_transparent',
3588 'ie_key': 'Youtube',
3589 'url': smuggle_url(
3590 '%swatch?v=%s' % (base_url, feed_data['id'][0]),
3591 {'force_singlefeed': True}),
3592 'title': title,
3593 })
3594 feed_ids.append(feed_id)
3595 self.to_screen(
3596 'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
3597 % (', '.join(feed_ids), video_id))
3598 return self.playlist_result(
3599 entries, video_id, video_title, video_description)
11f9be09 3600
a1b2d843 3601 duration = int_or_none(
3602 get_first(video_details, 'lengthSeconds')
3603 or get_first(microformats, 'lengthSeconds')
3604 or parse_duration(search_meta('duration'))) or None
3605
c646d76f 3606 live_broadcast_details, is_live, streaming_data, formats, automatic_captions = \
3607 self._list_formats(video_id, microformats, video_details, player_responses, player_url)
bf1317d2 3608
545cc85d 3609 if not formats:
11f9be09 3610 if not self.get_param('allow_unplayable_formats') and traverse_obj(streaming_data, (..., 'licenseInfos')):
88acdbc2 3611 self.report_drm(video_id)
11f9be09 3612 pemr = get_first(
3613 playability_statuses,
3614 ('errorScreen', 'playerErrorMessageRenderer'), expected_type=dict) or {}
3615 reason = self._get_text(pemr, 'reason') or get_first(playability_statuses, 'reason')
3616 subreason = clean_html(self._get_text(pemr, 'subreason') or '')
545cc85d 3617 if subreason:
545cc85d 3618 if subreason == 'The uploader has not made this video available in your country.':
11f9be09 3619 countries = get_first(microformats, 'availableCountries')
545cc85d 3620 if not countries:
3621 regions_allowed = search_meta('regionsAllowed')
3622 countries = regions_allowed.split(',') if regions_allowed else None
b7da73eb 3623 self.raise_geo_restricted(subreason, countries, metadata_available=True)
11f9be09 3624 reason += f'. {subreason}'
545cc85d 3625 if reason:
b7da73eb 3626 self.raise_no_formats(reason, expected=True)
bf1317d2 3627
11f9be09 3628 keywords = get_first(video_details, 'keywords', expected_type=list) or []
545cc85d 3629 if not keywords and webpage:
3630 keywords = [
3631 unescapeHTML(m.group('content'))
3632 for m in re.finditer(self._meta_regex('og:video:tag'), webpage)]
3633 for keyword in keywords:
3634 if keyword.startswith('yt:stretch='):
201c1459 3635 mobj = re.search(r'(\d+)\s*:\s*(\d+)', keyword)
3636 if mobj:
3637 # NB: float is intentional for forcing float division
3638 w, h = (float(v) for v in mobj.groups())
3639 if w > 0 and h > 0:
3640 ratio = w / h
3641 for f in formats:
3642 if f.get('vcodec') != 'none':
3643 f['stretched_ratio'] = ratio
3644 break
a709d873 3645 thumbnails = self._extract_thumbnails((video_details, microformats), (..., ..., 'thumbnail'))
ff2751ac 3646 thumbnail_url = search_meta(['og:image', 'twitter:image'])
3647 if thumbnail_url:
3648 thumbnails.append({
3649 'url': thumbnail_url,
ff2751ac 3650 })
fccf5021 3651 original_thumbnails = thumbnails.copy()
3652
0ba692ac 3653 # The best resolution thumbnails sometimes does not appear in the webpage
bfec31be 3654 # See: https://github.com/yt-dlp/yt-dlp/issues/340
cca80fe6 3655 # List of possible thumbnails - Ref: <https://stackoverflow.com/a/20542029>
e820fbaa 3656 thumbnail_names = [
962ffcf8 3657 # While the *1,*2,*3 thumbnails are just below their corresponding "*default" variants
bfec31be 3658 # in resolution, these are not the custom thumbnail. So de-prioritize them
3659 'maxresdefault', 'hq720', 'sddefault', 'hqdefault', '0', 'mqdefault', 'default',
3660 'sd1', 'sd2', 'sd3', 'hq1', 'hq2', 'hq3', 'mq1', 'mq2', 'mq3', '1', '2', '3'
cca80fe6 3661 ]
cca80fe6 3662 n_thumbnail_names = len(thumbnail_names)
0ba692ac 3663 thumbnails.extend({
3664 'url': 'https://i.ytimg.com/vi{webp}/{video_id}/{name}{live}.{ext}'.format(
3665 video_id=video_id, name=name, ext=ext,
3666 webp='_webp' if ext == 'webp' else '', live='_live' if is_live else ''),
cca80fe6 3667 } for name in thumbnail_names for ext in ('webp', 'jpg'))
0ba692ac 3668 for thumb in thumbnails:
cca80fe6 3669 i = next((i for i, t in enumerate(thumbnail_names) if f'/{video_id}/{t}' in thumb['url']), n_thumbnail_names)
0ba692ac 3670 thumb['preference'] = (0 if '.webp' in thumb['url'] else -1) - (2 * i)
ff2751ac 3671 self._remove_duplicate_formats(thumbnails)
fccf5021 3672 self._downloader._sort_thumbnails(original_thumbnails)
545cc85d 3673
7ea65411 3674 category = get_first(microformats, 'category') or search_meta('genre')
3675 channel_id = str_or_none(
3676 get_first(video_details, 'channelId')
3677 or get_first(microformats, 'externalChannelId')
3678 or search_meta('channelId'))
7ea65411 3679 owner_profile_url = get_first(microformats, 'ownerProfileUrl')
3680
3681 live_content = get_first(video_details, 'isLiveContent')
3682 is_upcoming = get_first(video_details, 'isUpcoming')
3683 if is_live is None:
3684 if is_upcoming or live_content is False:
3685 is_live = False
3686 if is_upcoming is None and (live_content or is_live):
3687 is_upcoming = False
adbc4ec4
THD
3688 live_start_time = parse_iso8601(get_first(live_broadcast_details, 'startTimestamp'))
3689 live_end_time = parse_iso8601(get_first(live_broadcast_details, 'endTimestamp'))
3690 if not duration and live_end_time and live_start_time:
3691 duration = live_end_time - live_start_time
3692
3693 if is_live and self.get_param('live_from_start'):
3694 self._prepare_live_from_start_formats(formats, video_id, live_start_time, url, webpage_url, smuggled_data)
7ea65411 3695
720c3099 3696 formats.extend(self._extract_storyboard(player_responses, duration))
3697
31b532a1 3698 # source_preference is lower for throttled/potentially damaged formats
7e798d72 3699 self._sort_formats(formats, (
3700 'quality', 'res', 'fps', 'hdr:12', 'source', 'vcodec:vp9.2', 'channels', 'acodec', 'lang', 'proto'))
720c3099 3701
545cc85d 3702 info = {
3703 'id': video_id,
39ca3b5c 3704 'title': video_title,
545cc85d 3705 'formats': formats,
3706 'thumbnails': thumbnails,
fccf5021 3707 # The best thumbnail that we are sure exists. Prevents unnecessary
3708 # URL checking if user don't care about getting the best possible thumbnail
3709 'thumbnail': traverse_obj(original_thumbnails, (-1, 'url')),
545cc85d 3710 'description': video_description,
11f9be09 3711 'uploader': get_first(video_details, 'author'),
545cc85d 3712 'uploader_id': self._search_regex(r'/(?:channel|user)/([^/?&#]+)', owner_profile_url, 'uploader id') if owner_profile_url else None,
3713 'uploader_url': owner_profile_url,
3714 'channel_id': channel_id,
a70635b8 3715 'channel_url': format_field(channel_id, None, 'https://www.youtube.com/channel/%s'),
545cc85d 3716 'duration': duration,
3717 'view_count': int_or_none(
11f9be09 3718 get_first((video_details, microformats), (..., 'viewCount'))
545cc85d 3719 or search_meta('interactionCount')),
11f9be09 3720 'average_rating': float_or_none(get_first(video_details, 'averageRating')),
545cc85d 3721 'age_limit': 18 if (
11f9be09 3722 get_first(microformats, 'isFamilySafe') is False
545cc85d 3723 or search_meta('isFamilyFriendly') == 'false'
3724 or search_meta('og:restrictions:age') == '18+') else 0,
3725 'webpage_url': webpage_url,
3726 'categories': [category] if category else None,
3727 'tags': keywords,
11f9be09 3728 'playable_in_embed': get_first(playability_statuses, 'playableInEmbed'),
7ea65411 3729 'is_live': is_live,
3730 'was_live': (False if is_live or is_upcoming or live_content is False
3731 else None if is_live is None or is_upcoming is None
3732 else live_content),
3733 'live_status': 'is_upcoming' if is_upcoming else None, # rest will be set by YoutubeDL
adbc4ec4 3734 'release_timestamp': live_start_time,
545cc85d 3735 }
b477fc13 3736
e325a21a 3737 if get_first(video_details, 'isPostLiveDvr'):
3738 self.write_debug('Video is in Post-Live Manifestless mode')
3739 info['live_status'] = 'post_live'
3740 if (duration or 0) > 4 * 3600:
3741 self.report_warning(
3742 'The livestream has not finished processing. Only 4 hours of the video can be currently downloaded. '
3743 'This is a known issue and patches are welcome')
3744
c646d76f 3745 subtitles = {}
3944e7af 3746 pctr = traverse_obj(player_responses, (..., 'captions', 'playerCaptionsTracklistRenderer'), expected_type=dict)
545cc85d 3747 if pctr:
ecdc9049 3748 def get_lang_code(track):
3749 return (remove_start(track.get('vssId') or '', '.').replace('.', '-')
3750 or track.get('languageCode'))
3751
3752 # Converted into dicts to remove duplicates
3753 captions = {
3754 get_lang_code(sub): sub
3755 for sub in traverse_obj(pctr, (..., 'captionTracks', ...), default=[])}
3756 translation_languages = {
3757 lang.get('languageCode'): self._get_text(lang.get('languageName'), max_runs=1)
3758 for lang in traverse_obj(pctr, (..., 'translationLanguages', ...), default=[])}
3759
774d79cc 3760 def process_language(container, base_url, lang_code, sub_name, query):
120916da 3761 lang_subs = container.setdefault(lang_code, [])
545cc85d 3762 for fmt in self._SUBTITLE_FORMATS:
3763 query.update({
3764 'fmt': fmt,
3765 })
3766 lang_subs.append({
3767 'ext': fmt,
60f393e4 3768 'url': urljoin('https://www.youtube.com', update_url_query(base_url, query)),
774d79cc 3769 'name': sub_name,
545cc85d 3770 })
7e72694b 3771
07b47084 3772 # NB: Constructing the full subtitle dictionary is slow
3773 get_translated_subs = 'translated_subs' not in self._configuration_arg('skip') and (
3774 self.get_param('writeautomaticsub', False) or self.get_param('listsubtitles'))
ecdc9049 3775 for lang_code, caption_track in captions.items():
3776 base_url = caption_track.get('baseUrl')
1235d333 3777 orig_lang = parse_qs(base_url).get('lang', [None])[-1]
545cc85d 3778 if not base_url:
3779 continue
ecdc9049 3780 lang_name = self._get_text(caption_track, 'name', max_runs=1)
545cc85d 3781 if caption_track.get('kind') != 'asr':
545cc85d 3782 if not lang_code:
3783 continue
3784 process_language(
ecdc9049 3785 subtitles, base_url, lang_code, lang_name, {})
3786 if not caption_track.get('isTranslatable'):
3787 continue
3944e7af 3788 for trans_code, trans_name in translation_languages.items():
3789 if not trans_code:
545cc85d 3790 continue
1235d333 3791 orig_trans_code = trans_code
ecdc9049 3792 if caption_track.get('kind') != 'asr':
07b47084 3793 if not get_translated_subs:
18e49408 3794 continue
ecdc9049 3795 trans_code += f'-{lang_code}'
a70635b8 3796 trans_name += format_field(lang_name, None, ' from %s')
d49669ac 3797 # Add an "-orig" label to the original language so that it can be distinguished.
3798 # The subs are returned without "-orig" as well for compatibility
1235d333 3799 if lang_code == f'a-{orig_trans_code}':
0c8d9e5f 3800 process_language(
d49669ac 3801 automatic_captions, base_url, f'{trans_code}-orig', f'{trans_name} (Original)', {})
3802 # Setting tlang=lang returns damaged subtitles.
d49669ac 3803 process_language(automatic_captions, base_url, trans_code, trans_name,
1235d333 3804 {} if orig_lang == orig_trans_code else {'tlang': trans_code})
c646d76f 3805
3806 info['automatic_captions'] = automatic_captions
3807 info['subtitles'] = subtitles
7e72694b 3808
14f25df2 3809 parsed_url = urllib.parse.urlparse(url)
545cc85d 3810 for component in [parsed_url.fragment, parsed_url.query]:
14f25df2 3811 query = urllib.parse.parse_qs(component)
545cc85d 3812 for k, v in query.items():
3813 for d_k, s_ks in [('start', ('start', 't')), ('end', ('end',))]:
3814 d_k += '_time'
3815 if d_k not in info and k in s_ks:
3816 info[d_k] = parse_duration(query[k][0])
822b9d9c
RA
3817
3818 # Youtube Music Auto-generated description
822b9d9c 3819 if video_description:
1890fc63 3820 mobj = re.search(
3821 r'''(?xs)
3822 (?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+
3823 (?P<album>[^\n]+)
3824 (?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?
3825 (?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?
3826 (.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?
3827 .+\nAuto-generated\ by\ YouTube\.\s*$
3828 ''', video_description)
822b9d9c 3829 if mobj:
822b9d9c
RA
3830 release_year = mobj.group('release_year')
3831 release_date = mobj.group('release_date')
3832 if release_date:
3833 release_date = release_date.replace('-', '')
3834 if not release_year:
545cc85d 3835 release_year = release_date[:4]
3836 info.update({
3837 'album': mobj.group('album'.strip()),
3838 'artist': mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·')),
3839 'track': mobj.group('track').strip(),
3840 'release_date': release_date,
cc2db878 3841 'release_year': int_or_none(release_year),
545cc85d 3842 })
7e72694b 3843
545cc85d 3844 initial_data = None
3845 if webpage:
56ba69e4 3846 initial_data = self.extract_yt_initial_data(video_id, webpage, fatal=False)
545cc85d 3847 if not initial_data:
99e9e001 3848 query = {'videoId': video_id}
3849 query.update(self._get_checkok_params())
109dd3b2 3850 initial_data = self._extract_response(
3851 item_id=video_id, ep='next', fatal=False,
99e9e001 3852 ytcfg=master_ytcfg, query=query,
3853 headers=self.generate_api_headers(ytcfg=master_ytcfg),
109dd3b2 3854 note='Downloading initial data API JSON')
545cc85d 3855
0df111a3 3856 info['comment_count'] = traverse_obj(initial_data, (
3857 'contents', 'twoColumnWatchNextResults', 'results', 'results', 'contents', ..., 'itemSectionRenderer',
3858 'contents', ..., 'commentsEntryPointHeaderRenderer', 'commentCount', 'simpleText'
3859 ), (
3860 'engagementPanels', lambda _, v: v['engagementPanelSectionListRenderer']['panelIdentifier'] == 'comment-item-section',
3861 'engagementPanelSectionListRenderer', 'header', 'engagementPanelTitleHeaderRenderer', 'contextualInfo', 'runs', ..., 'text'
3862 ), expected_type=int_or_none, get_all=False)
3863
19a03940 3864 try: # This will error if there is no livechat
c60ee3a2 3865 initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
19a03940 3866 except (KeyError, IndexError, TypeError):
3867 pass
3868 else:
ecdc9049 3869 info.setdefault('subtitles', {})['live_chat'] = [{
4ce05f57 3870 # url is needed to set cookies
3871 'url': f'https://www.youtube.com/watch?v={video_id}&bpctr=9999999999&has_verified=1',
c60ee3a2 3872 'video_id': video_id,
3873 'ext': 'json',
f6745c49 3874 'protocol': 'youtube_live_chat' if is_live or is_upcoming else 'youtube_live_chat_replay',
c60ee3a2 3875 }]
545cc85d 3876
3877 if initial_data:
7c365c21 3878 info['chapters'] = (
3879 self._extract_chapters_from_json(initial_data, duration)
3880 or self._extract_chapters_from_engagement_panel(initial_data, duration)
0fe51254 3881 or self._extract_chapters_from_description(video_description, duration)
7c365c21 3882 or None)
545cc85d 3883
17322130 3884 contents = traverse_obj(
3885 initial_data, ('contents', 'twoColumnWatchNextResults', 'results', 'results', 'contents'),
3886 expected_type=list, default=[])
3887
3888 vpir = get_first(contents, 'videoPrimaryInfoRenderer')
3889 if vpir:
3890 stl = vpir.get('superTitleLink')
3891 if stl:
3892 stl = self._get_text(stl)
3893 if try_get(
3894 vpir,
3895 lambda x: x['superTitleIcon']['iconType']) == 'LOCATION_PIN':
3896 info['location'] = stl
3897 else:
affc4fef 3898 mobj = re.search(r'(.+?)\s*S(\d+)\s*•?\s*E(\d+)', stl)
17322130 3899 if mobj:
545cc85d 3900 info.update({
17322130 3901 'series': mobj.group(1),
3902 'season_number': int(mobj.group(2)),
3903 'episode_number': int(mobj.group(3)),
545cc85d 3904 })
17322130 3905 for tlb in (try_get(
3906 vpir,
3907 lambda x: x['videoActions']['menuRenderer']['topLevelButtons'],
3908 list) or []):
3909 tbr = tlb.get('toggleButtonRenderer') or {}
3910 for getter, regex in [(
3911 lambda x: x['defaultText']['accessibility']['accessibilityData'],
3912 r'(?P<count>[\d,]+)\s*(?P<type>(?:dis)?like)'), ([
3913 lambda x: x['accessibility'],
3914 lambda x: x['accessibilityData']['accessibilityData'],
3915 ], r'(?P<type>(?:dis)?like) this video along with (?P<count>[\d,]+) other people')]:
3916 label = (try_get(tbr, getter, dict) or {}).get('label')
3917 if label:
3918 mobj = re.match(regex, label)
3919 if mobj:
3920 info[mobj.group('type') + '_count'] = str_to_int(mobj.group('count'))
545cc85d 3921 break
17322130 3922 sbr_tooltip = try_get(
3923 vpir, lambda x: x['sentimentBar']['sentimentBarRenderer']['tooltip'])
3924 if sbr_tooltip:
3925 like_count, dislike_count = sbr_tooltip.split(' / ')
3926 info.update({
3927 'like_count': str_to_int(like_count),
3928 'dislike_count': str_to_int(dislike_count),
3929 })
3930 vsir = get_first(contents, 'videoSecondaryInfoRenderer')
3931 if vsir:
3932 vor = traverse_obj(vsir, ('owner', 'videoOwnerRenderer'))
3933 info.update({
3934 'channel': self._get_text(vor, 'title'),
3935 'channel_follower_count': self._get_count(vor, 'subscriberCountText')})
3936
3937 rows = try_get(
3938 vsir,
3939 lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows'],
3940 list) or []
3941 multiple_songs = False
3942 for row in rows:
3943 if try_get(row, lambda x: x['metadataRowRenderer']['hasDividerLine']) is True:
3944 multiple_songs = True
3945 break
3946 for row in rows:
3947 mrr = row.get('metadataRowRenderer') or {}
3948 mrr_title = mrr.get('title')
3949 if not mrr_title:
3950 continue
3951 mrr_title = self._get_text(mrr, 'title')
3952 mrr_contents_text = self._get_text(mrr, ('contents', 0))
3953 if mrr_title == 'License':
3954 info['license'] = mrr_contents_text
3955 elif not multiple_songs:
3956 if mrr_title == 'Album':
3957 info['album'] = mrr_contents_text
3958 elif mrr_title == 'Artist':
3959 info['artist'] = mrr_contents_text
3960 elif mrr_title == 'Song':
3961 info['track'] = mrr_contents_text
545cc85d 3962
3963 fallbacks = {
3964 'channel': 'uploader',
3965 'channel_id': 'uploader_id',
3966 'channel_url': 'uploader_url',
3967 }
992f9a73 3968
17322130 3969 # The upload date for scheduled, live and past live streams / premieres in microformats
3970 # may be different from the stream date. Although not in UTC, we will prefer it in this case.
992f9a73 3971 # See: https://github.com/yt-dlp/yt-dlp/pull/2223#issuecomment-1008485139
17322130 3972 upload_date = (
3973 unified_strdate(get_first(microformats, 'uploadDate'))
3974 or unified_strdate(search_meta('uploadDate')))
1ff88b7a 3975 if not upload_date or (
3976 not info.get('is_live')
3977 and not info.get('was_live')
3978 and info.get('live_status') != 'is_upcoming'
3979 and 'no-youtube-prefer-utc-upload-date' not in self.get_param('compat_opts', [])
3980 ):
6e634cbe 3981 upload_date = strftime_or_none(self._extract_time_text(vpir, 'dateText')[0], '%Y%m%d') or upload_date
17322130 3982 info['upload_date'] = upload_date
992f9a73 3983
545cc85d 3984 for to, frm in fallbacks.items():
3985 if not info.get(to):
3986 info[to] = info.get(frm)
3987
3988 for s_k, d_k in [('artist', 'creator'), ('track', 'alt_title')]:
3989 v = info.get(s_k)
3990 if v:
3991 info[d_k] = v
b84071c0 3992
11f9be09 3993 is_private = get_first(video_details, 'isPrivate', expected_type=bool)
3994 is_unlisted = get_first(microformats, 'isUnlisted', expected_type=bool)
c224251a 3995 is_membersonly = None
b28f8d24 3996 is_premium = None
c224251a
M
3997 if initial_data and is_private is not None:
3998 is_membersonly = False
b28f8d24 3999 is_premium = False
47193e02 4000 contents = try_get(initial_data, lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'], list) or []
4001 badge_labels = set()
4002 for content in contents:
4003 if not isinstance(content, dict):
4004 continue
4005 badge_labels.update(self._extract_badges(content.get('videoPrimaryInfoRenderer')))
4006 for badge_label in badge_labels:
4007 if badge_label.lower() == 'members only':
4008 is_membersonly = True
4009 elif badge_label.lower() == 'premium':
4010 is_premium = True
4011 elif badge_label.lower() == 'unlisted':
4012 is_unlisted = True
c224251a 4013
c224251a
M
4014 info['availability'] = self._availability(
4015 is_private=is_private,
b28f8d24 4016 needs_premium=is_premium,
c224251a
M
4017 needs_subscription=is_membersonly,
4018 needs_auth=info['age_limit'] >= 18,
4019 is_unlisted=None if is_private is None else is_unlisted)
4020
a2160aa4 4021 info['__post_extractor'] = self.extract_comments(master_ytcfg, video_id, contents, webpage)
4ea3be0a 4022
11f9be09 4023 self.mark_watched(video_id, player_responses)
d77ab8e2 4024
545cc85d 4025 return info
c5e8d7af 4026
a61fd4cf 4027
a6213a49 4028class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):
5f6a1245 4029
@staticmethod
def passthrough_smuggled_data(func):
    """Decorate an extraction method so smuggled URL data is passed through.

    The wrapped method is called as ``func(self, url, smuggled_data)`` with
    the data un-smuggled from the incoming URL (plus ``is_music_url`` when
    ``self.is_music_url(url)`` is true). If any smuggled data is present,
    the same data is smuggled back into the URL of every returned entry.
    """
    def _resmuggle(items, payload):
        # Lazily rewrites each entry's URL as the entries are consumed
        for item in items:
            # TODO: Convert URL to music.youtube instead.
            # Do we need to passthrough any other smuggled_data?
            item['url'] = smuggle_url(item['url'], payload)
            yield item

    @functools.wraps(func)
    def wrapper(self, url):
        url, payload = unsmuggle_url(url, {})
        if self.is_music_url(url):
            payload['is_music_url'] = True
        result = func(self, url, payload)
        if not payload or not result.get('entries'):
            return result
        result['entries'] = _resmuggle(result['entries'], payload)
        return result
    return wrapper
4049
def _extract_channel_id(self, webpage):
    """Return the channel ID advertised in the webpage's meta tags.

    The ``channelId`` meta tag is tried first (non-fatally). Otherwise a
    channel URL is looked up in a set of URL-carrying meta tags and the ID
    is taken from its ``/channel/<id>`` path component.
    """
    channel_id = self._html_search_meta(
        'channelId', webpage, 'channel id', default=None)
    if channel_id:
        return channel_id
    channel_url = self._html_search_meta(
        ('og:url', 'al:ios:url', 'al:android:url', 'al:web:url',
         'twitter:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad',
         'twitter:app:url:googleplay'), webpage, 'channel url')
    # The quantifier has to be inside the group: with `([^/?#&])+` the group
    # is re-captured for every character and ends up holding only the last one
    return self._search_regex(
        r'https?://(?:www\.)?youtube\.com/channel/([^/?#&]+)',
        channel_url, 'channel id')
15f6397c 4062
8bdd16b4 4063 @staticmethod
cd7c66cf 4064 def _extract_basic_item_renderer(item):
4065 # Modified from _extract_grid_item_renderer
201c1459 4066 known_basic_renderers = (
a17526e4 4067 'playlistRenderer', 'videoRenderer', 'channelRenderer', 'showRenderer', 'reelItemRenderer'
cd7c66cf 4068 )
4069 for key, renderer in item.items():
201c1459 4070 if not isinstance(renderer, dict):
cd7c66cf 4071 continue
201c1459 4072 elif key in known_basic_renderers:
4073 return renderer
4074 elif key.startswith('grid') and key.endswith('Renderer'):
4075 return renderer
8bdd16b4 4076
8bdd16b4 4077 def _grid_entries(self, grid_renderer):
4078 for item in grid_renderer['items']:
4079 if not isinstance(item, dict):
39b62db1 4080 continue
cd7c66cf 4081 renderer = self._extract_basic_item_renderer(item)
8bdd16b4 4082 if not isinstance(renderer, dict):
4083 continue
052e1350 4084 title = self._get_text(renderer, 'title')
fe93e2c4 4085
8bdd16b4 4086 # playlist
4087 playlist_id = renderer.get('playlistId')
4088 if playlist_id:
4089 yield self.url_result(
4090 'https://www.youtube.com/playlist?list=%s' % playlist_id,
4091 ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
4092 video_title=title)
201c1459 4093 continue
8bdd16b4 4094 # video
4095 video_id = renderer.get('videoId')
4096 if video_id:
4097 yield self._extract_video(renderer)
201c1459 4098 continue
8bdd16b4 4099 # channel
4100 channel_id = renderer.get('channelId')
4101 if channel_id:
8bdd16b4 4102 yield self.url_result(
4103 'https://www.youtube.com/channel/%s' % channel_id,
4104 ie=YoutubeTabIE.ie_key(), video_title=title)
201c1459 4105 continue
4106 # generic endpoint URL support
4107 ep_url = urljoin('https://www.youtube.com/', try_get(
4108 renderer, lambda x: x['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'],
14f25df2 4109 str))
201c1459 4110 if ep_url:
4111 for ie in (YoutubeTabIE, YoutubePlaylistIE, YoutubeIE):
4112 if ie.suitable(ep_url):
4113 yield self.url_result(
4114 ep_url, ie=ie.ie_key(), video_id=ie._match_id(ep_url), video_title=title)
4115 break
8bdd16b4 4116
def _music_reponsive_list_entry(self, renderer):
    """Build a music.youtube.com URL result for a list-item renderer.

    Checked in order: the item's own video ID, the watch endpoint's
    playlist (optionally combined with its video ID), then the browse
    endpoint's browse ID. Returns None when none of them is present.
    """
    item_video_id = traverse_obj(renderer, ('playlistItemData', 'videoId'))
    if item_video_id:
        return self.url_result(
            f'https://music.youtube.com/watch?v={item_video_id}',
            ie=YoutubeIE.ie_key(), video_id=item_video_id)

    watch_endpoint = ('navigationEndpoint', 'watchEndpoint')
    playlist_id = traverse_obj(renderer, (*watch_endpoint, 'playlistId'))
    if playlist_id:
        watch_video_id = traverse_obj(renderer, (*watch_endpoint, 'videoId'))
        if watch_video_id:
            target = f'https://music.youtube.com/watch?v={watch_video_id}&list={playlist_id}'
        else:
            target = f'https://music.youtube.com/playlist?list={playlist_id}'
        return self.url_result(target, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)

    browse_id = traverse_obj(renderer, ('navigationEndpoint', 'browseEndpoint', 'browseId'))
    if not browse_id:
        return None
    return self.url_result(
        f'https://music.youtube.com/browse/{browse_id}',
        ie=YoutubeTabIE.ie_key(), video_id=browse_id)
4134
3d3dddc9 4135 def _shelf_entries_from_content(self, shelf_renderer):
4136 content = shelf_renderer.get('content')
4137 if not isinstance(content, dict):
8bdd16b4 4138 return
cd7c66cf 4139 renderer = content.get('gridRenderer') or content.get('expandedShelfContentsRenderer')
3d3dddc9 4140 if renderer:
4141 # TODO: add support for nested playlists so each shelf is processed
4142 # as separate playlist
4143 # TODO: this includes only first N items
86e5f3ed 4144 yield from self._grid_entries(renderer)
3d3dddc9 4145 renderer = content.get('horizontalListRenderer')
4146 if renderer:
4147 # TODO
4148 pass
8bdd16b4 4149
def _shelf_entries(self, shelf_renderer, skip_channels=False):
    """Yield entries for a shelf: its own URL first, then its content.

    When the shelf has an endpoint URL, a URL result for it is yielded,
    unless ``skip_channels`` is set and the URL contains '/channels?', in
    which case nothing at all is yielded. Entries extracted from the
    shelf content are yielded afterwards whether or not a URL was found.
    """
    endpoint_path = try_get(
        shelf_renderer, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        str)
    shelf_url = urljoin('https://www.youtube.com', endpoint_path)
    if shelf_url:
        # Skipping links to another channels, note that checking for
        # endpoint.commandMetadata.webCommandMetadata.webPageType == WEB_PAGE_TYPE_CHANNEL
        # will not work
        if skip_channels and '/channels?' in shelf_url:
            return
        yield self.url_result(
            shelf_url, video_title=self._get_text(shelf_renderer, 'title'))
    # Shelf may not contain shelf URL, fallback to extraction from content
    yield from self._shelf_entries_from_content(shelf_renderer)
c5e8d7af 4165
8bdd16b4 4166 def _playlist_entries(self, video_list_renderer):
4167 for content in video_list_renderer['contents']:
4168 if not isinstance(content, dict):
4169 continue
4170 renderer = content.get('playlistVideoRenderer') or content.get('playlistPanelVideoRenderer')
4171 if not isinstance(renderer, dict):
4172 continue
4173 video_id = renderer.get('videoId')
4174 if not video_id:
4175 continue
4176 yield self._extract_video(renderer)
07aeced6 4177
def _rich_entries(self, rich_grid_renderer):
    """Yield the video nested under ``content.videoRenderer``, if it has a video ID."""
    video_renderer = try_get(
        rich_grid_renderer, lambda x: x['content']['videoRenderer'], dict) or {}
    if video_renderer.get('videoId'):
        yield self._extract_video(video_renderer)
4185
8bdd16b4 4186 def _video_entry(self, video_renderer):
4187 video_id = video_renderer.get('videoId')
4188 if video_id:
4189 return self._extract_video(video_renderer)
dacb3a86 4190
def _hashtag_tile_entry(self, hashtag_tile_renderer):
    """Return a tab URL result for a hashtag tile, or None if it has no tap URL."""
    tap_path = traverse_obj(
        hashtag_tile_renderer, ('onTapCommand', 'commandMetadata', 'webCommandMetadata', 'url'))
    tile_url = urljoin('https://youtube.com', tap_path)
    if not tile_url:
        return None
    return self.url_result(
        tile_url, ie=YoutubeTabIE.ie_key(), title=self._get_text(hashtag_tile_renderer, 'hashtag'))
4197
def _post_thread_entries(self, post_thread_renderer):
    """Yield the entries referenced by a backstage post.

    In order: the attached video (if any), the attached playlist (if any),
    then every link in the post text that YoutubeIE accepts, except a link
    to the already-yielded attached video.
    """
    post = try_get(
        post_thread_renderer, lambda x: x['post']['backstagePostRenderer'], dict)
    if not post:
        return

    # video attachment
    attached_video = try_get(
        post, lambda x: x['backstageAttachment']['videoRenderer'], dict) or {}
    attached_video_id = attached_video.get('videoId')
    if attached_video_id:
        video_entry = self._extract_video(attached_video)
        if video_entry:
            yield video_entry

    # playlist attachment
    attached_playlist_id = try_get(
        post, lambda x: x['backstageAttachment']['playlistRenderer']['playlistId'], str)
    if attached_playlist_id:
        yield self.url_result(
            'https://www.youtube.com/playlist?list=%s' % attached_playlist_id,
            ie=YoutubeTabIE.ie_key(), video_id=attached_playlist_id)

    # inline video links
    for text_run in try_get(post, lambda x: x['contentText']['runs'], list) or []:
        if not isinstance(text_run, dict):
            continue
        link = try_get(
            text_run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'], str)
        if not link or not YoutubeIE.suitable(link):
            continue
        linked_video_id = YoutubeIE._match_id(link)
        # Do not repeat the video that was already yielded as the attachment
        if linked_video_id != attached_video_id:
            yield self.url_result(link, ie=YoutubeIE.ie_key(), video_id=linked_video_id)
dacb3a86 4233
8bdd16b4 4234 def _post_thread_continuation_entries(self, post_thread_continuation):
4235 contents = post_thread_continuation.get('contents')
4236 if not isinstance(contents, list):
4237 return
4238 for content in contents:
4239 renderer = content.get('backstagePostThreadRenderer')
6b0b0a28 4240 if isinstance(renderer, dict):
4241 yield from self._post_thread_entries(renderer)
8bdd16b4 4242 continue
6b0b0a28 4243 renderer = content.get('videoRenderer')
4244 if isinstance(renderer, dict):
4245 yield self._video_entry(renderer)
07aeced6 4246
39ed931e 4247 r''' # unused
4248 def _rich_grid_entries(self, contents):
4249 for content in contents:
4250 video_renderer = try_get(content, lambda x: x['richItemRenderer']['content']['videoRenderer'], dict)
4251 if video_renderer:
4252 entry = self._video_entry(video_renderer)
4253 if entry:
4254 yield entry
4255 '''
52efa4b3 4256
def _extract_entries(self, parent_renderer, continuation_list):
    """Yield the entries found under ``parent_renderer['contents']``.

    ``continuation_list`` is an output parameter: it is reset in place to
    ``[None]`` and its single slot is filled with the continuation
    extracted from the most specific renderer that provides one (item
    renderer, then section renderer, then ``parent_renderer``).
    """
    section_handlers = {
        'playlistVideoListRenderer': self._playlist_entries,
        'gridRenderer': self._grid_entries,
        'reelShelfRenderer': self._grid_entries,
        'shelfRenderer': self._shelf_entries,
        'musicResponsiveListItemRenderer': lambda x: [self._music_reponsive_list_entry(x)],
        'backstagePostThreadRenderer': self._post_thread_entries,
        'videoRenderer': lambda x: [self._video_entry(x)],
        'playlistRenderer': lambda x: self._grid_entries({'items': [{'playlistRenderer': x}]}),
        'channelRenderer': lambda x: self._grid_entries({'items': [{'channelRenderer': x}]}),
        'hashtagTileRenderer': lambda x: [self._hashtag_tile_entry(x)],
    }

    # continuation_list is modified in-place with continuation_list = [continuation_token]
    continuation_list[:] = [None]
    for block in try_get(parent_renderer, lambda x: x['contents'], list) or []:
        if not isinstance(block, dict):
            continue
        section = traverse_obj(
            block, 'itemSectionRenderer', 'musicShelfRenderer', 'musicShelfContinuation',
            expected_type=dict)
        if not section:
            rich_item = block.get('richItemRenderer')
            if rich_item:
                yield from self._rich_entries(rich_item)
                continuation_list[0] = self._extract_continuation(parent_renderer)
            continue

        for section_item in try_get(section, lambda x: x['contents'], list) or []:
            if not isinstance(section_item, dict):
                continue
            # Only the first key with a registered handler is processed
            hit = next(
                ((kind, payload) for kind, payload in section_item.items() if kind in section_handlers),
                None)
            if hit is None:
                continue
            kind, payload = hit
            # Handlers may produce empty/None entries; drop those
            yield from filter(None, section_handlers[kind](payload))
            continuation_list[0] = self._extract_continuation(payload)

        if not continuation_list[0]:
            continuation_list[0] = self._extract_continuation(section)

    if not continuation_list[0]:
        continuation_list[0] = self._extract_continuation(parent_renderer)
4305
4306 def _entries(self, tab, item_id, ytcfg, account_syncid, visitor_data):
4307 continuation_list = [None]
4308 extract_entries = lambda x: self._extract_entries(x, continuation_list)
29f7c58a 4309 tab_content = try_get(tab, lambda x: x['content'], dict)
4310 if not tab_content:
4311 return
3462ffa8 4312 parent_renderer = (
29f7c58a 4313 try_get(tab_content, lambda x: x['sectionListRenderer'], dict)
4314 or try_get(tab_content, lambda x: x['richGridRenderer'], dict) or {})
86e5f3ed 4315 yield from extract_entries(parent_renderer)
3462ffa8 4316 continuation = continuation_list[0]
d069eca7 4317
8bdd16b4 4318 for page_num in itertools.count(1):
4319 if not continuation:
4320 break
99e9e001 4321 headers = self.generate_api_headers(
4322 ytcfg=ytcfg, account_syncid=account_syncid, visitor_data=visitor_data)
79360d99 4323 response = self._extract_response(
86e5f3ed 4324 item_id=f'{item_id} page {page_num}',
fe93e2c4 4325 query=continuation, headers=headers, ytcfg=ytcfg,
79360d99 4326 check_get_keys=('continuationContents', 'onResponseReceivedActions', 'onResponseReceivedEndpoints'))
a5c56234
M
4327
4328 if not response:
8bdd16b4 4329 break
ac56cf38 4330 # Extracting updated visitor data is required to prevent an infinite extraction loop in some cases
4331 # See: https://github.com/ytdl-org/youtube-dl/issues/28702
4332 visitor_data = self._extract_visitor_data(response) or visitor_data
ebf1b291 4333
69184e41 4334 known_continuation_renderers = {
4335 'playlistVideoListContinuation': self._playlist_entries,
4336 'gridContinuation': self._grid_entries,
4337 'itemSectionContinuation': self._post_thread_continuation_entries,
4338 'sectionListContinuation': extract_entries, # for feeds
4339 }
8bdd16b4 4340 continuation_contents = try_get(
69184e41 4341 response, lambda x: x['continuationContents'], dict) or {}
4342 continuation_renderer = None
4343 for key, value in continuation_contents.items():
4344 if key not in known_continuation_renderers:
3462ffa8 4345 continue
69184e41 4346 continuation_renderer = value
4347 continuation_list = [None]
86e5f3ed 4348 yield from known_continuation_renderers[key](continuation_renderer)
69184e41 4349 continuation = continuation_list[0] or self._extract_continuation(continuation_renderer)
4350 break
4351 if continuation_renderer:
4352 continue
c5e8d7af 4353
a1b535bd 4354 known_renderers = {
e4b98809 4355 'videoRenderer': (self._grid_entries, 'items'), # for membership tab
a1b535bd 4356 'gridPlaylistRenderer': (self._grid_entries, 'items'),
4357 'gridVideoRenderer': (self._grid_entries, 'items'),
d61fc646 4358 'gridChannelRenderer': (self._grid_entries, 'items'),
a1b535bd 4359 'playlistVideoRenderer': (self._playlist_entries, 'contents'),
cd7c66cf 4360 'itemSectionRenderer': (extract_entries, 'contents'), # for feeds
9ba5705a 4361 'richItemRenderer': (extract_entries, 'contents'), # for hashtag
26fe8ffe 4362 'backstagePostThreadRenderer': (self._post_thread_continuation_entries, 'contents')
a1b535bd 4363 }
cce889b9 4364 on_response_received = dict_get(response, ('onResponseReceivedActions', 'onResponseReceivedEndpoints'))
8bdd16b4 4365 continuation_items = try_get(
cce889b9 4366 on_response_received, lambda x: x[0]['appendContinuationItemsAction']['continuationItems'], list)
a1b535bd 4367 continuation_item = try_get(continuation_items, lambda x: x[0], dict) or {}
4368 video_items_renderer = None
4369 for key, value in continuation_item.items():
4370 if key not in known_renderers:
8bdd16b4 4371 continue
a1b535bd 4372 video_items_renderer = {known_renderers[key][1]: continuation_items}
9ba5705a 4373 continuation_list = [None]
86e5f3ed 4374 yield from known_renderers[key][0](video_items_renderer)
9ba5705a 4375 continuation = continuation_list[0] or self._extract_continuation(video_items_renderer)
a1b535bd 4376 break
4377 if video_items_renderer:
4378 continue
8bdd16b4 4379 break
9558dcec 4380
8bdd16b4 4381 @staticmethod
7c219ea6 4382 def _extract_selected_tab(tabs, fatal=True):
8bdd16b4 4383 for tab in tabs:
cd684175 4384 renderer = dict_get(tab, ('tabRenderer', 'expandableTabRenderer')) or {}
4385 if renderer.get('selected') is True:
4386 return renderer
2b3c2546 4387 else:
7c219ea6 4388 if fatal:
4389 raise ExtractorError('Unable to find selected tab')
b82f815f 4390
def _extract_uploader(self, data):
    """Return uploader name/id/url taken from the playlist's secondary sidebar.

    The result contains only the ``uploader``, ``uploader_id`` and
    ``uploader_url`` keys whose value is not None; it is empty when the
    sidebar has no video owner.
    """
    sidebar = self._extract_sidebar_info_renderer(data, 'playlistSidebarSecondaryInfoRenderer') or {}
    owner = try_get(
        sidebar, lambda x: x['videoOwner']['videoOwnerRenderer']['title']['runs'][0], dict)
    if not owner:
        return {}

    owner_text = owner.get('text')
    fields = {
        # Reduce "by X and N others" to X; any other text is kept as-is
        'uploader': self._search_regex(
            r'^by (.+) and \d+ others?$', owner_text, 'uploader', default=owner_text),
        'uploader_id': try_get(
            owner, lambda x: x['navigationEndpoint']['browseEndpoint']['browseId'], str),
        'uploader_url': urljoin(
            'https://www.youtube.com/',
            try_get(owner, lambda x: x['navigationEndpoint']['browseEndpoint']['canonicalBaseUrl'], str)),
    }
    return {name: value for name, value in fields.items() if value is not None}
8bdd16b4 4406
def _extract_from_tabs(self, item_id, ytcfg, data, tabs):
    """Assemble the playlist result for a page that is rendered with tabs.

    Channel metadata (``data['metadata']['channelMetadataRenderer']``) is
    preferred; when absent, ``playlistMetadataRenderer`` is used instead.
    Avatar, banner and sidebar thumbnails are collected, and the entries of
    the selected tab are wrapped with ``self.playlist_result``.

    @param item_id: fallback playlist id when the metadata provides none
    @param ytcfg:   ytcfg passed on to the entry/continuation extraction
    @param data:    initial data / API response of the page
    @param tabs:    list of tab renderers; the selected one supplies the entries
    """
    def uncropped(thumbnail_url):
        # We can get the uncropped banner/avatar by replacing the crop params with '=s0'
        # See: https://github.com/yt-dlp/yt-dlp/issues/2237#issuecomment-1013694714
        base_url = (thumbnail_url or '').split('=')[0]
        return url_or_none(base_url + '=s0')

    selected_tab = self._extract_selected_tab(tabs)
    primary_sidebar_renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer')

    channel_name = channel_url = channel_id = None
    renderer = try_get(data, lambda x: x['metadata']['channelMetadataRenderer'], dict)
    if renderer:
        channel_name, channel_url, channel_id = (
            renderer.get(key) for key in ('title', 'channelUrl', 'externalId'))
    else:
        renderer = try_get(data, lambda x: x['metadata']['playlistMetadataRenderer'], dict)

    playlist_id = title = description = None
    tags = []
    if renderer:
        title = renderer.get('title')
        description = renderer.get('description', '')
        # Only set for channels; stays None for plain playlists
        playlist_id = channel_id
        tags = renderer.get('keywords', '').split()

    avatar_thumbnails = self._extract_thumbnails(renderer, 'avatar')
    uncropped_avatar = uncropped(avatar_thumbnails[0]['url']) if avatar_thumbnails else None
    if uncropped_avatar:
        avatar_thumbnails.append({
            'url': uncropped_avatar,
            'id': 'avatar_uncropped',
            'preference': 1
        })

    channel_banners = self._extract_thumbnails(
        data, ('header', ..., ['banner', 'mobileBanner', 'tvBanner']))
    for banner in channel_banners:
        banner['preference'] = -10
    uncropped_banner = uncropped(channel_banners[0]['url']) if channel_banners else None
    if uncropped_banner:
        channel_banners.append({
            'url': uncropped_banner,
            'id': 'banner_uncropped',
            'preference': -5
        })

    primary_thumbnails = self._extract_thumbnails(
        primary_sidebar_renderer,
        ('thumbnailRenderer', ('playlistVideoThumbnailRenderer', 'playlistCustomThumbnailRenderer'), 'thumbnail'))

    if playlist_id is None:
        playlist_id = item_id

    playlist_stats = traverse_obj(primary_sidebar_renderer, 'stats')
    last_updated_unix, _ = self._extract_time_text(playlist_stats, 2)
    if title is None:
        title = self._get_text(data, ('header', 'hashtagHeaderRenderer', 'hashtag')) or playlist_id
    # Append the tab title and expanded text (when present) to the base title
    for tab_field in ('title', 'expandedText'):
        title += format_field(selected_tab, tab_field, ' - %s')

    metadata = {
        'playlist_id': playlist_id,
        'playlist_title': title,
        'playlist_description': description,
        'uploader': channel_name,
        'uploader_id': channel_id,
        'uploader_url': channel_url,
        'thumbnails': primary_thumbnails + avatar_thumbnails + channel_banners,
        'tags': tags,
        'view_count': self._get_count(playlist_stats, 1),
        'availability': self._extract_availability(data),
        'modified_date': strftime_or_none(last_updated_unix, '%Y%m%d'),
        'playlist_count': self._get_count(playlist_stats, 0),
        'channel_follower_count': self._get_count(data, ('header', ..., 'subscriberCountText')),
    }
    if not channel_id:
        # No channel metadata: fall back to the uploader extracted from the page data
        metadata.update(self._extract_uploader(data))
    # The channel_* fields mirror the uploader_* fields
    metadata.update({
        'channel': metadata['uploader'],
        'channel_id': metadata['uploader_id'],
        'channel_url': metadata['uploader_url']})

    entries = self._entries(
        selected_tab, playlist_id, ytcfg,
        self._extract_account_syncid(ytcfg, data),
        self._extract_visitor_data(data, ytcfg))
    return self.playlist_result(entries, **metadata)
73c4ac2c 4498
def _extract_inline_playlist(self, playlist, playlist_id, data, ytcfg):
    """Yield the entries of an inline (watch-page) playlist, page by page.

    Each iteration lists the entries of the current `playlist` renderer,
    yields the ones that follow the last entry already yielded, and then
    requests the next page from the 'next' endpoint.

    Iteration stops when a page has no entries, or when nothing follows the
    previously yielded last entry.

    @param playlist:    playlist renderer dict of the first page
    @param playlist_id: id sent as 'playlistId' and used in progress notes
    @param data:        initial page data (for account sync id / visitor data)
    @param ytcfg:       ytcfg used to build the API headers
    """
    last_id = response = None
    for page_num in itertools.count(1):
        videos = list(self._playlist_entries(playlist))
        if not videos:
            return
        # Skip everything up to and including the last entry yielded so far;
        # when that entry is not on this page the whole page is new (start == 0)
        start = next((i for i, v in enumerate(videos) if v['id'] == last_id), -1) + 1
        if start >= len(videos):
            return
        yield from videos[start:]
        last_id = videos[-1]['id']

        # The last item may not be a playlistPanelVideoRenderer, in which case
        # try_get returns None; fall back to an empty dict so the defaults in
        # the query below are used instead of raising AttributeError
        watch_endpoint = try_get(
            playlist,
            lambda x: x['contents'][-1]['playlistPanelVideoRenderer']['navigationEndpoint']['watchEndpoint'],
            dict) or {}
        headers = self.generate_api_headers(
            ytcfg=ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
            visitor_data=self._extract_visitor_data(response, data, ytcfg))
        query = {
            'playlistId': playlist_id,
            'videoId': watch_endpoint.get('videoId') or last_id,
            'index': watch_endpoint.get('index') or len(videos),
            'params': watch_endpoint.get('params') or 'OAE%3D'
        }
        response = self._extract_response(
            item_id='%s page %d' % (playlist_id, page_num),
            query=query, ep='next', headers=headers, ytcfg=ytcfg,
            check_get_keys='contents'
        )
        playlist = try_get(
            response, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
cd7c66cf 4529
def _extract_from_playlist(self, item_id, url, data, playlist, ytcfg):
    """Build a result for a playlist renderer found on a watch page.

    When the renderer links to a playlist page different from `url` (and the
    playlist id is not one of the known unviewable kinds), a `url_result`
    pointing at that page is returned. Otherwise the entries are extracted
    inline via `_extract_inline_playlist`.
    """
    playlist_id = playlist.get('playlistId') or item_id
    title = playlist.get('title')
    if not title:
        title = try_get(data, lambda x: x['titleText']['simpleText'], str)

    # Delegating everything except mix playlists to regular tab-based playlist URL
    linked_path = try_get(
        playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'], str)
    playlist_url = urljoin(url, linked_path)

    # Some playlists are unviewable but YouTube still provides a link to the (broken) playlist page [1]
    # [1] MLCT, RLTDwFCb4jeqaKWnciAYM-ZVHg
    is_known_unviewable = re.fullmatch(r'MLCT|RLTD[\w-]{22}', playlist_id)

    should_delegate = playlist_url and playlist_url != url and not is_known_unviewable
    if not should_delegate:
        inline_entries = self._extract_inline_playlist(playlist, playlist_id, data, ytcfg)
        return self.playlist_result(
            inline_entries, playlist_id=playlist_id, playlist_title=title)

    return self.url_result(
        playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
        video_title=title)
c5e8d7af 4552
def _extract_availability(self, data):
    """
    Work out the availability of a playlist/tab from its sidebar labels.
    Note: Unless YouTube tells us explicitly, we do not assume it is public
    @param data: response
    @returns the value of self._availability() for the detected private/unlisted flags
    """
    sidebar = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer') or {}
    badge_labels = self._extract_badges(sidebar)

    # Personal playlists, when authenticated, have a dropdown visibility selector instead of a badge
    dropdown_entries = try_get(
        sidebar,
        lambda x: x['privacyForm']['dropdownFormFieldRenderer']['dropdown']['dropdownRenderer']['entries'],
        list) or []
    for entry in dropdown_entries:
        if not try_get(entry, lambda x: x['privacyDropdownItemRenderer']['isSelected'], bool):
            continue
        label = self._get_text(entry, ('privacyDropdownItemRenderer', 'label'))
        if not label:
            continue
        # First selected entry with a label wins
        badge_labels.add(label.lower())
        break

    # None means "unknown"; only an explicit label changes either flag
    is_private = is_unlisted = None
    for badge_label in badge_labels:
        if badge_label == 'public':
            is_unlisted = is_private = False
        elif badge_label == 'private':
            is_private = True
        elif badge_label == 'unlisted':
            is_unlisted = True
    return self._availability(is_private, False, False, False, is_unlisted)
4584
@staticmethod
def _extract_sidebar_info_renderer(data, info_renderer, expected_type=dict):
    """Return the first non-empty `info_renderer` among the playlist sidebar items.

    @param data:          response containing ['sidebar']['playlistSidebarRenderer']['items']
    @param info_renderer: key to look up in each sidebar item
    @param expected_type: type the value must have to be accepted
    @returns the matching renderer, or None when no item provides one
    """
    sidebar_items = try_get(
        data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list) or []
    candidates = (
        try_get(sidebar_item, lambda x: x[info_renderer], expected_type)
        for sidebar_item in sidebar_items)
    # filter(None, ...) drops missing/empty renderers, like the truthiness check in a plain loop
    return next(filter(None, candidates), None)
4593
def _reload_with_unavailable_videos(self, item_id, data, ytcfg):
    """
    Re-request the playlist so that unavailable videos are included.

    The browse id and params are taken from the 'show unavailable videos'
    menu item of the primary sidebar renderer when that item exists;
    otherwise default values are used. Returns None without any request
    when the page has no primary sidebar renderer. The request itself is
    non-fatal.
    """
    sidebar = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer')
    if not sidebar:
        return

    browse_endpoint = {}
    menu_items = try_get(sidebar, lambda x: x['menu']['menuRenderer']['items'], list) or []
    for menu_item in menu_items:
        if not isinstance(menu_item, dict):
            continue
        nav_item_renderer = menu_item.get('menuNavigationItemRenderer')
        text = try_get(nav_item_renderer, lambda x: x['text']['simpleText'], str)
        if text and text.lower() == 'show unavailable videos':
            browse_endpoint = try_get(
                nav_item_renderer, lambda x: x['navigationEndpoint']['browseEndpoint'], dict) or {}
            break
    browse_id = browse_endpoint.get('browseId')
    params = browse_endpoint.get('params')

    headers = self.generate_api_headers(
        ytcfg=ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
        visitor_data=self._extract_visitor_data(data, ytcfg))
    # Fall back to default params / a browse id derived from item_id when the button is absent
    query = {
        'params': params or 'wgYCCAA=',
        'browseId': browse_id or 'VL%s' % item_id
    }
    return self._extract_response(
        item_id=item_id, headers=headers, query=query,
        check_get_keys='contents', fatal=False, ytcfg=ytcfg,
        note='Downloading API JSON with unavailable videos')
358de58c 4629
@functools.cached_property
def skip_webpage(self):
    """Whether 'webpage' is listed in the `skip` extractor argument of YoutubeTabIE (cached)."""
    skipped_steps = self._configuration_arg('skip', ie_key=YoutubeTabIE.ie_key())
    return 'webpage' in skipped_steps
4633
def _extract_webpage(self, url, item_id, fatal=True):
    """Download the webpage and extract its initial data, retrying where sensible.

    Retries (via self.RetryManager) happen on network errors other than
    HTTP 403/429 and when the initial data lacks all of the expected
    top-level keys. Other extraction errors and reported alerts end the
    attempts and are raised or downgraded to warnings depending on `fatal`.

    @returns (webpage, data) - either may be None if nothing was obtained
    """
    webpage = data = None
    for retry in self.RetryManager(fatal=fatal):
        try:
            webpage = self._download_webpage(url, item_id, note='Downloading webpage')
            data = self.extract_yt_initial_data(item_id, webpage or '', fatal=fatal) or {}
        except ExtractorError as err:
            cause = err.cause
            is_403_or_429 = isinstance(cause, urllib.error.HTTPError) and cause.code in (403, 429)
            if isinstance(cause, network_exceptions) and not is_403_or_429:
                retry.error = err
                continue
            self._error_or_warning(err, fatal=fatal)
            break

        try:
            self._extract_and_report_alerts(data)
        except ExtractorError as err:
            self._error_or_warning(err, fatal=fatal)
            break

        # Sometimes youtube returns a webpage with incomplete ytInitialData
        # See: https://github.com/yt-dlp/yt-dlp/issues/116
        has_expected_keys = traverse_obj(
            data, 'contents', 'currentVideoEndpoint', 'onResponseReceivedActions')
        if not has_expected_keys:
            retry.error = ExtractorError('Incomplete yt initial data received')

    return webpage, data
4661
def _report_playlist_authcheck(self, ytcfg, fatal=True):
    """Use if failed to extract ytcfg (and data) from initial webpage

    Does nothing when `ytcfg` is available or the session is not
    authenticated. Otherwise raises ExtractorError when `fatal` is set and
    'authcheck' is not among the skipped steps, and only warns in all
    other cases.
    """
    if ytcfg or not self.is_authenticated:
        return

    msg = 'Playlists that require authentication may not extract correctly without a successful webpage download'
    skipped_steps = self._configuration_arg('skip', ie_key=YoutubeTabIE.ie_key())
    if 'authcheck' not in skipped_steps and fatal:
        raise ExtractorError(
            f'{msg}. If you are not downloading private content, or '
            'your cookies are only for the first account and channel,'
            ' pass "--extractor-args youtubetab:skip=authcheck" to skip this check',
            expected=True)
    self.report_warning(msg, only_once=True)
4673
def _extract_data(self, url, item_id, ytcfg=None, fatal=True, webpage_fatal=False, default_client='web'):
    """Obtain the page data (and ytcfg) for `url`.

    Unless the webpage step is skipped, the webpage is downloaded first and
    its initial data used. When that yields no data, the data is requested
    through the API via `_extract_tab_endpoint`.

    @returns (data, ytcfg)
    """
    data = None
    if not self.skip_webpage:
        webpage, data = self._extract_webpage(url, item_id, fatal=webpage_fatal)
        if not ytcfg:
            ytcfg = self.extract_ytcfg(item_id, webpage)

        # Reject webpage data if redirected to home page without explicitly requesting
        tabs = traverse_obj(
            data, ('contents', 'twoColumnBrowseResultsRenderer', 'tabs'), expected_type=list, default=[])
        selected_tab = self._extract_selected_tab(tabs, fatal=False) or {}
        redirected_to_home = (
            url != 'https://www.youtube.com/feed/recommended'
            and selected_tab.get('tabIdentifier') == 'FEwhat_to_watch'  # Home page
            and 'no-youtube-channel-redirect' not in self.get_param('compat_opts', []))
        if redirected_to_home:
            msg = 'The channel/playlist does not exist and the URL redirected to youtube.com home page'
            if fatal:
                raise ExtractorError(msg, expected=True)
            self.report_warning(msg, only_once=True)

    if not data:
        self._report_playlist_authcheck(ytcfg, fatal=fatal)
        data = self._extract_tab_endpoint(url, item_id, ytcfg, fatal=fatal, default_client=default_client)
    return data, ytcfg
4693
def _extract_tab_endpoint(self, url, item_id, ytcfg=None, fatal=True, default_client='web'):
    """Resolve `url` through the API and fetch the data of the endpoint it maps to.

    The URL is first resolved with 'navigation/resolve_url'. If the response
    holds a browse or watch endpoint, its parameters are sent to the
    'browse' or 'next' API endpoint respectively (browse is tried first).

    Raises ExtractorError when nothing could be resolved and `fatal` is set;
    otherwise a warning is reported and None is returned.
    """
    headers = self.generate_api_headers(ytcfg=ytcfg, default_client=default_client)
    resolve_response = self._extract_response(
        item_id=item_id, query={'url': url}, check_get_keys='endpoint', headers=headers, ytcfg=ytcfg, fatal=fatal,
        ep='navigation/resolve_url', note='Downloading API parameters API JSON', default_client=default_client)

    for ep_key, ep in (('browseEndpoint', 'browse'), ('watchEndpoint', 'next')):
        params = try_get(resolve_response, lambda x: x['endpoint'][ep_key], dict)
        if not params:
            continue
        return self._extract_response(
            item_id=item_id, query=params, ep=ep, headers=headers,
            ytcfg=ytcfg, fatal=fatal, default_client=default_client,
            check_get_keys=('contents', 'currentVideoEndpoint', 'onResponseReceivedActions'))

    err_note = 'Failed to resolve url (does the playlist exist?)'
    if not fatal:
        self.report_warning(err_note, item_id)
        return
    raise ExtractorError(err_note, expected=True)
4711
# Default 'params' value for search requests; used when `params` is not passed to _search_results
_SEARCH_PARAMS = None

def _search_results(self, query, params=NO_DEFAULT, default_client='web'):
    """Yield the entries of a search for `query`, following continuations.

    @param query:          search string
    @param params:         value for the request's 'params' field; defaults to
                           self._SEARCH_PARAMS, and is omitted when falsy
    @param default_client: client passed to the ytcfg download, headers and requests
    """
    if params is NO_DEFAULT:
        params = self._SEARCH_PARAMS
    data = {'query': query}
    if params:
        data['params'] = params

    # Candidate locations of the result list within a response
    content_keys = (
        ('contents', 'twoColumnSearchResultsRenderer', 'primaryContents', 'sectionListRenderer', 'contents'),
        ('onResponseReceivedCommands', 0, 'appendContinuationItemsAction', 'continuationItems'),
        # ytmusic search
        ('contents', 'tabbedSearchResultsRenderer', 'tabs', 0, 'tabRenderer', 'content', 'sectionListRenderer', 'contents'),
        ('continuationContents', ),
    )
    display_id = f'query "{query}"'
    # Distinct top-level keys of the candidate paths
    check_get_keys = tuple({path[0] for path in content_keys})
    ytcfg = {} if self.skip_webpage else self._download_ytcfg(default_client, display_id)
    self._report_playlist_authcheck(ytcfg, fatal=False)

    # Single-element list; the slot is passed to _extract_entries and read back after each page
    continuation_list = [None]
    search = None
    for page_num in itertools.count(1):
        data.update(continuation_list[0] or {})
        visitor_data = self._extract_visitor_data(search)
        headers = self.generate_api_headers(
            ytcfg=ytcfg, visitor_data=visitor_data, default_client=default_client)
        search = self._extract_response(
            item_id=f'{display_id} page {page_num}', ep='search', query=data,
            default_client=default_client, check_get_keys=check_get_keys, ytcfg=ytcfg, headers=headers)
        slr_contents = traverse_obj(search, *content_keys)
        page = {'contents': list(variadic(slr_contents))}
        yield from self._extract_entries(page, continuation_list)
        if not continuation_list[0]:
            break
4746
4747
4748class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
4749 IE_DESC = 'YouTube Tabs'
4750 _VALID_URL = r'''(?x:
4751 https?://
4752 (?:\w+\.)?
4753 (?:
4754 youtube(?:kids)?\.com|
4755 %(invidious)s
4756 )/
4757 (?:
4758 (?P<channel_type>channel|c|user|browse)/|
4759 (?P<not_channel>
4760 feed/|hashtag/|
4761 (?:playlist|watch)\?.*?\blist=
4762 )|
4763 (?!(?:%(reserved_names)s)\b) # Direct URLs
4764 )
4765 (?P<id>[^/?\#&]+)
4766 )''' % {
4767 'reserved_names': YoutubeBaseInfoExtractor._RESERVED_NAMES,
4768 'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
4769 }
4770 IE_NAME = 'youtube:tab'
4771
4772 _TESTS = [{
4773 'note': 'playlists, multipage',
4774 'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid',
4775 'playlist_mincount': 94,
4776 'info_dict': {
4777 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
976ae3ea 4778 'title': 'Igor Kleiner - Playlists',
a6213a49 4779 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
976ae3ea 4780 'uploader': 'Igor Kleiner',
a6213a49 4781 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
976ae3ea 4782 'channel': 'Igor Kleiner',
4783 'channel_id': 'UCqj7Cz7revf5maW9g5pgNcg',
4784 'tags': ['"критическое', 'мышление"', '"наука', 'просто"', 'математика', '"анализ', 'данных"'],
4785 'channel_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',
4786 'uploader_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',
6c73052c 4787 'channel_follower_count': int
a6213a49 4788 },
4789 }, {
4790 'note': 'playlists, multipage, different order',
4791 'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
4792 'playlist_mincount': 94,
4793 'info_dict': {
4794 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
976ae3ea 4795 'title': 'Igor Kleiner - Playlists',
a6213a49 4796 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
4797 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
976ae3ea 4798 'uploader': 'Igor Kleiner',
4799 'uploader_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',
4800 'tags': ['"критическое', 'мышление"', '"наука', 'просто"', 'математика', '"анализ', 'данных"'],
4801 'channel_id': 'UCqj7Cz7revf5maW9g5pgNcg',
4802 'channel': 'Igor Kleiner',
4803 'channel_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',
6c73052c 4804 'channel_follower_count': int
a6213a49 4805 },
4806 }, {
4807 'note': 'playlists, series',
4808 'url': 'https://www.youtube.com/c/3blue1brown/playlists?view=50&sort=dd&shelf_id=3',
4809 'playlist_mincount': 5,
4810 'info_dict': {
4811 'id': 'UCYO_jab_esuFRV4b17AJtAw',
4812 'title': '3Blue1Brown - Playlists',
4813 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
4814 'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
4815 'uploader': '3Blue1Brown',
976ae3ea 4816 'channel_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',
4817 'uploader_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',
4818 'channel': '3Blue1Brown',
4819 'channel_id': 'UCYO_jab_esuFRV4b17AJtAw',
4820 'tags': ['Mathematics'],
6c73052c 4821 'channel_follower_count': int
a6213a49 4822 },
4823 }, {
4824 'note': 'playlists, singlepage',
4825 'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
4826 'playlist_mincount': 4,
4827 'info_dict': {
4828 'id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
4829 'title': 'ThirstForScience - Playlists',
4830 'description': 'md5:609399d937ea957b0f53cbffb747a14c',
4831 'uploader': 'ThirstForScience',
4832 'uploader_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
976ae3ea 4833 'uploader_url': 'https://www.youtube.com/channel/UCAEtajcuhQ6an9WEzY9LEMQ',
4834 'channel_url': 'https://www.youtube.com/channel/UCAEtajcuhQ6an9WEzY9LEMQ',
4835 'channel_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
4836 'tags': 'count:13',
4837 'channel': 'ThirstForScience',
6c73052c 4838 'channel_follower_count': int
a6213a49 4839 }
4840 }, {
4841 'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',
4842 'only_matching': True,
4843 }, {
4844 'note': 'basic, single video playlist',
4845 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
4846 'info_dict': {
4847 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
4848 'uploader': 'Sergey M.',
4849 'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
4850 'title': 'youtube-dl public playlist',
976ae3ea 4851 'description': '',
4852 'tags': [],
4853 'view_count': int,
4854 'modified_date': '20201130',
4855 'channel': 'Sergey M.',
4856 'channel_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
4857 'uploader_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
4858 'channel_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
a6213a49 4859 },
4860 'playlist_count': 1,
4861 }, {
4862 'note': 'empty playlist',
4863 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
4864 'info_dict': {
4865 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
4866 'uploader': 'Sergey M.',
4867 'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
4868 'title': 'youtube-dl empty playlist',
976ae3ea 4869 'tags': [],
4870 'channel': 'Sergey M.',
4871 'description': '',
4872 'modified_date': '20160902',
4873 'channel_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
4874 'channel_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
4875 'uploader_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
a6213a49 4876 },
4877 'playlist_count': 0,
4878 }, {
4879 'note': 'Home tab',
4880 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/featured',
4881 'info_dict': {
4882 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4883 'title': 'lex will - Home',
4884 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
4885 'uploader': 'lex will',
4886 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
976ae3ea 4887 'channel': 'lex will',
4888 'tags': ['bible', 'history', 'prophesy'],
4889 'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
4890 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
4891 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
6c73052c 4892 'channel_follower_count': int
a6213a49 4893 },
4894 'playlist_mincount': 2,
4895 }, {
4896 'note': 'Videos tab',
4897 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos',
4898 'info_dict': {
4899 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4900 'title': 'lex will - Videos',
4901 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
4902 'uploader': 'lex will',
4903 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
976ae3ea 4904 'tags': ['bible', 'history', 'prophesy'],
4905 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
4906 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4907 'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
4908 'channel': 'lex will',
6c73052c 4909 'channel_follower_count': int
a6213a49 4910 },
4911 'playlist_mincount': 975,
4912 }, {
4913 'note': 'Videos tab, sorted by popular',
4914 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos?view=0&sort=p&flow=grid',
4915 'info_dict': {
4916 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4917 'title': 'lex will - Videos',
4918 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
4919 'uploader': 'lex will',
4920 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
976ae3ea 4921 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4922 'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
4923 'channel': 'lex will',
4924 'tags': ['bible', 'history', 'prophesy'],
4925 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
6c73052c 4926 'channel_follower_count': int
a6213a49 4927 },
4928 'playlist_mincount': 199,
4929 }, {
4930 'note': 'Playlists tab',
4931 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/playlists',
4932 'info_dict': {
4933 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4934 'title': 'lex will - Playlists',
4935 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
4936 'uploader': 'lex will',
4937 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
976ae3ea 4938 'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
4939 'channel': 'lex will',
4940 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
4941 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4942 'tags': ['bible', 'history', 'prophesy'],
6c73052c 4943 'channel_follower_count': int
a6213a49 4944 },
4945 'playlist_mincount': 17,
4946 }, {
4947 'note': 'Community tab',
4948 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community',
4949 'info_dict': {
4950 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4951 'title': 'lex will - Community',
4952 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
4953 'uploader': 'lex will',
4954 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
976ae3ea 4955 'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
4956 'channel': 'lex will',
4957 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
4958 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4959 'tags': ['bible', 'history', 'prophesy'],
6c73052c 4960 'channel_follower_count': int
a6213a49 4961 },
4962 'playlist_mincount': 18,
4963 }, {
4964 'note': 'Channels tab',
4965 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels',
4966 'info_dict': {
4967 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4968 'title': 'lex will - Channels',
4969 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
4970 'uploader': 'lex will',
4971 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
976ae3ea 4972 'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
4973 'channel': 'lex will',
4974 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
4975 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4976 'tags': ['bible', 'history', 'prophesy'],
6c73052c 4977 'channel_follower_count': int
a6213a49 4978 },
4979 'playlist_mincount': 12,
4980 }, {
4981 'note': 'Search tab',
4982 'url': 'https://www.youtube.com/c/3blue1brown/search?query=linear%20algebra',
4983 'playlist_mincount': 40,
4984 'info_dict': {
4985 'id': 'UCYO_jab_esuFRV4b17AJtAw',
4986 'title': '3Blue1Brown - Search - linear algebra',
4987 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
4988 'uploader': '3Blue1Brown',
4989 'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
976ae3ea 4990 'channel_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',
4991 'uploader_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',
4992 'tags': ['Mathematics'],
4993 'channel': '3Blue1Brown',
4994 'channel_id': 'UCYO_jab_esuFRV4b17AJtAw',
6c73052c 4995 'channel_follower_count': int
a6213a49 4996 },
4997 }, {
4998 'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',
4999 'only_matching': True,
5000 }, {
5001 'url': 'https://www.youtubekids.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
5002 'only_matching': True,
5003 }, {
5004 'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
5005 'only_matching': True,
5006 }, {
5007 'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
5008 'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
5009 'info_dict': {
5010 'title': '29C3: Not my department',
5011 'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
5012 'uploader': 'Christiaan008',
5013 'uploader_id': 'UCEPzS1rYsrkqzSLNp76nrcg',
5014 'description': 'md5:a14dc1a8ef8307a9807fe136a0660268',
976ae3ea 5015 'tags': [],
5016 'uploader_url': 'https://www.youtube.com/c/ChRiStIaAn008',
5017 'view_count': int,
5018 'modified_date': '20150605',
5019 'channel_id': 'UCEPzS1rYsrkqzSLNp76nrcg',
5020 'channel_url': 'https://www.youtube.com/c/ChRiStIaAn008',
5021 'channel': 'Christiaan008',
a6213a49 5022 },
5023 'playlist_count': 96,
5024 }, {
5025 'note': 'Large playlist',
5026 'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
5027 'info_dict': {
5028 'title': 'Uploads from Cauchemar',
5029 'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
5030 'uploader': 'Cauchemar',
5031 'uploader_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',
976ae3ea 5032 'channel_url': 'https://www.youtube.com/c/Cauchemar89',
5033 'tags': [],
5034 'modified_date': r're:\d{8}',
5035 'channel': 'Cauchemar',
5036 'uploader_url': 'https://www.youtube.com/c/Cauchemar89',
5037 'view_count': int,
5038 'description': '',
5039 'channel_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',
a6213a49 5040 },
5041 'playlist_mincount': 1123,
976ae3ea 5042 'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
a6213a49 5043 }, {
5044 'note': 'even larger playlist, 8832 videos',
5045 'url': 'http://www.youtube.com/user/NASAgovVideo/videos',
5046 'only_matching': True,
5047 }, {
5048 'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
5049 'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
5050 'info_dict': {
5051 'title': 'Uploads from Interstellar Movie',
5052 'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
5053 'uploader': 'Interstellar Movie',
5054 'uploader_id': 'UCXw-G3eDE9trcvY2sBMM_aA',
976ae3ea 5055 'uploader_url': 'https://www.youtube.com/c/InterstellarMovie',
5056 'tags': [],
5057 'view_count': int,
5058 'channel_id': 'UCXw-G3eDE9trcvY2sBMM_aA',
5059 'channel_url': 'https://www.youtube.com/c/InterstellarMovie',
5060 'channel': 'Interstellar Movie',
5061 'description': '',
5062 'modified_date': r're:\d{8}',
a6213a49 5063 },
5064 'playlist_mincount': 21,
5065 }, {
5066 'note': 'Playlist with "show unavailable videos" button',
5067 'url': 'https://www.youtube.com/playlist?list=UUTYLiWFZy8xtPwxFwX9rV7Q',
5068 'info_dict': {
5069 'title': 'Uploads from Phim Siêu Nhân Nhật Bản',
5070 'id': 'UUTYLiWFZy8xtPwxFwX9rV7Q',
5071 'uploader': 'Phim Siêu Nhân Nhật Bản',
5072 'uploader_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',
976ae3ea 5073 'view_count': int,
5074 'channel': 'Phim Siêu Nhân Nhật Bản',
5075 'tags': [],
5076 'uploader_url': 'https://www.youtube.com/channel/UCTYLiWFZy8xtPwxFwX9rV7Q',
5077 'description': '',
5078 'channel_url': 'https://www.youtube.com/channel/UCTYLiWFZy8xtPwxFwX9rV7Q',
5079 'channel_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',
5080 'modified_date': r're:\d{8}',
a6213a49 5081 },
5082 'playlist_mincount': 200,
976ae3ea 5083 'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
a6213a49 5084 }, {
5085 'note': 'Playlist with unavailable videos in page 7',
5086 'url': 'https://www.youtube.com/playlist?list=UU8l9frL61Yl5KFOl87nIm2w',
5087 'info_dict': {
5088 'title': 'Uploads from BlankTV',
5089 'id': 'UU8l9frL61Yl5KFOl87nIm2w',
5090 'uploader': 'BlankTV',
5091 'uploader_id': 'UC8l9frL61Yl5KFOl87nIm2w',
976ae3ea 5092 'channel': 'BlankTV',
5093 'channel_url': 'https://www.youtube.com/c/blanktv',
5094 'channel_id': 'UC8l9frL61Yl5KFOl87nIm2w',
5095 'view_count': int,
5096 'tags': [],
5097 'uploader_url': 'https://www.youtube.com/c/blanktv',
5098 'modified_date': r're:\d{8}',
5099 'description': '',
a6213a49 5100 },
5101 'playlist_mincount': 1000,
976ae3ea 5102 'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
a6213a49 5103 }, {
5104 'note': 'https://github.com/ytdl-org/youtube-dl/issues/21844',
5105 'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
5106 'info_dict': {
5107 'title': 'Data Analysis with Dr Mike Pound',
5108 'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
5109 'uploader_id': 'UC9-y-6csu5WGm29I7JiwpnA',
5110 'uploader': 'Computerphile',
5111 'description': 'md5:7f567c574d13d3f8c0954d9ffee4e487',
976ae3ea 5112 'uploader_url': 'https://www.youtube.com/user/Computerphile',
5113 'tags': [],
5114 'view_count': int,
5115 'channel_id': 'UC9-y-6csu5WGm29I7JiwpnA',
5116 'channel_url': 'https://www.youtube.com/user/Computerphile',
5117 'channel': 'Computerphile',
a6213a49 5118 },
5119 'playlist_mincount': 11,
5120 }, {
5121 'url': 'https://invidio.us/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
5122 'only_matching': True,
5123 }, {
5124 'note': 'Playlist URL that does not actually serve a playlist',
5125 'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
5126 'info_dict': {
5127 'id': 'FqZTN594JQw',
5128 'ext': 'webm',
5129 'title': "Smiley's People 01 detective, Adventure Series, Action",
5130 'uploader': 'STREEM',
5131 'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
5132 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
5133 'upload_date': '20150526',
5134 'license': 'Standard YouTube License',
5135 'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
5136 'categories': ['People & Blogs'],
5137 'tags': list,
5138 'view_count': int,
5139 'like_count': int,
a6213a49 5140 },
5141 'params': {
5142 'skip_download': True,
5143 },
5144 'skip': 'This video is not available.',
5145 'add_ie': [YoutubeIE.ie_key()],
5146 }, {
5147 'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',
5148 'only_matching': True,
5149 }, {
5150 'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
5151 'only_matching': True,
5152 }, {
5153 'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',
5154 'info_dict': {
12a1b225 5155 'id': 'Wq15eF5vCbI', # This will keep changing
a6213a49 5156 'ext': 'mp4',
976ae3ea 5157 'title': str,
a6213a49 5158 'uploader': 'Sky News',
5159 'uploader_id': 'skynews',
5160 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/skynews',
5161 'upload_date': r're:\d{8}',
976ae3ea 5162 'description': str,
a6213a49 5163 'categories': ['News & Politics'],
5164 'tags': list,
5165 'like_count': int,
6c73052c 5166 'release_timestamp': 1642502819,
976ae3ea 5167 'channel': 'Sky News',
5168 'channel_id': 'UCoMdktPbSTixAyNGwb-UYkQ',
5169 'age_limit': 0,
5170 'view_count': int,
6c73052c 5171 'thumbnail': 'https://i.ytimg.com/vi/GgL890LIznQ/maxresdefault_live.jpg',
976ae3ea 5172 'playable_in_embed': True,
6c73052c 5173 'release_date': '20220118',
976ae3ea 5174 'availability': 'public',
5175 'live_status': 'is_live',
5176 'channel_url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ',
6c73052c 5177 'channel_follower_count': int
a6213a49 5178 },
5179 'params': {
5180 'skip_download': True,
5181 },
976ae3ea 5182 'expected_warnings': ['Ignoring subtitle tracks found in '],
a6213a49 5183 }, {
5184 'url': 'https://www.youtube.com/user/TheYoungTurks/live',
5185 'info_dict': {
5186 'id': 'a48o2S1cPoo',
5187 'ext': 'mp4',
5188 'title': 'The Young Turks - Live Main Show',
5189 'uploader': 'The Young Turks',
5190 'uploader_id': 'TheYoungTurks',
5191 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
5192 'upload_date': '20150715',
5193 'license': 'Standard YouTube License',
5194 'description': 'md5:438179573adcdff3c97ebb1ee632b891',
5195 'categories': ['News & Politics'],
5196 'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
5197 'like_count': int,
a6213a49 5198 },
5199 'params': {
5200 'skip_download': True,
5201 },
5202 'only_matching': True,
5203 }, {
5204 'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
5205 'only_matching': True,
5206 }, {
5207 'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
5208 'only_matching': True,
5209 }, {
5210 'note': 'A channel that is not live. Should raise error',
5211 'url': 'https://www.youtube.com/user/numberphile/live',
5212 'only_matching': True,
5213 }, {
5214 'url': 'https://www.youtube.com/feed/trending',
5215 'only_matching': True,
5216 }, {
5217 'url': 'https://www.youtube.com/feed/library',
5218 'only_matching': True,
5219 }, {
5220 'url': 'https://www.youtube.com/feed/history',
5221 'only_matching': True,
5222 }, {
5223 'url': 'https://www.youtube.com/feed/subscriptions',
5224 'only_matching': True,
5225 }, {
5226 'url': 'https://www.youtube.com/feed/watch_later',
5227 'only_matching': True,
5228 }, {
5229 'note': 'Recommended - redirects to home page.',
5230 'url': 'https://www.youtube.com/feed/recommended',
5231 'only_matching': True,
5232 }, {
5233 'note': 'inline playlist with not always working continuations',
5234 'url': 'https://www.youtube.com/watch?v=UC6u0Tct-Fo&list=PL36D642111D65BE7C',
5235 'only_matching': True,
5236 }, {
5237 'url': 'https://www.youtube.com/course',
5238 'only_matching': True,
5239 }, {
5240 'url': 'https://www.youtube.com/zsecurity',
5241 'only_matching': True,
5242 }, {
5243 'url': 'http://www.youtube.com/NASAgovVideo/videos',
5244 'only_matching': True,
5245 }, {
5246 'url': 'https://www.youtube.com/TheYoungTurks/live',
5247 'only_matching': True,
5248 }, {
5249 'url': 'https://www.youtube.com/hashtag/cctv9',
5250 'info_dict': {
5251 'id': 'cctv9',
5252 'title': '#cctv9',
976ae3ea 5253 'tags': [],
a6213a49 5254 },
5255 'playlist_mincount': 350,
5256 }, {
5257 'url': 'https://www.youtube.com/watch?list=PLW4dVinRY435CBE_JD3t-0SRXKfnZHS1P&feature=youtu.be&v=M9cJMXmQ_ZU',
5258 'only_matching': True,
5259 }, {
5260 'note': 'Requires Premium: should request additional YTM-info webpage (and have format 141) for videos in playlist',
5261 'url': 'https://music.youtube.com/playlist?list=PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
5262 'only_matching': True
5263 }, {
5264 'note': '/browse/ should redirect to /channel/',
5265 'url': 'https://music.youtube.com/browse/UC1a8OFewdjuLq6KlF8M_8Ng',
5266 'only_matching': True
5267 }, {
5268 'note': 'VLPL, should redirect to playlist?list=PL...',
5269 'url': 'https://music.youtube.com/browse/VLPLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
5270 'info_dict': {
5271 'id': 'PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
5272 'uploader': 'NoCopyrightSounds',
5273 'description': 'Providing you with copyright free / safe music for gaming, live streaming, studying and more!',
5274 'uploader_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',
12a1b225 5275 'title': 'NCS : All Releases 💿',
976ae3ea 5276 'uploader_url': 'https://www.youtube.com/c/NoCopyrightSounds',
5277 'channel_url': 'https://www.youtube.com/c/NoCopyrightSounds',
5278 'modified_date': r're:\d{8}',
5279 'view_count': int,
5280 'channel_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',
5281 'tags': [],
5282 'channel': 'NoCopyrightSounds',
a6213a49 5283 },
5284 'playlist_mincount': 166,
976ae3ea 5285 'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
a6213a49 5286 }, {
5287 'note': 'Topic, should redirect to playlist?list=UU...',
5288 'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',
5289 'info_dict': {
5290 'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',
5291 'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
5292 'title': 'Uploads from Royalty Free Music - Topic',
5293 'uploader': 'Royalty Free Music - Topic',
976ae3ea 5294 'tags': [],
5295 'channel_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
5296 'channel': 'Royalty Free Music - Topic',
5297 'view_count': int,
5298 'channel_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',
5299 'channel_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',
5300 'modified_date': r're:\d{8}',
5301 'uploader_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',
5302 'description': '',
a6213a49 5303 },
5304 'expected_warnings': [
a6213a49 5305 'The URL does not have a videos tab',
976ae3ea 5306 r'[Uu]navailable videos (are|will be) hidden',
a6213a49 5307 ],
5308 'playlist_mincount': 101,
5309 }, {
5310 'note': 'Topic without a UU playlist',
5311 'url': 'https://www.youtube.com/channel/UCtFRv9O2AHqOZjjynzrv-xg',
5312 'info_dict': {
5313 'id': 'UCtFRv9O2AHqOZjjynzrv-xg',
5314 'title': 'UCtFRv9O2AHqOZjjynzrv-xg',
976ae3ea 5315 'tags': [],
a6213a49 5316 },
5317 'expected_warnings': [
976ae3ea 5318 'the playlist redirect gave error',
a6213a49 5319 ],
5320 'playlist_mincount': 9,
5321 }, {
5322 'note': 'Youtube music Album',
5323 'url': 'https://music.youtube.com/browse/MPREb_gTAcphH99wE',
5324 'info_dict': {
5325 'id': 'OLAK5uy_l1m0thk3g31NmIIz_vMIbWtyv7eZixlH0',
5326 'title': 'Album - Royalty Free Music Library V2 (50 Songs)',
976ae3ea 5327 'tags': [],
5328 'view_count': int,
5329 'description': '',
5330 'availability': 'unlisted',
5331 'modified_date': r're:\d{8}',
a6213a49 5332 },
5333 'playlist_count': 50,
5334 }, {
5335 'note': 'unlisted single video playlist',
5336 'url': 'https://www.youtube.com/playlist?list=PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
5337 'info_dict': {
5338 'uploader_id': 'UC9zHu_mHU96r19o-wV5Qs1Q',
5339 'uploader': 'colethedj',
5340 'id': 'PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
5341 'title': 'yt-dlp unlisted playlist test',
976ae3ea 5342 'availability': 'unlisted',
5343 'tags': [],
12a1b225 5344 'modified_date': '20220418',
976ae3ea 5345 'channel': 'colethedj',
5346 'view_count': int,
5347 'description': '',
5348 'uploader_url': 'https://www.youtube.com/channel/UC9zHu_mHU96r19o-wV5Qs1Q',
5349 'channel_id': 'UC9zHu_mHU96r19o-wV5Qs1Q',
5350 'channel_url': 'https://www.youtube.com/channel/UC9zHu_mHU96r19o-wV5Qs1Q',
a6213a49 5351 },
5352 'playlist_count': 1,
5353 }, {
5354 'note': 'API Fallback: Recommended - redirects to home page. Requires visitorData',
5355 'url': 'https://www.youtube.com/feed/recommended',
5356 'info_dict': {
5357 'id': 'recommended',
5358 'title': 'recommended',
6c73052c 5359 'tags': [],
a6213a49 5360 },
5361 'playlist_mincount': 50,
5362 'params': {
5363 'skip_download': True,
5364 'extractor_args': {'youtubetab': {'skip': ['webpage']}}
5365 },
5366 }, {
5367 'note': 'API Fallback: /videos tab, sorted by oldest first',
5368 'url': 'https://www.youtube.com/user/theCodyReeder/videos?view=0&sort=da&flow=grid',
5369 'info_dict': {
5370 'id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',
5371 'title': 'Cody\'sLab - Videos',
5372 'description': 'md5:d083b7c2f0c67ee7a6c74c3e9b4243fa',
5373 'uploader': 'Cody\'sLab',
5374 'uploader_id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',
976ae3ea 5375 'channel': 'Cody\'sLab',
5376 'channel_id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',
5377 'tags': [],
5378 'channel_url': 'https://www.youtube.com/channel/UCu6mSoMNzHQiBIOCkHUa2Aw',
5379 'uploader_url': 'https://www.youtube.com/channel/UCu6mSoMNzHQiBIOCkHUa2Aw',
6c73052c 5380 'channel_follower_count': int
a6213a49 5381 },
5382 'playlist_mincount': 650,
5383 'params': {
5384 'skip_download': True,
5385 'extractor_args': {'youtubetab': {'skip': ['webpage']}}
5386 },
5387 }, {
5388 'note': 'API Fallback: Topic, should redirect to playlist?list=UU...',
5389 'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',
5390 'info_dict': {
5391 'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',
5392 'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
5393 'title': 'Uploads from Royalty Free Music - Topic',
5394 'uploader': 'Royalty Free Music - Topic',
976ae3ea 5395 'modified_date': r're:\d{8}',
5396 'channel_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
5397 'description': '',
5398 'channel_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',
5399 'tags': [],
5400 'channel': 'Royalty Free Music - Topic',
5401 'view_count': int,
5402 'uploader_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',
a6213a49 5403 },
5404 'expected_warnings': [
976ae3ea 5405 'does not have a videos tab',
5406 r'[Uu]navailable videos (are|will be) hidden',
a6213a49 5407 ],
5408 'playlist_mincount': 101,
5409 'params': {
5410 'skip_download': True,
5411 'extractor_args': {'youtubetab': {'skip': ['webpage']}}
5412 },
7c219ea6 5413 }, {
5414 'note': 'non-standard redirect to regional channel',
5415 'url': 'https://www.youtube.com/channel/UCwVVpHQ2Cs9iGJfpdFngePQ',
5416 'only_matching': True
61d3665d 5417 }, {
5418 'note': 'collaborative playlist (uploader name in the form "by <uploader> and x other(s)")',
5419 'url': 'https://www.youtube.com/playlist?list=PLx-_-Kk4c89oOHEDQAojOXzEzemXxoqx6',
5420 'info_dict': {
5421 'id': 'PLx-_-Kk4c89oOHEDQAojOXzEzemXxoqx6',
5422 'modified_date': '20220407',
5423 'channel_url': 'https://www.youtube.com/channel/UCKcqXmCcyqnhgpA5P0oHH_Q',
5424 'tags': [],
5425 'uploader_id': 'UCKcqXmCcyqnhgpA5P0oHH_Q',
5426 'uploader': 'pukkandan',
5427 'availability': 'unlisted',
5428 'channel_id': 'UCKcqXmCcyqnhgpA5P0oHH_Q',
5429 'channel': 'pukkandan',
5430 'description': 'Test for collaborative playlist',
5431 'title': 'yt-dlp test - collaborative playlist',
12a1b225 5432 'view_count': int,
61d3665d 5433 'uploader_url': 'https://www.youtube.com/channel/UCKcqXmCcyqnhgpA5P0oHH_Q',
5434 },
5435 'playlist_mincount': 2
a6213a49 5436 }]
5437
@classmethod
def suitable(cls, url):
    """Tell whether this extractor should handle ``url``.

    Any URL accepted by ``YoutubeIE`` is declined here; every other URL is
    decided by the parent class' ``suitable``.
    """
    if YoutubeIE.suitable(url):
        return False
    return super().suitable(url)
9297939e 5441
# Splits a URL into three named parts:
#   pre  - everything matched by _VALID_URL
#   tab  - an optional "/word" path segment; the conditional group only tries
#          to match it when the "not_channel" group did not participate
#   post - whatever text remains up to the end of the string
_URL_RE = re.compile(
    rf'(?P<pre>{_VALID_URL})'
    r'(?(not_channel)|(?P<tab>/\w+))?'
    r'(?P<post>.*)$')
fe03a6cd 5443
@YoutubeTabBaseInfoExtractor.passthrough_smuggled_data
def _real_extract(self, url, smuggled_data):
    """Extract a channel tab, playlist or feed page.

    The URL is first normalised (host forced to www.youtube.com, music
    "VL"/"MP"/browse ids rewritten, bare channel pages pointed at /videos
    unless the 'no-youtube-channel-redirect' compat option is set). The page
    data is then fetched and dispatched, in this order, to the tab extractor,
    the watch-page playlist extractor, or a plain video URL result.

    Raises ExtractorError when the page cannot be recognised, when a music
    album cannot be resolved, or when an explicitly requested tab is missing;
    raises UserNotLive when a /live tab was requested but tabs were returned.
    """
    item_id = self._match_id(url)
    parsed_url = urllib.parse.urlparse(url)
    url = urllib.parse.urlunparse(parsed_url._replace(netloc='www.youtube.com'))
    compat_opts = self.get_param('compat_opts', [])
    follow_redirects = 'no-youtube-channel-redirect' not in compat_opts
    tabs_path = ('contents', 'twoColumnBrowseResultsRenderer', 'tabs')

    def split_url(target):
        # Groups that did not participate in the match become '' so that
        # string methods can be used on every entry without None checks
        groups = self._URL_RE.match(target).groupdict()
        return {name: '' if value is None else value for name, value in groups.items()}

    def video_result(vid):
        return self.url_result(
            f'https://www.youtube.com/watch?v={vid}', ie=YoutubeIE.ie_key(), video_id=vid)

    mobj = split_url(url)
    redirect_warning = None
    # Youtube returns incomplete data if tabname is not lower case
    pre, tab, post = mobj['pre'], mobj['tab'].lower(), mobj['post']
    is_channel = not mobj['not_channel']

    if is_channel and smuggled_data.get('is_music_url'):
        id_prefix = item_id[:2]
        if id_prefix == 'VL':
            # Youtube music VL channels have an equivalent playlist
            item_id = item_id[2:]
            pre = f'https://www.youtube.com/playlist?list={item_id}'
            tab, post, is_channel = '', '', False
        elif id_prefix == 'MP':
            # Resolve albums (/[channel/browse]/MP...) to their equivalent playlist
            album_data = self._extract_tab_endpoint(
                f'https://music.youtube.com/channel/{item_id}', item_id, default_client='web_music')
            album_url = traverse_obj(
                album_data, ('microformat', 'microformatDataRenderer', 'urlCanonical'),
                get_all=False, expected_type=str)
            if not album_url:
                raise ExtractorError('Failed to resolve album to playlist')
            return self.url_result(album_url, ie=YoutubeTabIE.ie_key())
        elif mobj['channel_type'] == 'browse':
            # Youtube music /browse/ should be changed to /channel/
            pre = f'https://www.youtube.com/channel/{item_id}'

    original_tab_name = tab
    if is_channel and not tab and follow_redirects:
        # Home URLs should redirect to /videos/
        redirect_warning = (
            'A channel/user page was given. All the channel\'s videos will be downloaded. '
            'To download only the videos in the home page, add a "/featured" to the URL')
        tab = '/videos'

    url = f'{pre}{tab}{post}'
    mobj = split_url(url)

    # Handle both video/playlist URLs
    qs = parse_qs(url)
    video_id = qs.get('v', [None])[0]
    playlist_id = qs.get('list', [None])[0]

    if not video_id and mobj['not_channel'].startswith('watch'):
        if not playlist_id:
            # If there is neither video or playlist ids, youtube redirects to home page, which is undesirable
            raise ExtractorError('Unable to recognize tab page')
        # Common mistake: https://www.youtube.com/watch?list=playlist_id
        self.report_warning(f'A video URL was given without video ID. Trying to download playlist {playlist_id}')
        url = f'https://www.youtube.com/playlist?list={playlist_id}'
        mobj = split_url(url)

    if video_id and playlist_id:
        if self.get_param('noplaylist'):
            self.to_screen(f'Downloading just video {video_id} because of --no-playlist')
            return video_result(video_id)
        self.to_screen(f'Downloading playlist {playlist_id}; add --no-playlist to just download video {video_id}')

    data, ytcfg = self._extract_data(url, item_id)

    # YouTube may provide a non-standard redirect to the regional channel
    # See: https://github.com/yt-dlp/yt-dlp/issues/2694
    redirect_url = traverse_obj(
        data,
        ('onResponseReceivedActions', ..., 'navigateAction', 'endpoint', 'commandMetadata', 'webCommandMetadata', 'url'),
        get_all=False)
    if redirect_url and follow_redirects:
        redirect_url = ''.join((
            urljoin('https://www.youtube.com', redirect_url), mobj['tab'], mobj['post']))
        self.to_screen(f'This playlist is likely not available in your region. Following redirect to regional playlist {redirect_url}')
        return self.url_result(redirect_url, ie=YoutubeTabIE.ie_key())

    tabs = traverse_obj(data, tabs_path, expected_type=list)
    if tabs:
        selected_tab = self._extract_selected_tab(tabs)
        selected_tab_name = selected_tab.get('title', '').lower()
        if selected_tab_name == 'home':
            selected_tab_name = 'featured'
        requested_tab_name = mobj['tab'][1:]
        if follow_redirects:
            if requested_tab_name == 'live':
                # Live tab should have redirected to the video
                raise UserNotLive(video_id=mobj['id'])
            if requested_tab_name not in ('', selected_tab_name):
                redirect_warning = f'The channel does not have a {requested_tab_name} tab'
                if original_tab_name:
                    # The user asked for this tab explicitly, so do not fall back silently
                    raise ExtractorError(redirect_warning, expected=True)
                if item_id[:2] == 'UC':
                    # Topic channels don't have /videos. Use the equivalent playlist instead
                    pl_id = f'UU{item_id[2:]}'
                    pl_url = f'https://www.youtube.com/playlist?list={pl_id}'
                    try:
                        data, ytcfg = self._extract_data(
                            pl_url, pl_id, ytcfg=ytcfg, fatal=True, webpage_fatal=True)
                    except ExtractorError:
                        redirect_warning += ' and the playlist redirect gave error'
                    else:
                        item_id, url, selected_tab_name = pl_id, pl_url, requested_tab_name
                        redirect_warning += f'. Redirecting to playlist {pl_id} instead'
                if selected_tab_name and selected_tab_name != requested_tab_name:
                    redirect_warning += f'. {selected_tab_name} tab is being downloaded instead'

    if redirect_warning:
        self.to_screen(redirect_warning)
    self.write_debug(f'Final URL: {url}')

    # YouTube sometimes provides a button to reload playlist with unavailable videos.
    if 'no-youtube-unavailable-videos' not in compat_opts:
        data = self._reload_with_unavailable_videos(item_id, data, ytcfg) or data
    self._extract_and_report_alerts(data, only_once=True)

    tabs = traverse_obj(data, tabs_path, expected_type=list)
    if tabs:
        return self._extract_from_tabs(item_id, ytcfg, data, tabs)

    playlist = traverse_obj(
        data, ('contents', 'twoColumnWatchNextResults', 'playlist', 'playlist'), expected_type=dict)
    if playlist:
        return self._extract_from_playlist(item_id, url, data, playlist, ytcfg)

    # Prefer the video id reported by the page over the one from the query string
    video_id = traverse_obj(
        data, ('currentVideoEndpoint', 'watchEndpoint', 'videoId'), expected_type=str) or video_id
    if not video_id:
        raise ExtractorError('Unable to recognize tab page')

    if mobj['tab'] != '/live':
        # live tab is expected to redirect to video
        self.report_warning(f'Unable to recognize playlist. Downloading just video {video_id}')
    return video_result(video_id)
c5e8d7af 5572
c5e8d7af 5573
# Accepts bare playlist ids as well as youtube / youtubekids / invidious URLs
# carrying a "list=" query parameter, and hands them over to YoutubeTabIE.
class YoutubePlaylistIE(InfoExtractor):
    IE_NAME = 'youtube:playlist'
    IE_DESC = 'YouTube playlists'
    _VALID_URL = r'''(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        (?:
                            (?:
                                youtube(?:kids)?\.com|
                                %(invidious)s
                            )
                            /.*?\?.*?\blist=
                        )?
                        (?P<id>%(playlist_id)s)
                     )''' % {
        'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE,
        'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
    }
    _TESTS = [{
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
            'uploader': 'Wickman',
            'uploader_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
            'description': 'md5:8fa6f52abb47a9552002fa3ddfc57fc2',
            'view_count': int,
            'uploader_url': 'https://www.youtube.com/user/Wickydoo',
            'modified_date': r're:\d{8}',
            'channel_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
            'channel': 'Wickman',
            'tags': [],
            'channel_url': 'https://www.youtube.com/user/Wickydoo',
        },
        'playlist_mincount': 29,
    }, {
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        },
        'playlist_count': 2,
        'skip': 'This playlist is private',
    }, {
        'note': 'embedded',
        'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA15',
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
            'uploader': 'milan',
            'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
            'description': '',
            'channel_url': 'https://www.youtube.com/channel/UCEI1-PVPcYXjB73Hfelbmaw',
            'tags': [],
            'modified_date': '20140919',
            'view_count': int,
            'channel': 'milan',
            'channel_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
            'uploader_url': 'https://www.youtube.com/channel/UCEI1-PVPcYXjB73Hfelbmaw',
        },
        'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
    }, {
        'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
        'playlist_mincount': 455,
        'info_dict': {
            'title': '2018 Chinese New Singles (11/6 updated)',
            'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
            'uploader': 'LBK',
            'uploader_id': 'UC21nz3_MesPLqtDqwdvnoxA',
            'description': 'md5:da521864744d60a198e3a88af4db0d9d',
            'channel': 'LBK',
            'view_count': int,
            'channel_url': 'https://www.youtube.com/c/愛低音的國王',
            'tags': [],
            'uploader_url': 'https://www.youtube.com/c/愛低音的國王',
            'channel_id': 'UC21nz3_MesPLqtDqwdvnoxA',
            'modified_date': r're:\d{8}',
        },
        'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
    }, {
        'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
        'only_matching': True,
    }, {
        # music album playlist
        'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Decline URLs claimed by YoutubeTabIE or carrying a ``v`` query value.

        Everything else is decided by the parent class' ``suitable``.
        """
        if YoutubeTabIE.suitable(url):
            return False
        # NOTE(review): parse_qs is imported inside the method even though the
        # module imports it too - presumably so this method is self-contained;
        # confirm before removing.
        from ..utils import parse_qs
        has_video_id = bool(parse_qs(url).get('v', [None])[0])
        return False if has_video_id else super().suitable(url)

    def _real_extract(self, url):
        """Rewrite the input as a www.youtube.com/playlist URL for YoutubeTabIE.

        The original query string is carried over; when there is none (e.g. a
        bare playlist id) a ``list`` parameter is built from the matched id.
        Music URLs are flagged through smuggled data.
        """
        playlist_id = self._match_id(url)
        from_music = YoutubeBaseInfoExtractor.is_music_url(url)
        query = parse_qs(url) or {'list': playlist_id}
        playlist_url = update_url_query('https://www.youtube.com/playlist', query)
        if from_music:
            playlist_url = smuggle_url(playlist_url, {'is_music_url': True})
        return self.url_result(playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
29f7c58a 5683
5684
# Handles youtu.be short links that also carry a playlist ("list=") parameter
# by turning them into a regular watch URL for YoutubeTabIE.
class YoutubeYtBeIE(InfoExtractor):
    IE_DESC = 'youtu.be'
    _VALID_URL = (
        r'https?://youtu\.be/(?P<id>[0-9A-Za-z_-]{11})/*?.*?\blist=(?P<playlist_id>%(playlist_id)s)'
        % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE})
    _TESTS = [{
        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
        'info_dict': {
            'id': 'yeWKywCrFtk',
            'ext': 'mp4',
            'title': 'Small Scale Baler and Braiding Rugs',
            'uploader': 'Backus-Page House Museum',
            'uploader_id': 'backuspagemuseum',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
            'upload_date': '20161008',
            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
            'categories': ['Nonprofits & Activism'],
            'tags': list,
            'like_count': int,
            'age_limit': 0,
            'playable_in_embed': True,
            'thumbnail': 'https://i.ytimg.com/vi_webp/yeWKywCrFtk/maxresdefault.webp',
            'channel': 'Backus-Page House Museum',
            'channel_id': 'UCEfMCQ9bs3tjvjy1s451zaw',
            'live_status': 'not_live',
            'view_count': int,
            'channel_url': 'https://www.youtube.com/channel/UCEfMCQ9bs3tjvjy1s451zaw',
            'availability': 'public',
            'duration': 59,
            'comment_count': int,
            'channel_follower_count': int
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
    }, {
        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build the equivalent watch URL (video + playlist) and delegate to YoutubeTabIE."""
        video_id, playlist_id = self._match_valid_url(url).group('id', 'playlist_id')
        watch_url = update_url_query('https://www.youtube.com/watch', {
            'v': video_id,
            'list': playlist_id,
            'feature': 'youtu.be',
        })
        return self.url_result(watch_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
8bdd16b4 5734
5735
# Maps /embed/live_stream?channel=<id> URLs onto the channel's /live page.
class YoutubeLivestreamEmbedIE(InfoExtractor):
    IE_DESC = 'YouTube livestream embeds'
    _VALID_URL = r'https?://(?:\w+\.)?youtube\.com/embed/live_stream/?\?(?:[^#]+&)?channel=(?P<id>[^&#]+)'
    _TESTS = [{
        'url': 'https://www.youtube.com/embed/live_stream?channel=UC2_KI6RB__jGdlnK6dvFEZA',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE using the /live URL of the matched channel id."""
        channel_id = self._match_id(url)
        live_url = f'https://www.youtube.com/channel/{channel_id}/live'
        return self.url_result(live_url, ie=YoutubeTabIE.ie_key(), video_id=channel_id)
5749
5750
# Resolves the "ytuser:<name>" shorthand to the user's /videos page.
class YoutubeYtUserIE(InfoExtractor):
    IE_NAME = 'youtube:user'
    IE_DESC = 'YouTube user videos; "ytuser:" prefix'
    _VALID_URL = r'ytuser:(?P<id>.+)'
    _TESTS = [{
        'url': 'ytuser:phihag',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE using the /user/<id>/videos URL."""
        user_id = self._match_id(url)
        videos_url = f'https://www.youtube.com/user/{user_id}/videos'
        return self.url_result(videos_url, ie=YoutubeTabIE.ie_key(), video_id=user_id)
9558dcec 5765
b05654f0 5766
# Resolves the ":ytfav" keyword (and its spelling variants) to the "LL" playlist.
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube liked videos; ":ytfav" keyword (requires cookies)'
    _VALID_URL = r':ytfav(?:ou?rite)?s?'
    _LOGIN_REQUIRED = True
    _TESTS = [{
        'url': ':ytfav',
        'only_matching': True,
    }, {
        'url': ':ytfavorites',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the fixed playlist?list=LL URL."""
        liked_videos_url = 'https://www.youtube.com/playlist?list=LL'
        return self.url_result(liked_videos_url, ie=YoutubeTabIE.ie_key())
5784
5785
# Lists the entries of the notification menu as a playlist of URL results
# (":ytnotif" keyword).
class YoutubeNotificationsIE(YoutubeTabBaseInfoExtractor):
    IE_NAME = 'youtube:notif'
    IE_DESC = 'YouTube notifications; ":ytnotif" keyword (requires cookies)'
    _VALID_URL = r':ytnotif(?:ication)?s?'
    _LOGIN_REQUIRED = True
    _TESTS = [{
        'url': ':ytnotif',
        'only_matching': True,
    }, {
        'url': ':ytnotifications',
        'only_matching': True,
    }]

    def _extract_notification_menu(self, response, continuation_list):
        """Yield an entry for every usable notification found in ``response``.

        ``continuation_list`` is a one-element list used as an out-parameter:
        slot 0 is reset to None and then set to the continuation renderer
        found among the items, if any.
        """
        items = traverse_obj(
            response,
            ('actions', 0, 'openPopupAction', 'popup', 'multiPageMenuRenderer', 'sections', 0, 'multiPageMenuNotificationSectionRenderer', 'items'),
            ('actions', 0, 'appendContinuationItemsAction', 'continuationItems'),
            expected_type=list) or []
        continuation_list[0] = None
        for item in items:
            entry = self._extract_notification_renderer(item.get('notificationRenderer'))
            if entry:
                yield entry
            next_page = item.get('continuationItemRenderer')
            if next_page:
                continuation_list[0] = next_page

    def _extract_notification_renderer(self, notification):
        """Convert one notification renderer into a ``url`` result dict.

        Returns None when the notification links to neither a video nor a
        community post.
        """
        video_id = traverse_obj(
            notification, ('navigationEndpoint', 'watchEndpoint', 'videoId'), expected_type=str)
        channel_id = None
        if video_id:
            url = f'https://www.youtube.com/watch?v={video_id}'
        else:
            browse_ep = traverse_obj(
                notification, ('navigationEndpoint', 'browseEndpoint'), expected_type=dict)
            channel_id = traverse_obj(browse_ep, 'browseId', expected_type=str)
            canonical_url = traverse_obj(browse_ep, 'canonicalBaseUrl', expected_type=str)
            post_id = self._search_regex(r'/post/(.+)', canonical_url, 'post id', default=None)
            if not (channel_id and post_id):
                return
            # The direct /post url redirects to this in the browser
            url = f'https://www.youtube.com/channel/{channel_id}/community?lb={post_id}'

        channel = traverse_obj(
            notification,
            ('contextualMenu', 'menuRenderer', 'items', 1, 'menuServiceItemRenderer', 'text', 'runs', 1, 'text'),
            expected_type=str)
        notification_title = self._get_text(notification, 'shortMessage')
        if notification_title:
            notification_title = notification_title.replace('\xad', '')  # remove soft hyphens
        # TODO: handle recommended videos
        title_re = rf'{re.escape(channel or "")}[^:]+: (.+)'
        title = self._search_regex(title_re, notification_title, 'video title', default=None)

        # The date is only derived from the "sent" time text when explicitly requested
        upload_date = None
        if self._configuration_arg('approximate_date', ie_key=YoutubeTabIE.ie_key()):
            upload_date = strftime_or_none(
                self._extract_time_text(notification, 'sentTimeText')[0], '%Y%m%d')

        target_ie = YoutubeIE if video_id else YoutubeTabIE
        return {
            '_type': 'url',
            'url': url,
            'ie_key': target_ie.ie_key(),
            'video_id': video_id,
            'title': title,
            'channel_id': channel_id,
            'channel': channel,
            'thumbnails': self._extract_thumbnails(notification, 'videoThumbnail'),
            'upload_date': upload_date,
        }

    def _notification_menu_entries(self, ytcfg):
        """Yield entries page by page until a page provides no continuation."""
        continuation_list = [None]
        response = None
        page = 0
        while True:
            page += 1
            ctoken = traverse_obj(
                continuation_list,
                (0, 'continuationEndpoint', 'getNotificationMenuEndpoint', 'ctoken'),
                expected_type=str)
            query = {'ctoken': ctoken} if ctoken else {}
            # Visitor data is taken from the previous response (None on the first page)
            headers = self.generate_api_headers(
                ytcfg=ytcfg, visitor_data=self._extract_visitor_data(response))
            response = self._extract_response(
                item_id=f'page {page}', query=query, ytcfg=ytcfg,
                ep='notification/get_notification_menu', check_get_keys='actions',
                headers=headers)
            yield from self._extract_notification_menu(response, continuation_list)
            if not continuation_list[0]:
                return

    def _real_extract(self, url):
        """Return the notifications as a playlist with id and title 'notifications'."""
        display_id = 'notifications'
        ytcfg = {} if self.skip_webpage else self._download_ytcfg('web', display_id)
        self._report_playlist_authcheck(ytcfg)
        entries = self._notification_menu_entries(ytcfg)
        return self.playlist_result(entries, display_id, display_id)
5875
5876
# Keyword search ("ytsearch" prefix); all behaviour comes from the base classes.
class YoutubeSearchIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor):
    IE_NAME = 'youtube:search'
    IE_DESC = 'YouTube search'
    _SEARCH_KEY = 'ytsearch'
    _SEARCH_PARAMS = 'EgIQAQ%3D%3D'  # Videos only
    _TESTS = [{
        'url': 'ytsearch5:youtube-dl test video',
        'playlist_count': 5,
        'info_dict': {
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
        }
    }]
b05654f0 5890
a61fd4cf 5891
# Keyword search ordered by date ("ytsearchdate" prefix); all behaviour comes
# from the base classes.
class YoutubeSearchDateIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor):
    IE_NAME = f'{YoutubeSearchIE.IE_NAME}:date'
    IE_DESC = 'YouTube search, newest videos first'
    _SEARCH_KEY = 'ytsearchdate'
    _SEARCH_PARAMS = 'CAISAhAB'  # Videos only, sorted by date
    _TESTS = [{
        'url': 'ytsearchdate5:youtube-dl test video',
        'playlist_count': 5,
        'info_dict': {
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
        }
    }]
75dff0ee 5905
c9ae7b95 5906
# Handles /results and /search URLs; the query comes from "search_query" or
# "q" and the optional "sp" parameter is forwarded to the search.
class YoutubeSearchURLIE(YoutubeTabBaseInfoExtractor):
    IE_NAME = f'{YoutubeSearchIE.IE_NAME}_url'
    IE_DESC = 'YouTube search URLs with sorting and filter support'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/(?:results|search)\?([^#]+&)?(?:search_query|q)=(?:[^&]+)(?:[&#]|$)'
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
        }
    }, {
        'url': 'https://www.youtube.com/results?search_query=python&sp=EgIQAg%253D%253D',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'python',
            'title': 'python',
        }
    }, {
        'url': 'https://www.youtube.com/results?search_query=%23cats',
        'playlist_mincount': 1,
        'info_dict': {
            'id': '#cats',
            'title': '#cats',
            # The test suite does not have support for nested playlists
            # 'entries': [{
            #     'url': r're:https://(www\.)?youtube\.com/hashtag/cats',
            #     'title': '#cats',
            # }],
        },
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Run the search described by the URL and return it as a playlist.

        The query string doubles as the playlist id and title.
        """
        params = parse_qs(url)
        query = (params.get('search_query') or params.get('q'))[0]
        search_params = params.get('sp', (None,))[0]
        entries = self._search_results(query, search_params)
        return self.playlist_result(entries, query, query)
3462ffa8 5946
5947
class YoutubeMusicSearchURLIE(YoutubeTabBaseInfoExtractor):
    """Extractor for music.youtube.com ``/search`` URLs.

    A result section can be chosen either with an ``sp`` query parameter or
    with a URL fragment naming one of the ``_SECTIONS`` keys (e.g. ``#songs``).
    """

    IE_NAME = 'youtube:music:search_url'
    IE_DESC = 'YouTube music search URLs with selectable sections, e.g. #songs'
    _VALID_URL = r'https?://music\.youtube\.com/search\?([^#]+&)?(?:search_query|q)=(?:[^&]+)(?:[&#]|$)'
    _TESTS = [{
        'url': 'https://music.youtube.com/search?q=royalty+free+music',
        'playlist_count': 16,
        'info_dict': {
            'id': 'royalty free music',
            'title': 'royalty free music',
        },
    }, {
        'url': 'https://music.youtube.com/search?q=royalty+free+music&sp=EgWKAQIIAWoKEAoQAxAEEAkQBQ%3D%3D',
        'playlist_mincount': 30,
        'info_dict': {
            'id': 'royalty free music - songs',
            'title': 'royalty free music - songs',
        },
        'params': {'extract_flat': 'in_playlist'},
    }, {
        'url': 'https://music.youtube.com/search?q=royalty+free+music#community+playlists',
        'playlist_mincount': 30,
        'info_dict': {
            'id': 'royalty free music - community playlists',
            'title': 'royalty free music - community playlists',
        },
        'params': {'extract_flat': 'in_playlist'},
    }]

    # Section name (lower case) -> ``sp`` search parameter selecting that section.
    _SECTIONS = {
        'albums': 'EgWKAQIYAWoKEAoQAxAEEAkQBQ==',
        'artists': 'EgWKAQIgAWoKEAoQAxAEEAkQBQ==',
        'community playlists': 'EgeKAQQoAEABagoQChADEAQQCRAF',
        'featured playlists': 'EgeKAQQoADgBagwQAxAJEAQQDhAKEAU==',
        'songs': 'EgWKAQIIAWoKEAoQAxAEEAkQBQ==',
        'videos': 'EgWKAQIQAWoKEAoQAxAEEAkQBQ==',
    }

    def _real_extract(self, url):
        """Run a music search, optionally restricted to one section.

        If ``sp`` is present it is used verbatim; the section label is its
        ``_SECTIONS`` name when known, otherwise the raw ``sp`` value.
        Without ``sp`` the URL fragment is looked up in ``_SECTIONS``; an
        unknown or missing fragment means no section. The playlist id and
        title are ``"<query> - <section>"``, or just the query.
        """
        query_args = parse_qs(url)
        query = (query_args.get('search_query') or query_args.get('q'))[0]
        params = query_args.get('sp', (None,))[0]

        if params:
            # Reverse lookup: first section whose token equals the given ``sp``.
            section = params
            for name, token in self._SECTIONS.items():
                if token == params:
                    section = name
                    break
        else:
            # Text between the first and (if any) second '#'; '' when absent.
            fragment = url.partition('#')[2].split('#')[0]
            section = urllib.parse.unquote_plus(fragment).lower()
            params = self._SECTIONS.get(section)
            if not params:
                section = None

        title = join_nonempty(query, section, delim=' - ')
        entries = self._search_results(query, params, default_client='web_music')
        return self.playlist_result(entries, title, title)
16aa9ea4 5999
6000
class YoutubeFeedsInfoExtractor(InfoExtractor):
    """
    Common base for the youtube.com/feed/<name> extractors.
    Subclasses must override _FEED_NAME; it is used to build both
    IE_NAME and the feed URL handed over to YoutubeTabIE.
    """
    _FEED_NAME = 'feeds'
    _LOGIN_REQUIRED = True

    @classproperty
    def IE_NAME(cls):
        return f'youtube:{cls._FEED_NAME}'

    def _real_initialize(self):
        # This class does not derive from YoutubeBaseInfoExtractor, so its
        # login check is called through the class with this instance as self.
        YoutubeBaseInfoExtractor._check_login_required(self)

    def _real_extract(self, url):
        feed_url = f'https://www.youtube.com/feed/{self._FEED_NAME}'
        return self.url_result(feed_url, ie=YoutubeTabIE.ie_key())
25f14e9f
S
6019
6020
class YoutubeWatchLaterIE(InfoExtractor):
    """Resolve the ``:ytwatchlater`` keyword to the ``WL`` playlist URL."""

    IE_NAME = 'youtube:watchlater'
    IE_DESC = 'Youtube watch later list; ":ytwatchlater" keyword (requires cookies)'
    _VALID_URL = r':ytwatchlater'
    _TESTS = [{
        'url': ':ytwatchlater',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # Extraction is delegated to YoutubeTabIE via the playlist URL.
        playlist_url = 'https://www.youtube.com/playlist?list=WL'
        return self.url_result(playlist_url, ie=YoutubeTabIE.ie_key())
3462ffa8 6033
6034
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the bare youtube.com URL and the ``:ytrec`` keyword."""

    IE_DESC = 'YouTube recommended videos; ":ytrec" keyword'
    _FEED_NAME = 'recommended'
    # Overrides the base class default of True.
    _LOGIN_REQUIRED = False
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/?(?:[?#]|$)|:ytrec(?:ommended)?'
    _TESTS = [
        {'url': ':ytrec', 'only_matching': True},
        {'url': ':ytrecommended', 'only_matching': True},
        {'url': 'https://youtube.com', 'only_matching': True},
    ]
1ed5b5c9 6050
1ed5b5c9 6051
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the ``:ytsubs`` / ``:ytsubscriptions`` keywords."""

    IE_DESC = 'YouTube subscriptions feed; ":ytsubs" keyword (requires cookies)'
    _FEED_NAME = 'subscriptions'
    _VALID_URL = r':ytsub(?:scription)?s?'
    _TESTS = [
        {'url': ':ytsubs', 'only_matching': True},
        {'url': ':ytsubscriptions', 'only_matching': True},
    ]
1ed5b5c9 6063
1ed5b5c9 6064
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the ``:ythis`` / ``:ythistory`` keywords."""

    IE_DESC = 'Youtube watch history; ":ythis" keyword (requires cookies)'
    _FEED_NAME = 'history'
    _VALID_URL = r':ythis(?:tory)?'
    _TESTS = [
        {'url': ':ythistory', 'only_matching': True},
    ]
1ed5b5c9
JMF
6073
6074
class YoutubeStoriesIE(InfoExtractor):
    """Resolve ``ytstories:UC...`` to a playlist URL handled by YoutubeTabIE."""

    IE_NAME = 'youtube:stories'
    IE_DESC = 'YouTube channel stories; "ytstories:" prefix'
    # The captured id is the part after the leading "UC".
    _VALID_URL = r'ytstories:UC(?P<id>[A-Za-z0-9_-]{21}[AQgw])$'
    _TESTS = [{
        'url': 'ytstories:UCwFCb4jeqaKWnciAYM-ZVHg',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # Playlist id is "RLTD" followed by the id captured by _VALID_URL.
        playlist_id = f'RLTD{self._match_id(url)}'
        # The smuggled flag travels with the URL to the receiving extractor.
        story_url = smuggle_url(
            f'https://www.youtube.com/playlist?list={playlist_id}&playnext=1',
            {'is_story': True})
        return self.url_result(story_url, ie=YoutubeTabIE, video_id=playlist_id)
6089
6090
15870e90
PH
class YoutubeTruncatedURLIE(InfoExtractor):
    """Catch watch/attribution URLs that carry no video id and explain why.

    Such URLs typically result from an unquoted ``&`` on the command line:
    the shell cuts the URL at the first ``&``. Instead of extracting
    anything, this extractor always raises an expected ExtractorError with
    a hint on how to quote the URL.
    """
    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise: a URL matching _VALID_URL has no video id to extract.

        Raises:
            ExtractorError: with ``expected=True``, since this is a usage
                problem rather than an extractor failure.
        """
        # The hint names this program (yt-dlp) so the suggested command
        # can be run as written.
        raise ExtractorError(
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like yt-dlp '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply yt-dlp BaW_jenozKc .',
            expected=True)
772fd5cc
PH
6138
6139
class YoutubeClipIE(YoutubeTabBaseInfoExtractor):
    """Extract a youtube.com/clip/<id> page as a section of its base video.

    The clip page supplies the base video id and the clip's start/end
    offsets; everything else is delegated to YoutubeIE through a
    ``url_transparent`` result.
    """
    IE_NAME = 'youtube:clip'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/clip/(?P<id>[^/?#]+)'
    _TESTS = [{
        # FIXME: Other metadata should be extracted from the clip, not from the base video
        'url': 'https://www.youtube.com/clip/UgytZKpehg-hEMBSn3F4AaABCQ',
        'info_dict': {
            'id': 'UgytZKpehg-hEMBSn3F4AaABCQ',
            'ext': 'mp4',
            'section_start': 29.0,
            'section_end': 39.7,
            'duration': 10.7,
            'age_limit': 0,
            'availability': 'public',
            'categories': ['Gaming'],
            'channel': 'Scott The Woz',
            'channel_id': 'UC4rqhyiTs7XyuODcECvuiiQ',
            'channel_url': 'https://www.youtube.com/channel/UC4rqhyiTs7XyuODcECvuiiQ',
            'description': 'md5:7a4517a17ea9b4bd98996399d8bb36e7',
            'like_count': int,
            'playable_in_embed': True,
            'tags': 'count:17',
            'thumbnail': 'https://i.ytimg.com/vi_webp/ScPX26pdQik/maxresdefault.webp',
            'title': 'Mobile Games on Console - Scott The Woz',
            'upload_date': '20210920',
            'uploader': 'Scott The Woz',
            'uploader_id': 'scottthewoz',
            'uploader_url': 'http://www.youtube.com/user/scottthewoz',
            'view_count': int,
            'live_status': 'not_live',
            'channel_follower_count': int
        }
    }]

    def _real_extract(self, url):
        """Return a url_transparent result pointing at the clip's base video.

        Raises:
            ExtractorError: if the page data contains no base video id, or no
                usable clip start/end times.
        """
        clip_id = self._match_id(url)
        _, data = self._extract_webpage(url, clip_id)

        video_id = traverse_obj(data, ('currentVideoEndpoint', 'watchEndpoint', 'videoId'))
        if not video_id:
            raise ExtractorError('Unable to find video ID')

        # First 'loopCommand' found under the clip attribution renderer.
        clip_data = traverse_obj(data, (
            'engagementPanels', ..., 'engagementPanelSectionListRenderer', 'content', 'clipSectionRenderer',
            'contents', ..., 'clipAttributionRenderer', 'onScrubExit', 'commandExecutorCommand', 'commands', ...,
            'openPopupAction', 'popup', 'notificationActionRenderer', 'actionButton', 'buttonRenderer', 'command',
            'commandExecutorCommand', 'commands', ..., 'loopCommand'), get_all=False)

        # traverse_obj may come back empty (or with an unexpected shape) when
        # the page layout differs; report that as an extraction error rather
        # than letting a bare TypeError/KeyError/ValueError escape.
        try:
            section_start = int(clip_data['startTimeMs']) / 1000
            section_end = int(clip_data['endTimeMs']) / 1000
        except (TypeError, KeyError, ValueError) as e:
            raise ExtractorError('Unable to find clip data') from e

        return {
            '_type': 'url_transparent',
            'url': f'https://www.youtube.com/watch?v={video_id}',
            'ie_key': YoutubeIE.ie_key(),
            'id': clip_id,
            'section_start': section_start,
            'section_end': section_end,
        }
3cd786db 6196
6197
772fd5cc
PH
class YoutubeTruncatedIDIE(InfoExtractor):
    """Catch watch URLs whose ``v`` value is only 1-10 characters long.

    Nothing is extracted; an expected ExtractorError naming the incomplete
    id is raised instead.
    """
    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        partial_id = self._match_id(url)
        message = f'Incomplete YouTube ID {partial_id}. URL {url} looks truncated.'
        raise ExtractorError(message, expected=True)