]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/youtube.py
[extractor/uktv] Improve _VALID_URL
[yt-dlp.git] / yt_dlp / extractor / youtube.py
CommitLineData
6e634cbe 1import base64
d92f5d5a 2import calendar
109dd3b2 3import copy
fe93e2c4 4import datetime
a5c56234 5import hashlib
0ca96d48 6import itertools
c5e8d7af 7import json
720c3099 8import math
c4417ddb 9import os.path
d77ab8e2 10import random
c5e8d7af 11import re
46383212 12import sys
f8271158 13import threading
8a784c74 14import time
e0df6211 15import traceback
14f25df2 16import urllib.error
ac668111 17import urllib.parse
c5e8d7af 18
b05654f0 19from .common import InfoExtractor, SearchInfoExtractor
25836db6 20from .openload import PhantomJSwrapper
14f25df2 21from ..compat import functools
545cc85d 22from ..jsinterp import JSInterpreter
4bb4a188 23from ..utils import (
f8271158 24 NO_DEFAULT,
25 ExtractorError,
693f0600 26 UserNotLive,
720c3099 27 bug_reports_message,
82d02080 28 classproperty,
c5e8d7af 29 clean_html,
d92f5d5a 30 datetime_from_str,
11f9be09 31 dict_get,
2d30521a 32 float_or_none,
11f9be09 33 format_field,
ff91cf74 34 get_first,
dd27fd17 35 int_or_none,
641ad5d8 36 is_html,
34921b43 37 join_nonempty,
48416bc4 38 js_to_json,
94278f72 39 mimetype2ext,
9c0d7f49 40 network_exceptions,
11f9be09 41 orderedSet,
6310acf5 42 parse_codecs,
49bd8c66 43 parse_count,
7c80519c 44 parse_duration,
7ea65411 45 parse_iso8601,
4dfbf869 46 parse_qs,
dca3ff4a 47 qualities,
3995d37d 48 remove_start,
cf7e015f 49 smuggle_url,
dbdaaa23 50 str_or_none,
c93d53f5 51 str_to_int,
f3aa3c3f 52 strftime_or_none,
7c365c21 53 traverse_obj,
556dbe7f 54 try_get,
c5e8d7af
PH
55 unescapeHTML,
56 unified_strdate,
f0d785d3 57 unified_timestamp,
cf7e015f 58 unsmuggle_url,
8bdd16b4 59 update_url_query,
21c340b8 60 url_or_none,
fe93e2c4 61 urljoin,
7c365c21 62 variadic,
c5e8d7af
PH
63)
64
# Table of default Innertube client configurations, keyed by client name.
# Each entry holds the request context ('INNERTUBE_CONTEXT') and the numeric
# 'INNERTUBE_CONTEXT_CLIENT_NAME'; some entries also carry an explicit
# 'INNERTUBE_API_KEY', 'INNERTUBE_HOST' or 'REQUIRE_JS_PLAYER'.
# Keys omitted here are filled in by build_innertube_clients(), which also
# assigns a 'priority' and adds '<name>_embedscreen' copies of the base clients.
# any clients starting with _ cannot be explicitly requested by the user
INNERTUBE_CLIENTS = {
    'web': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB',
                'clientVersion': '2.20220801.00.00',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 1
    },
    'web_embedded': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB_EMBEDDED_PLAYER',
                'clientVersion': '1.20220731.00.00',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 56
    },
    'web_music': {
        'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
        'INNERTUBE_HOST': 'music.youtube.com',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB_REMIX',
                'clientVersion': '1.20220727.01.00',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 67,
    },
    'web_creator': {
        'INNERTUBE_API_KEY': 'AIzaSyBUPetSUmoZL-OhlxA7wSac5XinrygCqMo',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB_CREATOR',
                'clientVersion': '1.20220726.00.00',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 62,
    },
    'android': {
        'INNERTUBE_API_KEY': 'AIzaSyA8eiZmM1FaDVjRy-df2KTyQ_vz_yYM39w',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID',
                'clientVersion': '17.29.34',
                'androidSdkVersion': 30
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 3,
        'REQUIRE_JS_PLAYER': False
    },
    'android_embedded': {
        'INNERTUBE_API_KEY': 'AIzaSyCjc_pVEDi4qsv5MtC2dMXzpIaDoRFLsxw',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID_EMBEDDED_PLAYER',
                'clientVersion': '17.29.34',
                'androidSdkVersion': 30
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 55,
        'REQUIRE_JS_PLAYER': False
    },
    'android_music': {
        'INNERTUBE_API_KEY': 'AIzaSyAOghZGza2MQSZkY_zfZ370N-PUdXEo8AI',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID_MUSIC',
                'clientVersion': '5.16.51',
                'androidSdkVersion': 30
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 21,
        'REQUIRE_JS_PLAYER': False
    },
    'android_creator': {
        'INNERTUBE_API_KEY': 'AIzaSyD_qjV8zaaUMehtLkrKFgVeSX_Iqbtyws8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID_CREATOR',
                'clientVersion': '22.28.100',
                'androidSdkVersion': 30
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 14,
        'REQUIRE_JS_PLAYER': False
    },
    # iOS clients have HLS live streams. Setting device model to get 60fps formats.
    # See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/680#issuecomment-1002724558
    'ios': {
        'INNERTUBE_API_KEY': 'AIzaSyB-63vPrdThhKuerbB2N_l7Kwwcxj6yUAc',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS',
                'clientVersion': '17.30.1',
                'deviceModel': 'iPhone14,3',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 5,
        'REQUIRE_JS_PLAYER': False
    },
    # No explicit API key: build_innertube_clients() supplies the default one
    'ios_embedded': {
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS_MESSAGES_EXTENSION',
                'clientVersion': '17.30.1',
                'deviceModel': 'iPhone14,3',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 66,
        'REQUIRE_JS_PLAYER': False
    },
    'ios_music': {
        'INNERTUBE_API_KEY': 'AIzaSyBAETezhkwP0ZWA02RsqT1zu78Fpt0bC_s',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS_MUSIC',
                'clientVersion': '5.18',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 26,
        'REQUIRE_JS_PLAYER': False
    },
    # No explicit API key: build_innertube_clients() supplies the default one
    'ios_creator': {
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS_CREATOR',
                'clientVersion': '22.29.101',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 15,
        'REQUIRE_JS_PLAYER': False
    },
    # mweb has 'ultralow' formats
    # See: https://github.com/yt-dlp/yt-dlp/pull/557
    'mweb': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'MWEB',
                'clientVersion': '2.20220801.00.00',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 2
    },
    # This client can access age restricted videos (unless the uploader has disabled the 'allow embedding' option)
    # See: https://github.com/zerodytrash/YouTube-Internal-Clients
    'tv_embedded': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'TVHTML5_SIMPLY_EMBEDDED_PLAYER',
                'clientVersion': '2.0',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 85
    },
}
227
228
e7870111
D
229def _split_innertube_client(client_name):
230 variant, *base = client_name.rsplit('.', 1)
231 if base:
232 return variant, base[0], variant
233 base, *variant = client_name.split('_', 1)
234 return client_name, base, variant[0] if variant else None
235
236
def build_innertube_clients():
    """Complete every entry of INNERTUBE_CLIENTS in place.

    For each client this fills in the default API key, host,
    ``REQUIRE_JS_PLAYER`` flag and context language, and computes a
    ``priority`` from the position of its base client in ``base_clients``.
    Clients without a variant additionally get a ``<name>_embedscreen`` copy
    registered in INNERTUBE_CLIENTS.
    """
    third_party = {
        'embedUrl': 'https://www.youtube.com/',  # Can be any valid URL
    }
    base_clients = ('android', 'web', 'tv', 'ios', 'mweb')
    rank = qualities(base_clients[::-1])
    defaults = (
        ('INNERTUBE_API_KEY', 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8'),
        ('INNERTUBE_HOST', 'www.youtube.com'),
        ('REQUIRE_JS_PLAYER', True),
    )

    # Iterate over a snapshot: new *_embedscreen entries are added inside the loop
    for name, cfg in list(INNERTUBE_CLIENTS.items()):
        for key, fallback in defaults:
            cfg.setdefault(key, fallback)
        cfg['INNERTUBE_CONTEXT']['client'].setdefault('hl', 'en')

        base_client, variant = _split_innertube_client(name)[1:]
        cfg['priority'] = 10 * rank(base_client)

        if variant == 'embedded':
            cfg['INNERTUBE_CONTEXT']['thirdParty'] = third_party
            cfg['priority'] -= 2
        elif variant:
            cfg['priority'] -= 3
        else:
            # Base client: register an "embed screen" copy with lower priority
            screen_cfg = copy.deepcopy(cfg)
            INNERTUBE_CLIENTS[f'{name}_embedscreen'] = screen_cfg
            screen_cfg['INNERTUBE_CONTEXT']['client']['clientScreen'] = 'EMBED'
            screen_cfg['INNERTUBE_CONTEXT']['thirdParty'] = third_party
            screen_cfg['priority'] -= 3
263
264
265build_innertube_clients()
266
267
de7f3446 268class YoutubeBaseInfoExtractor(InfoExtractor):
b2e8bc1b 269 """Provide base functions for Youtube extractors"""
e00eb564 270
# Regex alternation of reserved names.
# NOTE(review): presumably top-level URL path components that must not be
# mistaken for channel/user names - confirm against the users of this constant.
_RESERVED_NAMES = (
    r'channel|c|user|playlist|watch|w|v|embed|e|watch_popup|clip|'
    r'shorts|movies|results|search|shared|hashtag|trending|explore|feed|feeds|'
    r'browse|oembed|get_video_info|iframe_api|s/player|'
    r'storefront|oops|index|account|reporthistory|t/terms|about|upload|signin|logout')

# Regex for playlist IDs: either one of the listed prefixes followed by at
# least 10 ID characters, or one of the fixed IDs RDMM / WL / LL / LM
_PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)'

# _NETRC_MACHINE = 'youtube'

# If True it will raise an error if no login info is provided
_LOGIN_REQUIRED = False
283
d9190e44
RH
# Tuple of hostname regex fragments (each with an optional subdomain prefix)
# for third-party front-ends, grouped by the list they were taken from.
# NOTE(review): 'piped.kavin.rocks' is matched by two entries below (once in
# the youtube-dl list, once in the piped list); harmless but redundant.
_INVIDIOUS_SITES = (
    # invidious-redirect websites
    r'(?:www\.)?redirect\.invidious\.io',
    r'(?:(?:www|dev)\.)?invidio\.us',
    # Invidious instances taken from https://github.com/iv-org/documentation/blob/master/docs/instances.md
    r'(?:www\.)?invidious\.pussthecat\.org',
    r'(?:www\.)?invidious\.zee\.li',
    r'(?:www\.)?invidious\.ethibox\.fr',
    r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',
    r'(?:www\.)?osbivz6guyeahrwp2lnwyjk2xos342h4ocsxyqrlaopqjuhwn2djiiyd\.onion',
    r'(?:www\.)?u2cvlit75owumwpy4dj2hsmvkq7nvrclkpht7xgyye2pyoxhpmclkrad\.onion',
    # youtube-dl invidious instances list
    r'(?:(?:www|no)\.)?invidiou\.sh',
    r'(?:(?:www|fi)\.)?invidious\.snopyta\.org',
    r'(?:www\.)?invidious\.kabi\.tk',
    r'(?:www\.)?invidious\.mastodon\.host',
    r'(?:www\.)?invidious\.zapashcanon\.fr',
    r'(?:www\.)?(?:invidious(?:-us)?|piped)\.kavin\.rocks',
    r'(?:www\.)?invidious\.tinfoil-hat\.net',
    r'(?:www\.)?invidious\.himiko\.cloud',
    r'(?:www\.)?invidious\.reallyancient\.tech',
    r'(?:www\.)?invidious\.tube',
    r'(?:www\.)?invidiou\.site',
    r'(?:www\.)?invidious\.site',
    r'(?:www\.)?invidious\.xyz',
    r'(?:www\.)?invidious\.nixnet\.xyz',
    r'(?:www\.)?invidious\.048596\.xyz',
    r'(?:www\.)?invidious\.drycat\.fr',
    r'(?:www\.)?inv\.skyn3t\.in',
    r'(?:www\.)?tube\.poal\.co',
    r'(?:www\.)?tube\.connect\.cafe',
    r'(?:www\.)?vid\.wxzm\.sx',
    r'(?:www\.)?vid\.mint\.lgbt',
    r'(?:www\.)?vid\.puffyan\.us',
    r'(?:www\.)?yewtu\.be',
    r'(?:www\.)?yt\.elukerio\.org',
    r'(?:www\.)?yt\.lelux\.fi',
    r'(?:www\.)?invidious\.ggc-project\.de',
    r'(?:www\.)?yt\.maisputain\.ovh',
    r'(?:www\.)?ytprivate\.com',
    r'(?:www\.)?invidious\.13ad\.de',
    r'(?:www\.)?invidious\.toot\.koeln',
    r'(?:www\.)?invidious\.fdn\.fr',
    r'(?:www\.)?watch\.nettohikari\.com',
    r'(?:www\.)?invidious\.namazso\.eu',
    r'(?:www\.)?invidious\.silkky\.cloud',
    r'(?:www\.)?invidious\.exonip\.de',
    r'(?:www\.)?invidious\.riverside\.rocks',
    r'(?:www\.)?invidious\.blamefran\.net',
    r'(?:www\.)?invidious\.moomoo\.de',
    r'(?:www\.)?ytb\.trom\.tf',
    r'(?:www\.)?yt\.cyberhost\.uk',
    r'(?:www\.)?kgg2m7yk5aybusll\.onion',
    r'(?:www\.)?qklhadlycap4cnod\.onion',
    r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',
    r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',
    r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',
    r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',
    r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',
    r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',
    r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',
    r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',
    r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',
    r'(?:www\.)?hpniueoejy4opn7bc4ftgazyqjoeqwlvh2uiku2xqku6zpoa4bf5ruid\.onion',
    # piped instances from https://github.com/TeamPiped/Piped/wiki/Instances
    r'(?:www\.)?piped\.kavin\.rocks',
    r'(?:www\.)?piped\.silkky\.cloud',
    r'(?:www\.)?piped\.tokhmi\.xyz',
    r'(?:www\.)?piped\.moomoo\.me',
    r'(?:www\.)?il\.ax',
    r'(?:www\.)?piped\.syncpundit\.com',
    r'(?:www\.)?piped\.mha\.fi',
    r'(?:www\.)?piped\.mint\.lgbt',
    r'(?:www\.)?piped\.privacy\.com\.de',
)
359
def _initialize_consent(self):
    """Set an accepted ``CONSENT`` cookie for .youtube.com when needed.

    Nothing is done if a ``__Secure-3PSID`` cookie exists or the current
    ``CONSENT`` cookie already contains ``YES``. Otherwise the numeric id
    of a ``PENDING+<id>`` consent cookie is reused, falling back to a
    random three-digit id.
    """
    jar = self._get_cookies('https://www.youtube.com/')
    if jar.get('__Secure-3PSID'):
        return

    pending_id = None
    consent_cookie = jar.get('CONSENT')
    if consent_cookie:
        if 'YES' in consent_cookie.value:
            return
        pending_id = self._search_regex(
            r'PENDING\+(\d+)', consent_cookie.value, 'consent', default=None)

    consent_id = pending_id or random.randint(100, 999)
    self._set_cookie('.youtube.com', 'CONSENT', 'YES+cb.20210328-17-p0.en+FX+%s' % consent_id)
8d81f3e3 374
def _initialize_pref(self):
    """Rewrite the ``PREF`` cookie so that ``hl=en`` and ``tz=UTC``.

    Settings already present in the user's ``PREF`` cookie are kept; a
    cookie that cannot be parsed is reported with a warning and replaced.
    """
    pref_cookie = self._get_cookies('https://www.youtube.com/').get('PREF')
    settings = {}
    if pref_cookie:
        try:
            settings = dict(urllib.parse.parse_qsl(pref_cookie.value))
        except ValueError:
            self.report_warning('Failed to parse user PREF cookie' + bug_reports_message())
    # Force language and timezone regardless of what the user had stored
    settings['hl'] = 'en'
    settings['tz'] = 'UTC'
    self._set_cookie('.youtube.com', name='PREF', value=urllib.parse.urlencode(settings))
f3aa3c3f 386
def _real_initialize(self):
    """Prepare cookies (PREF, then CONSENT) and enforce the login requirement."""
    self._initialize_pref()
    self._initialize_consent()
    self._check_login_required()
391
def _check_login_required(self):
    """Raise a login-required error if ``_LOGIN_REQUIRED`` is set and no cookies were passed."""
    if not self._LOGIN_REQUIRED or self._cookies_passed:
        return
    self.raise_login_required('Login details are needed to download this content', method='cookies')
c5e8d7af 395
# Matches the start of the initial-data assignment in a page, written either
# as `window["ytInitialData"] =` or as a bare `ytInitialData =`
_YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*='
# Matches the start of the `ytInitialPlayerResponse =` assignment
_YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*='
a0566bbf 398
def _get_default_ytcfg(self, client='web'):
    """Return a deep copy of the built-in config for ``client`` (KeyError if unknown)."""
    builtin_cfg = INNERTUBE_CLIENTS[client]
    return copy.deepcopy(builtin_cfg)
109dd3b2 401
def _get_innertube_host(self, client='web'):
    """Return the ``INNERTUBE_HOST`` of the built-in config for ``client``."""
    builtin_cfg = INNERTUBE_CLIENTS[client]
    return builtin_cfg['INNERTUBE_HOST']
109dd3b2 404
def _ytcfg_get_safe(self, ytcfg, getter, expected_type=None, default_client='web'):
    """``try_get`` on ``ytcfg``, falling back to the default config of ``default_client``.

    The fallback is used whenever the value from ``ytcfg`` is falsy, not
    only when it is missing.
    """
    value = try_get(ytcfg, getter, expected_type)
    if value:
        return value
    return try_get(self._get_default_ytcfg(default_client), getter, expected_type)
409
def _extract_client_name(self, ytcfg, default_client='web'):
    """Return the client name string from ``ytcfg`` or the default client config."""
    getters = (
        lambda cfg: cfg['INNERTUBE_CLIENT_NAME'],
        lambda cfg: cfg['INNERTUBE_CONTEXT']['client']['clientName'],
    )
    return self._ytcfg_get_safe(ytcfg, getters, str, default_client)
109dd3b2 414
def _extract_client_version(self, ytcfg, default_client='web'):
    """Return the client version string from ``ytcfg`` or the default client config."""
    getters = (
        lambda cfg: cfg['INNERTUBE_CLIENT_VERSION'],
        lambda cfg: cfg['INNERTUBE_CONTEXT']['client']['clientVersion'],
    )
    return self._ytcfg_get_safe(ytcfg, getters, str, default_client)
109dd3b2 419
def _select_api_hostname(self, req_api_hostname, default_client=None):
    """Pick the API hostname.

    Precedence: the ``innertube_host`` extractor argument, then
    ``req_api_hostname``, then the built-in host of ``default_client``
    (``'web'`` when not given).
    """
    configured_host = self._configuration_arg('innertube_host', [''], ie_key=YoutubeIE.ie_key())[0]
    if configured_host:
        return configured_host
    if req_api_hostname:
        return req_api_hostname
    return self._get_innertube_host(default_client or 'web')
423
def _extract_api_key(self, ytcfg=None, default_client='web'):
    """Return ``INNERTUBE_API_KEY`` from ``ytcfg`` or the default client config."""
    def read_key(cfg):
        return cfg['INNERTUBE_API_KEY']

    return self._ytcfg_get_safe(ytcfg, read_key, str, default_client)
109dd3b2 426
def _extract_context(self, ytcfg=None, default_client='web'):
    """Return the ``INNERTUBE_CONTEXT`` dict with language and timezone forced.

    The context is looked up in ``ytcfg`` first and in the default config
    of ``default_client`` second. Its ``client`` dict (when it is a dict)
    is updated in place.
    """
    candidates = (ytcfg, self._get_default_ytcfg(default_client))
    context = get_first(candidates, 'INNERTUBE_CONTEXT', expected_type=dict)
    # Enforce language and tz for extraction
    client_context = traverse_obj(context, 'client', expected_type=dict, default={})
    client_context.update(hl='en', timeZone='UTC', utcOffsetMinutes=0)
    return context
434
# Cached SAPISID value: None = not looked up yet, False = no cookie found
_SAPISID = None

def _generate_sapisidhash_header(self, origin='https://www.youtube.com'):
    """Build the ``SAPISIDHASH`` authorization value for ``origin``.

    Returns ``None`` when neither a ``__Secure-3PAPISID`` nor a ``SAPISID``
    cookie with a value is available. The cookie lookup happens once and
    its outcome is stored in ``self._SAPISID``.
    """
    now = round(time.time())
    if self._SAPISID is None:
        yt_cookies = self._get_cookies('https://www.youtube.com')
        # Sometimes SAPISID cookie isn't present but __Secure-3PAPISID is.
        # See: https://github.com/yt-dlp/yt-dlp/issues/393
        sapisid_cookie = dict_get(yt_cookies, ('__Secure-3PAPISID', 'SAPISID'))
        if not (sapisid_cookie and sapisid_cookie.value):
            self._SAPISID = False
        else:
            self._SAPISID = sapisid_cookie.value
            self.write_debug('Extracted SAPISID cookie')
            # SAPISID cookie is required if not already present
            if not yt_cookies.get('SAPISID'):
                self.write_debug('Copying __Secure-3PAPISID cookie to SAPISID cookie')
                self._set_cookie(
                    '.youtube.com', 'SAPISID', self._SAPISID, secure=True, expire_time=now + 3600)
    if not self._SAPISID:
        return None
    # SAPISIDHASH algorithm from https://stackoverflow.com/a/32065323
    hash_input = f'{now} {self._SAPISID} {origin}'
    sapisidhash = hashlib.sha1(hash_input.encode()).hexdigest()
    return f'SAPISIDHASH {now}_{sapisidhash}'
a5c56234
M
461
def _call_api(self, ep, query, video_id, fatal=True, headers=None,
              note='Downloading API JSON', errnote='Unable to download API page',
              context=None, api_key=None, api_hostname=None, default_client='web'):
    """POST ``query`` to the ``/youtubei/v1/<ep>`` endpoint and return the parsed JSON.

    ``context`` defaults to the context of ``default_client``. ``headers``
    are merged over the generated API headers. The API key is taken from
    the ``innertube_key`` extractor argument, then ``api_key``, then the
    default client config.
    """
    payload = {'context': context or self._extract_context(default_client=default_client)}
    payload.update(query)

    request_headers = self.generate_api_headers(default_client=default_client)
    request_headers['content-type'] = 'application/json'
    if headers:
        request_headers.update(headers)

    configured_key = self._configuration_arg(
        'innertube_key', [''], ie_key=YoutubeIE.ie_key(), casesense=True)[0]
    api_key = configured_key or api_key or self._extract_api_key(default_client=default_client)

    endpoint = f'https://{self._select_api_hostname(api_hostname, default_client)}/youtubei/v1/{ep}'
    return self._download_json(
        endpoint, video_id=video_id, fatal=fatal, note=note, errnote=errnote,
        data=json.dumps(payload).encode('utf8'), headers=request_headers,
        query={'key': api_key, 'prettyPrint': 'false'})
f4f751af 479
def extract_yt_initial_data(self, item_id, webpage, fatal=True):
    """Find and parse the JSON that follows the ytInitialData assignment in ``webpage``."""
    start_pattern = self._YT_INITIAL_DATA_RE
    return self._search_json(start_pattern, webpage, 'yt initial data', item_id, fatal=fatal)
1890fc63 482
@staticmethod
def _extract_session_index(*data):
    """
    Index of current account in account list.
    See: https://github.com/yt-dlp/yt-dlp/pull/519

    Returns the first ``SESSION_INDEX`` (as int) found among the given
    ytcfg objects, or None if none of them provides one.
    """
    indices = (
        int_or_none(try_get(ytcfg, lambda x: x['SESSION_INDEX']))
        for ytcfg in data)
    return next((index for index in indices if index is not None), None)
493
# Deprecated?
def _extract_identity_token(self, ytcfg=None, webpage=None):
    """Return the ``ID_TOKEN`` from ``ytcfg``, else search for it in ``webpage``; None if absent."""
    token = try_get(ytcfg, lambda x: x['ID_TOKEN'], str) if ytcfg else None
    if token:
        return token
    if not webpage:
        return None
    return self._search_regex(
        r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
        'identity token', default=None, fatal=False)
a1c5d2ca
M
504
@staticmethod
def _extract_account_syncid(*args):
    """
    Extract syncId required to download private playlists of secondary channels
    @params response and/or ytcfg
    @returns the first sync id found, or None
    """
    datasync_getters = (
        lambda x: x['responseContext']['mainAppWebResponseContext']['datasyncId'],
        lambda x: x['DATASYNC_ID'],
    )
    for source in args:
        # ytcfg includes channel_syncid if on secondary channel
        delegated_sid = try_get(source, lambda x: x['DELEGATED_SESSION_ID'], str)
        if delegated_sid:
            return delegated_sid
        datasync_id = try_get(source, datasync_getters, str) or ''
        parts = datasync_id.split('||')
        # datasyncid is of the form "channel_syncid||user_syncid" for secondary channel
        # and just "user_syncid||" for primary channel. We only want the channel_syncid
        if len(parts) >= 2 and parts[1]:
            return parts[0]
a1c5d2ca 523
@staticmethod
def _extract_visitor_data(*args):
    """
    Extracts visitorData from an API response or ytcfg
    Appears to be used to track session state
    """
    # Alternative locations of the value, tried on each argument
    visitor_data_paths = (
        'VISITOR_DATA',
        ('INNERTUBE_CONTEXT', 'client', 'visitorData'),
        ('responseContext', 'visitorData'),
    )
    return get_first(args, [visitor_data_paths], expected_type=str)
ac56cf38 533
@functools.cached_property
def is_authenticated(self):
    """True if a SAPISIDHASH header can be generated; computed once and cached."""
    return bool(self._generate_sapisidhash_header())
537
def extract_ytcfg(self, video_id, webpage):
    """Parse the argument of the first ``ytcfg.set({...});`` call in ``webpage``.

    Returns an empty dict when there is no webpage, no such call, or the
    JSON cannot be parsed.
    """
    if not webpage:
        return {}
    raw_cfg = self._search_regex(
        r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg', default='{}')
    parsed_cfg = self._parse_json(raw_cfg, video_id, fatal=False)
    return parsed_cfg or {}
545
def generate_api_headers(
        self, *, ytcfg=None, account_syncid=None, session_index=None,
        visitor_data=None, identity_token=None, api_hostname=None, default_client='web'):
    """Build the HTTP headers for an API request.

    Explicit arguments take precedence over values extracted from
    ``ytcfg``. ``Authorization`` and ``X-Origin`` are added only when a
    SAPISIDHASH can be generated. Headers whose value is None are dropped.
    """
    origin = 'https://' + self._select_api_hostname(api_hostname, default_client)
    client_name = self._ytcfg_get_safe(
        ytcfg, lambda x: x['INNERTUBE_CONTEXT_CLIENT_NAME'], default_client=default_client)
    headers = {
        'X-YouTube-Client-Name': str(client_name),
        'X-YouTube-Client-Version': self._extract_client_version(ytcfg, default_client),
        'Origin': origin,
        'X-Youtube-Identity-Token': identity_token or self._extract_identity_token(ytcfg),
        'X-Goog-PageId': account_syncid or self._extract_account_syncid(ytcfg),
        'X-Goog-Visitor-Id': visitor_data or self._extract_visitor_data(ytcfg),
    }

    if session_index is None:
        session_index = self._extract_session_index(ytcfg)
    if session_index is not None:
        headers['X-Goog-AuthUser'] = session_index
    elif account_syncid:
        # An explicit sync id without a known session index uses account 0
        headers['X-Goog-AuthUser'] = 0

    auth = self._generate_sapisidhash_header(origin)
    if auth is not None:
        headers.update({'Authorization': auth, 'X-Origin': origin})
    return {name: value for name, value in headers.items() if value is not None}
29f7c58a 570
def _download_ytcfg(self, client, video_id):
    """Download the config page of a web client and return its ytcfg.

    Only ``web``, ``web_music`` and ``web_embedded`` have a page; any
    other client, or a failed download, yields an empty dict.
    """
    config_pages = {
        'web': 'https://www.youtube.com',
        'web_music': 'https://music.youtube.com',
        'web_embedded': f'https://www.youtube.com/embed/{video_id}?html5=1'
    }
    url = config_pages.get(client)
    if not url:
        return {}
    note = f'Downloading {client.replace("_", " ").strip()} client config'
    webpage = self._download_webpage(url, video_id, fatal=False, note=note)
    return self.extract_ytcfg(video_id, webpage) or {}
582
2d6659b9 583 @staticmethod
584 def _build_api_continuation_query(continuation, ctp=None):
585 query = {
586 'continuation': continuation
587 }
588 # TODO: Inconsistency with clickTrackingParams.
589 # Currently we have a fixed ctp contained within context (from ytcfg)
590 # and a ctp in root query for continuation.
591 if ctp:
592 query['clickTracking'] = {'clickTrackingParams': ctp}
593 return query
594
@classmethod
def _extract_next_continuation_data(cls, renderer):
    """Build a continuation query from next/reload continuation data in ``renderer``, if any."""
    next_continuation = try_get(
        renderer, (lambda x: x['continuations'][0]['nextContinuationData'],
                   lambda x: x['continuation']['reloadContinuationData']), dict)
    continuation = next_continuation.get('continuation') if next_continuation else None
    if not continuation:
        return
    return cls._build_api_continuation_query(
        continuation, next_continuation.get('clickTrackingParams'))
2d6659b9 607
@classmethod
def _extract_continuation_ep_data(cls, continuation_ep: dict):
    """Build a continuation query from a continuation endpoint dict.

    Returns None when ``continuation_ep`` is not a dict or carries no
    ``continuationCommand`` token.
    """
    if not isinstance(continuation_ep, dict):
        return None
    token = try_get(continuation_ep, lambda x: x['continuationCommand']['token'], str)
    if not token:
        return None
    return cls._build_api_continuation_query(token, continuation_ep.get('clickTrackingParams'))
2d6659b9 617
@classmethod
def _extract_continuation(cls, renderer):
    """Return the continuation query for ``renderer``, or None.

    Next/reload continuation data is preferred; otherwise the first
    usable ``continuationItemRenderer`` among the renderer's ``contents``
    and ``items`` is used.
    """
    next_continuation = cls._extract_next_continuation_data(renderer)
    if next_continuation:
        return next_continuation

    children = [
        child
        for key in ('contents', 'items')
        for child in try_get(renderer, lambda x: x[key], list) or []]

    endpoint_getters = (
        lambda x: x['continuationItemRenderer']['continuationEndpoint'],
        lambda x: x['continuationItemRenderer']['button']['buttonRenderer']['command'],
    )
    for child in children:
        if not isinstance(child, dict):
            continue
        continuation_ep = try_get(child, endpoint_getters, dict)
        continuation = cls._extract_continuation_ep_data(continuation_ep)
        if continuation:
            return continuation
638
@classmethod
def _extract_alerts(cls, data):
    """Yield ``(alert_type, message)`` pairs from ``data['alerts']``.

    Entries of the alerts list that are not dicts are skipped, as are
    alerts without a ``type`` or without any text.
    """
    for alert_dict in try_get(data, lambda x: x['alerts'], list) or []:
        if not isinstance(alert_dict, dict):
            continue
        for alert in alert_dict.values():
            # The renderer payload itself may be malformed as well; skip it
            # instead of failing with AttributeError on `.get`
            if not isinstance(alert, dict):
                continue
            alert_type = alert.get('type')
            if not alert_type:
                continue
            message = cls._get_text(alert, 'text')
            if message:
                yield alert_type, message
651
def _report_alerts(self, alerts, expected=True, fatal=True, only_once=False):
    """Report alerts as warnings and raise on the last error alert.

    An alert counts as an error only when its type is ``error``
    (case-insensitive) and ``fatal`` is true. All but the last error are
    downgraded to warnings; the last one is raised as ExtractorError.
    """
    errors, warnings = [], []
    for alert_type, alert_message in alerts:
        is_error = alert_type.lower() == 'error' and fatal
        bucket = errors if is_error else warnings
        bucket.append([alert_type, alert_message])

    for alert_type, alert_message in warnings + errors[:-1]:
        self.report_warning(f'YouTube said: {alert_type} - {alert_message}', only_once=only_once)
    if not errors:
        return
    raise ExtractorError('YouTube said: %s' % errors[-1][1], expected=expected)
665
def _extract_and_report_alerts(self, data, *args, **kwargs):
    """Extract the alerts of ``data`` and pass them to ``_report_alerts``."""
    alerts = self._extract_alerts(data)
    return self._report_alerts(alerts, *args, **kwargs)
668
def _extract_badges(self, renderer: dict):
    """Return the set of lower-cased ``metadataBadgeRenderer`` labels in ``renderer['badges']``."""
    badge_list = try_get(renderer, lambda x: x['badges'], list) or []
    labels = (
        try_get(badge, lambda x: x['metadataBadgeRenderer']['label'], str)
        for badge in badge_list)
    return {label.lower() for label in labels if label}
676
@staticmethod
def _get_text(data, *path_list, max_runs=None):
    """
    Return the first non-empty text found in data.

    @param path_list  Paths (as accepted by traverse_obj) tried in order;
                      with no path, data itself is inspected
    @param max_runs   If truthy, only the first max_runs "runs" are joined
    @returns          The text string, or None if nothing was found
    """
    for path in path_list or [None]:
        if path is None:
            obj = [data]
        else:
            obj = traverse_obj(data, path, default=[])
            # A path without `...` or list/tuple keys is wrapped so that the
            # loop below handles its result as a single item
            if not any(key is ... or isinstance(key, (list, tuple)) for key in variadic(path)):
                obj = [obj]
        for item in obj:
            # Plain text form: {'simpleText': '...'}
            text = try_get(item, lambda x: x['simpleText'], str)
            if text:
                return text
            # Segmented form: {'runs': [{'text': '...'}, ...]} or a bare list of runs
            runs = try_get(item, lambda x: x['runs'], list) or []
            if not runs and isinstance(item, list):
                runs = item

            runs = runs[:min(len(runs), max_runs or len(runs))]
            text = ''.join(traverse_obj(runs, (..., 'text'), expected_type=str, default=[]))
            if text:
                return text
47193e02 698
def _get_count(self, data, *path_list):
    """Parse a count out of the text found at ``path_list`` in ``data``.

    ``parse_count`` is tried first; if it yields None, the leading run of
    digits and commas of the whitespace-stripped text is converted instead.
    """
    count_text = self._get_text(data, *path_list) or ''
    count = parse_count(count_text)
    if count is not None:
        return count
    compact_text = re.sub(r'\s', '', count_text)
    leading_digits = self._search_regex(r'^([\d,]+)', compact_text, 'count', default=None)
    return str_to_int(leading_digits)
706
@staticmethod
def _extract_thumbnails(data, *path_list):
    """
    Extract thumbnails from thumbnails dict
    @param path_list: path list to level that contains 'thumbnails' key
    @returns list of dicts with 'url', 'height' and 'width'
    """
    found = []
    for path in path_list or [()]:
        candidates = traverse_obj(data, (*variadic(path), 'thumbnails', ...), default=[])
        for candidate in candidates:
            url = url_or_none(candidate.get('url'))
            if not url:
                continue
            # Sometimes youtube gives a wrong thumbnail URL. See:
            # https://github.com/yt-dlp/yt-dlp/issues/233
            # https://github.com/ytdl-org/youtube-dl/issues/28023
            if 'maxresdefault' in url:
                url = url.split('?')[0]
            height = int_or_none(candidate.get('height'))
            width = int_or_none(candidate.get('width'))
            found.append({'url': url, 'height': height, 'width': width})
    return found
730
@staticmethod
def extract_relative_time(relative_time_text):
    """
    Extracts a relative time from string and converts to dt object
    e.g. 'streamed 6 days ago', '5 seconds ago (edited)', 'updated today'
    Returns None if no relative time is found or it cannot be converted
    """
    mobj = re.search(
        r'(?P<start>today|yesterday|now)|(?P<time>\d+)\s*(?P<unit>microsecond|second|minute|hour|day|week|month|year)s?\s*ago',
        relative_time_text)
    if not mobj:
        return None
    start = mobj.group('start')
    if start:
        return datetime_from_str(start)
    amount, unit = mobj.group('time'), mobj.group('unit')
    try:
        return datetime_from_str('now-%s%s' % (amount, unit))
    except ValueError:
        return None
746
def _extract_time_text(self, renderer, *path_list):
    """@returns (timestamp, time_text)

    The text is first read as a relative time ('3 days ago'), then as a
    date, then as a date embedded in a longer phrase. A warning is issued
    once when non-empty text yields no timestamp.
    """
    text = self._get_text(renderer, *path_list) or ''
    dt = self.extract_relative_time(text)
    timestamp = calendar.timegm(dt.timetuple()) if isinstance(dt, datetime.datetime) else None

    if timestamp is None:
        timestamp = unified_timestamp(text)
        if not timestamp:
            date_patterns = (
                r'([a-z]+\s*\d{1,2},?\s*20\d{2})',
                r'(?:.+|^)(?:live|premieres|ed|ing)(?:\s*(?:on|for))?\s*(.+\d)',
            )
            date_text = self._search_regex(date_patterns, text.lower(), 'time text', default=None)
            timestamp = unified_timestamp(date_text)

    if text and timestamp is None:
        self.report_warning(f"Cannot parse localized time text '{text}'" + bug_reports_message(), only_once=True)
    return timestamp, text
765
def _extract_response(self, item_id, query, note='Downloading API JSON', headers=None,
                      ytcfg=None, check_get_keys=None, ep='browse', fatal=True, api_hostname=None,
                      default_client='web'):
    """
    Call the API endpoint `ep` with retries and return the JSON response.

    The request is retried on network errors other than HTTP 403/429, on
    alerts containing "unknown error", and when the response lacks the data
    at `check_get_keys`. Other failures go through
    self._error_or_warning(e, fatal=fatal).
    """
    for retry in self.RetryManager():
        try:
            # Always fatal here so that failures surface as ExtractorError
            # and can be classified below
            response = self._call_api(
                ep=ep, fatal=True, headers=headers,
                video_id=item_id, query=query, note=note,
                context=self._extract_context(ytcfg, default_client),
                api_key=self._extract_api_key(ytcfg, default_client),
                api_hostname=api_hostname, default_client=default_client)
        except ExtractorError as e:
            if not isinstance(e.cause, network_exceptions):
                return self._error_or_warning(e, fatal=fatal)
            elif not isinstance(e.cause, urllib.error.HTTPError):
                retry.error = e
                continue

            # HTTP error: if the body is not HTML, try to read the API's
            # JSON error message and report it as a warning
            first_bytes = e.cause.read(512)
            if not is_html(first_bytes):
                yt_error = try_get(
                    self._parse_json(
                        self._webpage_read_content(e.cause, None, item_id, prefix=first_bytes) or '{}', item_id, fatal=False),
                    lambda x: x['error']['message'], str)
                if yt_error:
                    self._report_alerts([('ERROR', yt_error)], fatal=False)
            # Downloading page may result in intermittent 5xx HTTP error
            # Sometimes a 404 is also received. See: https://github.com/ytdl-org/youtube-dl/issues/28289
            # We also want to catch all other network exceptions since errors in later pages can be troublesome
            # See https://github.com/yt-dlp/yt-dlp/issues/507#issuecomment-880188210
            if e.cause.code not in (403, 429):
                retry.error = e
                continue
            return self._error_or_warning(e, fatal=fatal)

        try:
            self._extract_and_report_alerts(response, only_once=True)
        except ExtractorError as e:
            # YouTube servers may return errors we want to retry on in a 200 OK response
            # See: https://github.com/yt-dlp/yt-dlp/issues/839
            if 'unknown error' in e.msg.lower():
                retry.error = e
                continue
            return self._error_or_warning(e, fatal=fatal)
        # Youtube sometimes sends incomplete data
        # See: https://github.com/ytdl-org/youtube-dl/issues/28194
        if not traverse_obj(response, *variadic(check_get_keys)):
            retry.error = ExtractorError('Incomplete data received', expected=True)
            continue

        return response
109dd3b2 817
@staticmethod
def is_music_url(url):
    """Return True if `url` starts with http(s)://music.youtube.com/, else False."""
    return bool(re.match(r'https?://music\.youtube\.com/', url))
821
def _extract_video(self, renderer):
    """Build a `url`-type result dict for YoutubeIE from a video renderer dict.

    The entry points at the /shorts/ URL when the time-status overlay style is
    'SHORTS' or the renderer's navigation URL contains '/shorts/'; otherwise it
    points at the regular watch URL. `upload_date` is only filled in when the
    `approximate_date` extractor argument of `youtubetab` is set.
    """
    overlay_base = ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer')

    video_id = renderer.get('videoId')
    title = self._get_text(renderer, 'title')
    description = self._get_text(renderer, 'descriptionSnippet')

    length_text = self._get_text(renderer, 'lengthText', overlay_base + ('text',))
    duration = parse_duration(length_text)
    if duration is None:
        # Fall back to the duration embedded in the title's accessibility label
        a11y_label = traverse_obj(
            renderer, ('title', 'accessibility', 'accessibilityData', 'label'),
            default='', expected_type=str)
        duration = parse_duration(self._search_regex(
            r'(?i)(ago)(?!.*\1)\s+(?P<duration>[a-z0-9 ,]+?)(?:\s+[\d,]+\s+views)?(?:\s+-\s+play\s+short)?$',
            a11y_label, video_id, default=None, group='duration'))

    view_count = self._get_count(renderer, 'viewCountText')
    uploader = self._get_text(renderer, 'ownerText', 'shortBylineText')
    channel_id = traverse_obj(
        renderer, ('shortBylineText', 'runs', ..., 'navigationEndpoint', 'browseEndpoint', 'browseId'),
        expected_type=str, get_all=False)
    timestamp, time_text = self._extract_time_text(renderer, 'publishedTimeText')
    start_time = traverse_obj(renderer, ('upcomingEventData', 'startTime'), get_all=False)
    scheduled_timestamp = str_to_int(start_time)
    overlay_style = traverse_obj(
        renderer, overlay_base + ('style',), get_all=False, expected_type=str)
    badges = self._extract_badges(renderer)
    thumbnails = self._extract_thumbnails(renderer, 'thumbnail')

    relative_url = traverse_obj(
        renderer, ('navigationEndpoint', 'commandMetadata', 'webCommandMetadata', 'url'),
        expected_type=str)
    navigation_url = urljoin('https://www.youtube.com/', relative_url) or ''
    is_short = overlay_style == 'SHORTS' or '/shorts/' in navigation_url
    url = (f'https://www.youtube.com/shorts/{video_id}' if is_short
           else f'https://www.youtube.com/watch?v={video_id}')

    # Computed in the same order in which the result dict consumes them
    ie_key = YoutubeIE.ie_key()

    upload_date = None
    if self._configuration_arg('approximate_date', ie_key='youtubetab'):
        upload_date = strftime_or_none(timestamp, '%Y%m%d')

    if scheduled_timestamp is not None:
        live_status = 'is_upcoming'
    elif 'streamed' in time_text.lower():
        live_status = 'was_live'
    elif overlay_style == 'LIVE' or 'live now' in badges:
        live_status = 'is_live'
    else:
        live_status = None

    availability = self._availability(
        needs_premium='premium' in badges, needs_subscription='members only' in badges)

    return {
        '_type': 'url',
        'ie_key': ie_key,
        'id': video_id,
        'url': url,
        'title': title,
        'description': description,
        'duration': duration,
        'view_count': view_count,
        'uploader': uploader,
        'channel_id': channel_id,
        'thumbnails': thumbnails,
        'upload_date': upload_date,
        'live_status': live_status,
        'release_timestamp': scheduled_timestamp,
        'availability': availability,
    }
876
0c148415 877
360e1ca5 878class YoutubeIE(YoutubeBaseInfoExtractor):
96565c7e 879 IE_DESC = 'YouTube'
cb7dfeea 880 _VALID_URL = r"""(?x)^
c5e8d7af 881 (
edb53e2d 882 (?:https?://|//) # http(s):// or protocol-independent URL
bc2ca1bb 883 (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com|
884 (?:www\.)?deturl\.com/www\.youtube\.com|
885 (?:www\.)?pwnyoutube\.com|
886 (?:www\.)?hooktube\.com|
887 (?:www\.)?yourepeat\.com|
888 tube\.majestyc\.net|
889 %(invidious)s|
890 youtube\.googleapis\.com)/ # the various hostnames, with wildcard subdomains
c5e8d7af
PH
891 (?:.*?\#/)? # handle anchor (#/) redirect urls
892 (?: # the various things that can precede the ID:
b6ce9bb0 893 (?:(?:v|embed|e|shorts)/(?!videoseries|live_stream)) # v/ or embed/ or e/ or shorts/
c5e8d7af 894 |(?: # or the v= param in all its forms
f7000f3a 895 (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
c5e8d7af 896 (?:\?|\#!?) # the params delimiter ? or # or #!
040ac686 897 (?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
c5e8d7af
PH
898 v=
899 )
f4b05232 900 ))
cbaed4bb
S
901 |(?:
902 youtu\.be| # just youtu.be/xxxx
6d4fc66b
S
903 vid\.plus| # or vid.plus/xxxx
904 zwearz\.com/watch| # or zwearz.com/watch/xxxx
bc2ca1bb 905 %(invidious)s
cbaed4bb 906 )/
edb53e2d 907 |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
f4b05232 908 )
c5e8d7af 909 )? # all until now is optional -> you can pass the naked ID
201c1459 910 (?P<id>[0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
c5e8d7af 911 (?(1).+)? # if we found the ID, everything can follow
9297939e 912 (?:\#|$)""" % {
d9190e44 913 'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
bc2ca1bb 914 }
bfd973ec 915 _EMBED_REGEX = [r'''(?x)
916 (?:
917 <iframe[^>]+?src=|
918 data-video-url=|
919 <embed[^>]+?src=|
920 embedSWF\(?:\s*|
921 <object[^>]+data=|
922 new\s+SWFObject\(
923 )
924 (["\'])
925 (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
926 (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
927 \1''']
e40c758c 928 _PLAYER_INFO_RE = (
cc2db878 929 r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player',
930 r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',
545cc85d 931 r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$',
e40c758c 932 )
2c62dc26 933 _formats = {
c2d3cb4c 934 '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
935 '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
936 '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
937 '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
938 '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
939 '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
940 '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
941 '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
3834d3e3 942 # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
c2d3cb4c 943 '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
944 '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
945 '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
946 '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
947 '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
948 '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
e1a0bfdf 949 '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
c2d3cb4c 950 '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
951 '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
e1a0bfdf 952
953
954 # 3D videos
c2d3cb4c 955 '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
956 '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
957 '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
958 '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
e1a0bfdf 959 '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
960 '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
961 '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
836a086c 962
96fb5605 963 # Apple HTTP Live Streaming
11f12195 964 '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
c2d3cb4c 965 '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
966 '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
967 '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
968 '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
969 '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
e1a0bfdf 970 '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
971 '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
2c62dc26
PH
972
973 # DASH mp4 video
d23028a8
S
974 '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
975 '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
976 '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
977 '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
978 '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
067aa17e 979 '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
d23028a8
S
980 '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
981 '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
982 '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
983 '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
984 '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
985 '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},
836a086c 986
f6f1fc92 987 # Dash mp4 audio
d23028a8
S
988 '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
989 '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
990 '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
991 '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
992 '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
993 '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
994 '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},
836a086c
AZ
995
996 # Dash webm
d23028a8
S
997 '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
998 '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
999 '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1000 '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1001 '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1002 '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1003 '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
1004 '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1005 '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1006 '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1007 '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1008 '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1009 '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1010 '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1011 '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
4c6b4764 1012 # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
d23028a8
S
1013 '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1014 '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
1015 '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
1016 '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
1017 '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1018 '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
2c62dc26
PH
1019
1020 # Dash webm audio
d23028a8
S
1021 '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
1022 '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},
ce6b9a2d 1023
0857baad 1024 # Dash webm audio with opus inside
d23028a8
S
1025 '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
1026 '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
1027 '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},
0857baad 1028
ce6b9a2d
PH
1029 # RTMP (unnamed)
1030 '_rtmp': {'protocol': 'rtmp'},
b85eae0f
S
1031
1032 # av01 video only formats sometimes served with "unknown" codecs
9b5fa9ee
TOH
1033 '394': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},
1034 '395': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},
1035 '396': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'av01.0.01M.08'},
1036 '397': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'av01.0.04M.08'},
1037 '398': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'av01.0.05M.08'},
1038 '399': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'av01.0.08M.08'},
1039 '400': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},
1040 '401': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},
c5e8d7af 1041 }
29f7c58a 1042 _SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')
836a086c 1043
fd5c4aab
S
1044 _GEO_BYPASS = False
1045
78caa52a 1046 IE_NAME = 'youtube'
2eb88d95
PH
1047 _TESTS = [
1048 {
2d3d2997 1049 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
4bc3a23e
PH
1050 'info_dict': {
1051 'id': 'BaW_jenozKc',
1052 'ext': 'mp4',
3867038a 1053 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
4bc3a23e
PH
1054 'uploader': 'Philipp Hagemeister',
1055 'uploader_id': 'phihag',
ec85ded8 1056 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
ff9f925b 1057 'channel': 'Philipp Hagemeister',
dd4c4492
S
1058 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
1059 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
4bc3a23e 1060 'upload_date': '20121002',
ff9f925b 1061 'description': 'md5:8fb536f4877b8a7455c2ec23794dbc22',
4bc3a23e 1062 'categories': ['Science & Technology'],
3867038a 1063 'tags': ['youtube-dl'],
556dbe7f 1064 'duration': 10,
dbdaaa23 1065 'view_count': int,
3e7c1224 1066 'like_count': int,
ff9f925b 1067 'availability': 'public',
1068 'playable_in_embed': True,
1069 'thumbnail': 'https://i.ytimg.com/vi/BaW_jenozKc/maxresdefault.jpg',
1070 'live_status': 'not_live',
1071 'age_limit': 0,
7c80519c 1072 'start_time': 1,
297a564b 1073 'end_time': 9,
12a1b225 1074 'comment_count': int,
6c73052c 1075 'channel_follower_count': int
2eb88d95 1076 }
0e853ca4 1077 },
fccd3771 1078 {
4bc3a23e
PH
1079 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
1080 'note': 'Embed-only video (#1746)',
1081 'info_dict': {
1082 'id': 'yZIXLfi8CZQ',
1083 'ext': 'mp4',
1084 'upload_date': '20120608',
1085 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
1086 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
1087 'uploader': 'SET India',
94bfcd23 1088 'uploader_id': 'setindia',
ec85ded8 1089 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
94bfcd23 1090 'age_limit': 18,
545cc85d 1091 },
1092 'skip': 'Private video',
fccd3771 1093 },
11b56058 1094 {
8bdd16b4 1095 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=yZIXLfi8CZQ',
11b56058
PM
1096 'note': 'Use the first video ID in the URL',
1097 'info_dict': {
1098 'id': 'BaW_jenozKc',
1099 'ext': 'mp4',
3867038a 1100 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
11b56058
PM
1101 'uploader': 'Philipp Hagemeister',
1102 'uploader_id': 'phihag',
ec85ded8 1103 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
976ae3ea 1104 'channel': 'Philipp Hagemeister',
1105 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
1106 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
11b56058 1107 'upload_date': '20121002',
976ae3ea 1108 'description': 'md5:8fb536f4877b8a7455c2ec23794dbc22',
11b56058 1109 'categories': ['Science & Technology'],
3867038a 1110 'tags': ['youtube-dl'],
556dbe7f 1111 'duration': 10,
dbdaaa23 1112 'view_count': int,
11b56058 1113 'like_count': int,
976ae3ea 1114 'availability': 'public',
1115 'playable_in_embed': True,
1116 'thumbnail': 'https://i.ytimg.com/vi/BaW_jenozKc/maxresdefault.jpg',
1117 'live_status': 'not_live',
1118 'age_limit': 0,
12a1b225 1119 'comment_count': int,
6c73052c 1120 'channel_follower_count': int
34a7de29
S
1121 },
1122 'params': {
1123 'skip_download': True,
1124 },
11b56058 1125 },
dd27fd17 1126 {
2d3d2997 1127 'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
4bc3a23e
PH
1128 'note': '256k DASH audio (format 141) via DASH manifest',
1129 'info_dict': {
1130 'id': 'a9LDPn-MO4I',
1131 'ext': 'm4a',
1132 'upload_date': '20121002',
1133 'uploader_id': '8KVIDEO',
ec85ded8 1134 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
4bc3a23e
PH
1135 'description': '',
1136 'uploader': '8KVIDEO',
1137 'title': 'UHDTV TEST 8K VIDEO.mp4'
4919603f 1138 },
4bc3a23e
PH
1139 'params': {
1140 'youtube_include_dash_manifest': True,
1141 'format': '141',
4919603f 1142 },
de3c7fe0 1143 'skip': 'format 141 not served anymore',
dd27fd17 1144 },
8bdd16b4 1145 # DASH manifest with encrypted signature
1146 {
1147 'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
1148 'info_dict': {
1149 'id': 'IB3lcPjvWLA',
1150 'ext': 'm4a',
1151 'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
1152 'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
1153 'duration': 244,
1154 'uploader': 'AfrojackVEVO',
1155 'uploader_id': 'AfrojackVEVO',
1156 'upload_date': '20131011',
cc2db878 1157 'abr': 129.495,
976ae3ea 1158 'like_count': int,
1159 'channel_id': 'UChuZAo1RKL85gev3Eal9_zg',
1160 'playable_in_embed': True,
1161 'channel_url': 'https://www.youtube.com/channel/UChuZAo1RKL85gev3Eal9_zg',
1162 'view_count': int,
1163 'track': 'The Spark',
1164 'live_status': 'not_live',
1165 'thumbnail': 'https://i.ytimg.com/vi_webp/IB3lcPjvWLA/maxresdefault.webp',
1166 'channel': 'Afrojack',
1167 'uploader_url': 'http://www.youtube.com/user/AfrojackVEVO',
1168 'tags': 'count:19',
1169 'availability': 'public',
1170 'categories': ['Music'],
1171 'age_limit': 0,
1172 'alt_title': 'The Spark',
6c73052c 1173 'channel_follower_count': int
8bdd16b4 1174 },
1175 'params': {
1176 'youtube_include_dash_manifest': True,
1177 'format': '141/bestaudio[ext=m4a]',
1178 },
1179 },
65c2fde2 1180 # Age-gate videos. See https://github.com/yt-dlp/yt-dlp/pull/575#issuecomment-888837000
c522adb1 1181 {
65c2fde2 1182 'note': 'Embed allowed age-gate video',
2d3d2997 1183 'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
c522adb1
JMF
1184 'info_dict': {
1185 'id': 'HtVdAasjOgU',
1186 'ext': 'mp4',
1187 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
ec85ded8 1188 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
556dbe7f 1189 'duration': 142,
c522adb1
JMF
1190 'uploader': 'The Witcher',
1191 'uploader_id': 'WitcherGame',
ec85ded8 1192 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
c522adb1 1193 'upload_date': '20140605',
34952f09 1194 'age_limit': 18,
976ae3ea 1195 'categories': ['Gaming'],
1196 'thumbnail': 'https://i.ytimg.com/vi_webp/HtVdAasjOgU/maxresdefault.webp',
1197 'availability': 'needs_auth',
1198 'channel_url': 'https://www.youtube.com/channel/UCzybXLxv08IApdjdN0mJhEg',
1199 'like_count': int,
1200 'channel': 'The Witcher',
1201 'live_status': 'not_live',
1202 'tags': 'count:17',
1203 'channel_id': 'UCzybXLxv08IApdjdN0mJhEg',
1204 'playable_in_embed': True,
1205 'view_count': int,
6c73052c 1206 'channel_follower_count': int
c522adb1
JMF
1207 },
1208 },
65c2fde2 1209 {
1210 'note': 'Age-gate video with embed allowed in public site',
1211 'url': 'https://youtube.com/watch?v=HsUATh_Nc2U',
1212 'info_dict': {
1213 'id': 'HsUATh_Nc2U',
1214 'ext': 'mp4',
1215 'title': 'Godzilla 2 (Official Video)',
1216 'description': 'md5:bf77e03fcae5529475e500129b05668a',
1217 'upload_date': '20200408',
1218 'uploader_id': 'FlyingKitty900',
1219 'uploader': 'FlyingKitty',
1220 'age_limit': 18,
976ae3ea 1221 'availability': 'needs_auth',
1222 'channel_id': 'UCYQT13AtrJC0gsM1far_zJg',
1223 'uploader_url': 'http://www.youtube.com/user/FlyingKitty900',
1224 'channel': 'FlyingKitty',
1225 'channel_url': 'https://www.youtube.com/channel/UCYQT13AtrJC0gsM1far_zJg',
1226 'view_count': int,
1227 'categories': ['Entertainment'],
1228 'live_status': 'not_live',
1229 'tags': ['Flyingkitty', 'godzilla 2'],
1230 'thumbnail': 'https://i.ytimg.com/vi/HsUATh_Nc2U/maxresdefault.jpg',
1231 'like_count': int,
1232 'duration': 177,
1233 'playable_in_embed': True,
6c73052c 1234 'channel_follower_count': int
65c2fde2 1235 },
1236 },
1237 {
1238 'note': 'Age-gate video embedable only with clientScreen=EMBED',
1239 'url': 'https://youtube.com/watch?v=Tq92D6wQ1mg',
1240 'info_dict': {
1241 'id': 'Tq92D6wQ1mg',
1242 'title': '[MMD] Adios - EVERGLOW [+Motion DL]',
3619f78d 1243 'ext': 'mp4',
17322130 1244 'upload_date': '20191228',
65c2fde2 1245 'uploader_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',
1246 'uploader': 'Projekt Melody',
1247 'description': 'md5:17eccca93a786d51bc67646756894066',
1248 'age_limit': 18,
976ae3ea 1249 'like_count': int,
1250 'availability': 'needs_auth',
1251 'uploader_url': 'http://www.youtube.com/channel/UC1yoRdFoFJaCY-AGfD9W0wQ',
1252 'channel_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',
1253 'view_count': int,
1254 'thumbnail': 'https://i.ytimg.com/vi_webp/Tq92D6wQ1mg/sddefault.webp',
1255 'channel': 'Projekt Melody',
1256 'live_status': 'not_live',
1257 'tags': ['mmd', 'dance', 'mikumikudance', 'kpop', 'vtuber'],
1258 'playable_in_embed': True,
1259 'categories': ['Entertainment'],
1260 'duration': 106,
1261 'channel_url': 'https://www.youtube.com/channel/UC1yoRdFoFJaCY-AGfD9W0wQ',
12a1b225 1262 'comment_count': int,
6c73052c 1263 'channel_follower_count': int
65c2fde2 1264 },
1265 },
1266 {
1267 'note': 'Non-Agegated non-embeddable video',
1268 'url': 'https://youtube.com/watch?v=MeJVWBSsPAY',
1269 'info_dict': {
1270 'id': 'MeJVWBSsPAY',
1271 'ext': 'mp4',
1272 'title': 'OOMPH! - Such Mich Find Mich (Lyrics)',
1273 'uploader': 'Herr Lurik',
1274 'uploader_id': 'st3in234',
1275 'description': 'Fan Video. Music & Lyrics by OOMPH!.',
1276 'upload_date': '20130730',
976ae3ea 1277 'track': 'Such mich find mich',
1278 'age_limit': 0,
1279 'tags': ['oomph', 'such mich find mich', 'lyrics', 'german industrial', 'musica industrial'],
1280 'like_count': int,
1281 'playable_in_embed': False,
1282 'creator': 'OOMPH!',
1283 'thumbnail': 'https://i.ytimg.com/vi/MeJVWBSsPAY/sddefault.jpg',
1284 'view_count': int,
1285 'alt_title': 'Such mich find mich',
1286 'duration': 210,
1287 'channel': 'Herr Lurik',
1288 'channel_id': 'UCdR3RSDPqub28LjZx0v9-aA',
1289 'categories': ['Music'],
1290 'availability': 'public',
1291 'uploader_url': 'http://www.youtube.com/user/st3in234',
1292 'channel_url': 'https://www.youtube.com/channel/UCdR3RSDPqub28LjZx0v9-aA',
1293 'live_status': 'not_live',
1294 'artist': 'OOMPH!',
6c73052c 1295 'channel_follower_count': int
65c2fde2 1296 },
1297 },
1298 {
1299 'note': 'Non-bypassable age-gated video',
1300 'url': 'https://youtube.com/watch?v=Cr381pDsSsA',
1301 'only_matching': True,
1302 },
8bdd16b4 1303 # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
1304 # YouTube Red ad is not captured for creator
1305 {
1306 'url': '__2ABJjxzNo',
1307 'info_dict': {
1308 'id': '__2ABJjxzNo',
1309 'ext': 'mp4',
1310 'duration': 266,
1311 'upload_date': '20100430',
1312 'uploader_id': 'deadmau5',
1313 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
545cc85d 1314 'creator': 'deadmau5',
1315 'description': 'md5:6cbcd3a92ce1bc676fc4d6ab4ace2336',
8bdd16b4 1316 'uploader': 'deadmau5',
1317 'title': 'Deadmau5 - Some Chords (HD)',
545cc85d 1318 'alt_title': 'Some Chords',
976ae3ea 1319 'availability': 'public',
1320 'tags': 'count:14',
1321 'channel_id': 'UCYEK6xds6eo-3tr4xRdflmQ',
1322 'view_count': int,
1323 'live_status': 'not_live',
1324 'channel': 'deadmau5',
1325 'thumbnail': 'https://i.ytimg.com/vi_webp/__2ABJjxzNo/maxresdefault.webp',
1326 'like_count': int,
1327 'track': 'Some Chords',
1328 'artist': 'deadmau5',
1329 'playable_in_embed': True,
1330 'age_limit': 0,
1331 'channel_url': 'https://www.youtube.com/channel/UCYEK6xds6eo-3tr4xRdflmQ',
1332 'categories': ['Music'],
1333 'album': 'Some Chords',
6c73052c 1334 'channel_follower_count': int
8bdd16b4 1335 },
1336 'expected_warnings': [
1337 'DASH manifest missing',
1338 ]
1339 },
067aa17e 1340 # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
e52a40ab
PH
1341 {
1342 'url': 'lqQg6PlCWgI',
1343 'info_dict': {
1344 'id': 'lqQg6PlCWgI',
1345 'ext': 'mp4',
556dbe7f 1346 'duration': 6085,
90227264 1347 'upload_date': '20150827',
cbe2bd91 1348 'uploader_id': 'olympic',
ec85ded8 1349 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
12a1b225 1350 'description': 'md5:04bbbf3ccceb6795947572ca36f45904',
11f9be09 1351 'uploader': 'Olympics',
cbe2bd91 1352 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
976ae3ea 1353 'like_count': int,
1354 'release_timestamp': 1343767800,
1355 'playable_in_embed': True,
1356 'categories': ['Sports'],
1357 'release_date': '20120731',
1358 'channel': 'Olympics',
1359 'tags': ['Hockey', '2012-07-31', '31 July 2012', 'Riverbank Arena', 'Session', 'Olympics', 'Olympic Games', 'London 2012', '2012 Summer Olympics', 'Summer Games'],
1360 'channel_id': 'UCTl3QQTvqHFjurroKxexy2Q',
1361 'thumbnail': 'https://i.ytimg.com/vi/lqQg6PlCWgI/maxresdefault.jpg',
1362 'age_limit': 0,
1363 'availability': 'public',
1364 'live_status': 'was_live',
1365 'view_count': int,
1366 'channel_url': 'https://www.youtube.com/channel/UCTl3QQTvqHFjurroKxexy2Q',
6c73052c 1367 'channel_follower_count': int
cbe2bd91
PH
1368 },
1369 'params': {
1370 'skip_download': 'requires avconv',
e52a40ab 1371 }
cbe2bd91 1372 },
6271f1ca
PH
1373 # Non-square pixels
1374 {
1375 'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
1376 'info_dict': {
1377 'id': '_b-2C3KPAM0',
1378 'ext': 'mp4',
1379 'stretched_ratio': 16 / 9.,
556dbe7f 1380 'duration': 85,
6271f1ca
PH
1381 'upload_date': '20110310',
1382 'uploader_id': 'AllenMeow',
ec85ded8 1383 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
6271f1ca 1384 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
eb6793ba 1385 'uploader': '孫ᄋᄅ',
6271f1ca 1386 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
976ae3ea 1387 'playable_in_embed': True,
1388 'channel': '孫ᄋᄅ',
1389 'age_limit': 0,
1390 'tags': 'count:11',
1391 'channel_url': 'https://www.youtube.com/channel/UCS-xxCmRaA6BFdmgDPA_BIw',
1392 'channel_id': 'UCS-xxCmRaA6BFdmgDPA_BIw',
1393 'thumbnail': 'https://i.ytimg.com/vi/_b-2C3KPAM0/maxresdefault.jpg',
1394 'view_count': int,
1395 'categories': ['People & Blogs'],
1396 'like_count': int,
1397 'live_status': 'not_live',
1398 'availability': 'unlisted',
12a1b225 1399 'comment_count': int,
6c73052c 1400 'channel_follower_count': int
6271f1ca 1401 },
06b491eb
S
1402 },
1403 # url_encoded_fmt_stream_map is empty string
1404 {
1405 'url': 'qEJwOuvDf7I',
1406 'info_dict': {
1407 'id': 'qEJwOuvDf7I',
f57b7835 1408 'ext': 'webm',
06b491eb
S
1409 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
1410 'description': '',
1411 'upload_date': '20150404',
1412 'uploader_id': 'spbelect',
1413 'uploader': 'Наблюдатели Петербурга',
1414 },
1415 'params': {
1416 'skip_download': 'requires avconv',
e323cf3f
S
1417 },
1418 'skip': 'This live event has ended.',
06b491eb 1419 },
067aa17e 1420 # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
da77d856
S
1421 {
1422 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
1423 'info_dict': {
1424 'id': 'FIl7x6_3R5Y',
eb6793ba 1425 'ext': 'webm',
da77d856
S
1426 'title': 'md5:7b81415841e02ecd4313668cde88737a',
1427 'description': 'md5:116377fd2963b81ec4ce64b542173306',
556dbe7f 1428 'duration': 220,
da77d856
S
1429 'upload_date': '20150625',
1430 'uploader_id': 'dorappi2000',
ec85ded8 1431 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
da77d856 1432 'uploader': 'dorappi2000',
eb6793ba 1433 'formats': 'mincount:31',
da77d856 1434 },
eb6793ba 1435 'skip': 'not actual anymore',
2ee8f5d8 1436 },
8a1a26ce
YCH
1437 # DASH manifest with segment_list
1438 {
1439 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
1440 'md5': '8ce563a1d667b599d21064e982ab9e31',
1441 'info_dict': {
1442 'id': 'CsmdDsKjzN8',
1443 'ext': 'mp4',
17ee98e1 1444 'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510
8a1a26ce
YCH
1445 'uploader': 'Airtek',
1446 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
1447 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
1448 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
1449 },
1450 'params': {
1451 'youtube_include_dash_manifest': True,
1452 'format': '135', # bestvideo
be49068d
S
1453 },
1454 'skip': 'This live event has ended.',
2ee8f5d8 1455 },
cf7e015f
S
1456 {
1457 # Multifeed videos (multiple cameras), URL is for Main Camera
545cc85d 1458 'url': 'https://www.youtube.com/watch?v=jvGDaLqkpTg',
cf7e015f 1459 'info_dict': {
545cc85d 1460 'id': 'jvGDaLqkpTg',
1461 'title': 'Tom Clancy Free Weekend Rainbow Whatever',
1462 'description': 'md5:e03b909557865076822aa169218d6a5d',
cf7e015f
S
1463 },
1464 'playlist': [{
1465 'info_dict': {
545cc85d 1466 'id': 'jvGDaLqkpTg',
cf7e015f 1467 'ext': 'mp4',
545cc85d 1468 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Main Camera)',
1469 'description': 'md5:e03b909557865076822aa169218d6a5d',
1470 'duration': 10643,
1471 'upload_date': '20161111',
1472 'uploader': 'Team PGP',
1473 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1474 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
1475 },
1476 }, {
1477 'info_dict': {
545cc85d 1478 'id': '3AKt1R1aDnw',
cf7e015f 1479 'ext': 'mp4',
545cc85d 1480 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 2)',
1481 'description': 'md5:e03b909557865076822aa169218d6a5d',
1482 'duration': 10991,
1483 'upload_date': '20161111',
1484 'uploader': 'Team PGP',
1485 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1486 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
1487 },
1488 }, {
1489 'info_dict': {
545cc85d 1490 'id': 'RtAMM00gpVc',
cf7e015f 1491 'ext': 'mp4',
545cc85d 1492 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 3)',
1493 'description': 'md5:e03b909557865076822aa169218d6a5d',
1494 'duration': 10995,
1495 'upload_date': '20161111',
1496 'uploader': 'Team PGP',
1497 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1498 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
1499 },
1500 }, {
1501 'info_dict': {
545cc85d 1502 'id': '6N2fdlP3C5U',
cf7e015f 1503 'ext': 'mp4',
545cc85d 1504 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 4)',
1505 'description': 'md5:e03b909557865076822aa169218d6a5d',
1506 'duration': 10990,
1507 'upload_date': '20161111',
1508 'uploader': 'Team PGP',
1509 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1510 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
1511 },
1512 }],
1513 'params': {
1514 'skip_download': True,
1515 },
65c2fde2 1516 'skip': 'Not multifeed anymore',
cbaed4bb 1517 },
f9f49d87 1518 {
067aa17e 1519 # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
f9f49d87
S
1520 'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
1521 'info_dict': {
1522 'id': 'gVfLd0zydlo',
1523 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
1524 },
1525 'playlist_count': 2,
be49068d 1526 'skip': 'Not multifeed anymore',
f9f49d87 1527 },
cbaed4bb 1528 {
2d3d2997 1529 'url': 'https://vid.plus/FlRa-iH7PGw',
cbaed4bb 1530 'only_matching': True,
0e49d9a6 1531 },
6d4fc66b 1532 {
2d3d2997 1533 'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
6d4fc66b
S
1534 'only_matching': True,
1535 },
0e49d9a6 1536 {
067aa17e 1537 # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
a8776b10 1538 # Also tests cut-off URL expansion in video description (see
067aa17e
S
1539 # https://github.com/ytdl-org/youtube-dl/issues/1892,
1540 # https://github.com/ytdl-org/youtube-dl/issues/8164)
0e49d9a6
LL
1541 'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
1542 'info_dict': {
1543 'id': 'lsguqyKfVQg',
1544 'ext': 'mp4',
1545 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
11f9be09 1546 'alt_title': 'Dark Walk',
0e49d9a6 1547 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
556dbe7f 1548 'duration': 133,
0e49d9a6
LL
1549 'upload_date': '20151119',
1550 'uploader_id': 'IronSoulElf',
ec85ded8 1551 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
0e49d9a6 1552 'uploader': 'IronSoulElf',
11f9be09 1553 'creator': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',
1554 'track': 'Dark Walk',
1555 'artist': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',
92bc97d3 1556 'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
976ae3ea 1557 'thumbnail': 'https://i.ytimg.com/vi_webp/lsguqyKfVQg/maxresdefault.webp',
1558 'categories': ['Film & Animation'],
1559 'view_count': int,
1560 'live_status': 'not_live',
1561 'channel_url': 'https://www.youtube.com/channel/UCTSRgz5jylBvFt_S7wnsqLQ',
1562 'channel_id': 'UCTSRgz5jylBvFt_S7wnsqLQ',
1563 'tags': 'count:13',
1564 'availability': 'public',
1565 'channel': 'IronSoulElf',
1566 'playable_in_embed': True,
1567 'like_count': int,
1568 'age_limit': 0,
6c73052c 1569 'channel_follower_count': int
0e49d9a6
LL
1570 },
1571 'params': {
1572 'skip_download': True,
1573 },
1574 },
61f92af1 1575 {
067aa17e 1576 # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
61f92af1
S
1577 'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
1578 'only_matching': True,
1579 },
313dfc45
LL
1580 {
1581 # Video with yt:stretch=17:0
1582 'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
1583 'info_dict': {
1584 'id': 'Q39EVAstoRM',
1585 'ext': 'mp4',
1586 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
1587 'description': 'md5:ee18a25c350637c8faff806845bddee9',
1588 'upload_date': '20151107',
1589 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
1590 'uploader': 'CH GAMER DROID',
1591 },
1592 'params': {
1593 'skip_download': True,
1594 },
be49068d 1595 'skip': 'This video does not exist.',
313dfc45 1596 },
201c1459 1597 {
1598 # Video with incomplete 'yt:stretch=16:'
1599 'url': 'https://www.youtube.com/watch?v=FRhJzUSJbGI',
1600 'only_matching': True,
1601 },
7caf9830
S
1602 {
1603 # Video licensed under Creative Commons
1604 'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
1605 'info_dict': {
1606 'id': 'M4gD1WSo5mA',
1607 'ext': 'mp4',
1608 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
1609 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
556dbe7f 1610 'duration': 721,
17322130 1611 'upload_date': '20150128',
7caf9830 1612 'uploader_id': 'BerkmanCenter',
ec85ded8 1613 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
556dbe7f 1614 'uploader': 'The Berkman Klein Center for Internet & Society',
7caf9830 1615 'license': 'Creative Commons Attribution license (reuse allowed)',
976ae3ea 1616 'channel_id': 'UCuLGmD72gJDBwmLw06X58SA',
1617 'channel_url': 'https://www.youtube.com/channel/UCuLGmD72gJDBwmLw06X58SA',
1618 'like_count': int,
1619 'age_limit': 0,
1620 'tags': ['Copyright (Legal Subject)', 'Law (Industry)', 'William W. Fisher (Author)'],
1621 'channel': 'The Berkman Klein Center for Internet & Society',
1622 'availability': 'public',
1623 'view_count': int,
1624 'categories': ['Education'],
1625 'thumbnail': 'https://i.ytimg.com/vi_webp/M4gD1WSo5mA/maxresdefault.webp',
1626 'live_status': 'not_live',
1627 'playable_in_embed': True,
12a1b225 1628 'comment_count': int,
6c73052c 1629 'channel_follower_count': int
7caf9830
S
1630 },
1631 'params': {
1632 'skip_download': True,
1633 },
1634 },
fd050249
S
1635 {
1636 # Channel-like uploader_url
1637 'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
1638 'info_dict': {
1639 'id': 'eQcmzGIKrzg',
1640 'ext': 'mp4',
1641 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
545cc85d 1642 'description': 'md5:13a2503d7b5904ef4b223aa101628f39',
556dbe7f 1643 'duration': 4060,
17322130 1644 'upload_date': '20151120',
eb6793ba 1645 'uploader': 'Bernie Sanders',
fd050249 1646 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
ec85ded8 1647 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
fd050249 1648 'license': 'Creative Commons Attribution license (reuse allowed)',
976ae3ea 1649 'playable_in_embed': True,
1650 'tags': 'count:12',
1651 'like_count': int,
1652 'channel_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
1653 'age_limit': 0,
1654 'availability': 'public',
1655 'categories': ['News & Politics'],
1656 'channel': 'Bernie Sanders',
1657 'thumbnail': 'https://i.ytimg.com/vi_webp/eQcmzGIKrzg/maxresdefault.webp',
1658 'view_count': int,
1659 'live_status': 'not_live',
1660 'channel_url': 'https://www.youtube.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
12a1b225 1661 'comment_count': int,
6c73052c 1662 'channel_follower_count': int
fd050249
S
1663 },
1664 'params': {
1665 'skip_download': True,
1666 },
1667 },
040ac686
S
1668 {
1669 'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
1670 'only_matching': True,
7f29cf54
S
1671 },
1672 {
067aa17e 1673 # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
7f29cf54
S
1674 'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
1675 'only_matching': True,
6496ccb4
S
1676 },
1677 {
1678 # Rental video preview
1679 'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
1680 'info_dict': {
1681 'id': 'uGpuVWrhIzE',
1682 'ext': 'mp4',
1683 'title': 'Piku - Trailer',
1684 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
1685 'upload_date': '20150811',
1686 'uploader': 'FlixMatrix',
1687 'uploader_id': 'FlixMatrixKaravan',
ec85ded8 1688 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
6496ccb4
S
1689 'license': 'Standard YouTube License',
1690 },
1691 'params': {
1692 'skip_download': True,
1693 },
eb6793ba 1694 'skip': 'This video is not available.',
022a5d66 1695 },
12afdc2a
S
1696 {
1697 # YouTube Red video with episode data
1698 'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
1699 'info_dict': {
1700 'id': 'iqKdEhx-dD4',
1701 'ext': 'mp4',
1702 'title': 'Isolation - Mind Field (Ep 1)',
545cc85d 1703 'description': 'md5:f540112edec5d09fc8cc752d3d4ba3cd',
556dbe7f 1704 'duration': 2085,
12afdc2a
S
1705 'upload_date': '20170118',
1706 'uploader': 'Vsauce',
1707 'uploader_id': 'Vsauce',
1708 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
12afdc2a
S
1709 'series': 'Mind Field',
1710 'season_number': 1,
1711 'episode_number': 1,
976ae3ea 1712 'thumbnail': 'https://i.ytimg.com/vi_webp/iqKdEhx-dD4/maxresdefault.webp',
1713 'tags': 'count:12',
1714 'view_count': int,
1715 'availability': 'public',
1716 'age_limit': 0,
1717 'channel': 'Vsauce',
1718 'episode': 'Episode 1',
1719 'categories': ['Entertainment'],
1720 'season': 'Season 1',
1721 'channel_id': 'UC6nSFpj9HTCZ5t-N3Rm3-HA',
1722 'channel_url': 'https://www.youtube.com/channel/UC6nSFpj9HTCZ5t-N3Rm3-HA',
1723 'like_count': int,
1724 'playable_in_embed': True,
1725 'live_status': 'not_live',
6c73052c 1726 'channel_follower_count': int
12afdc2a
S
1727 },
1728 'params': {
1729 'skip_download': True,
1730 },
1731 'expected_warnings': [
1732 'Skipping DASH manifest',
1733 ],
1734 },
c7121fa7
S
1735 {
1736 # The following content has been identified by the YouTube community
1737 # as inappropriate or offensive to some audiences.
1738 'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
1739 'info_dict': {
1740 'id': '6SJNVb0GnPI',
1741 'ext': 'mp4',
1742 'title': 'Race Differences in Intelligence',
1743 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
1744 'duration': 965,
1745 'upload_date': '20140124',
1746 'uploader': 'New Century Foundation',
1747 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
1748 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
c7121fa7
S
1749 },
1750 'params': {
1751 'skip_download': True,
1752 },
545cc85d 1753 'skip': 'This video has been removed for violating YouTube\'s policy on hate speech.',
c7121fa7 1754 },
022a5d66
S
1755 {
1756 # itag 212
1757 'url': '1t24XAntNCY',
1758 'only_matching': True,
fd5c4aab
S
1759 },
1760 {
1761 # geo restricted to JP
1762 'url': 'sJL6WA-aGkQ',
1763 'only_matching': True,
1764 },
cd5a74a2
S
1765 {
1766 'url': 'https://invidio.us/watch?v=BaW_jenozKc',
1767 'only_matching': True,
1768 },
bc2ca1bb 1769 {
1770 'url': 'https://redirect.invidious.io/watch?v=BaW_jenozKc',
1771 'only_matching': True,
1772 },
1773 {
1774 # from https://nitter.pussthecat.org/YouTube/status/1360363141947944964#m
1775 'url': 'https://redirect.invidious.io/Yh0AhrY9GjA',
1776 'only_matching': True,
1777 },
825cd268
RA
1778 {
1779 # DRM protected
1780 'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
1781 'only_matching': True,
4fe54c12
S
1782 },
1783 {
1784 # Video with unsupported adaptive stream type formats
1785 'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
1786 'info_dict': {
1787 'id': 'Z4Vy8R84T1U',
1788 'ext': 'mp4',
1789 'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
1790 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
1791 'duration': 433,
1792 'upload_date': '20130923',
1793 'uploader': 'Amelia Putri Harwita',
1794 'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',
1795 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',
1796 'formats': 'maxcount:10',
1797 },
1798 'params': {
1799 'skip_download': True,
1800 'youtube_include_dash_manifest': False,
1801 },
5429d6a9 1802 'skip': 'not actual anymore',
5caabd3c 1803 },
1804 {
822b9d9c 1805 # Youtube Music Auto-generated description
5caabd3c 1806 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1807 'info_dict': {
1808 'id': 'MgNrAu2pzNs',
1809 'ext': 'mp4',
1810 'title': 'Voyeur Girl',
1811 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
1812 'upload_date': '20190312',
5429d6a9
S
1813 'uploader': 'Stephen - Topic',
1814 'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',
5caabd3c 1815 'artist': 'Stephen',
1816 'track': 'Voyeur Girl',
1817 'album': 'it\'s too much love to know my dear',
1818 'release_date': '20190313',
1819 'release_year': 2019,
976ae3ea 1820 'alt_title': 'Voyeur Girl',
1821 'view_count': int,
1822 'uploader_url': 'http://www.youtube.com/channel/UC-pWHpBjdGG69N9mM2auIAA',
1823 'playable_in_embed': True,
1824 'like_count': int,
1825 'categories': ['Music'],
1826 'channel_url': 'https://www.youtube.com/channel/UC-pWHpBjdGG69N9mM2auIAA',
1827 'channel': 'Stephen',
1828 'availability': 'public',
1829 'creator': 'Stephen',
1830 'duration': 169,
1831 'thumbnail': 'https://i.ytimg.com/vi_webp/MgNrAu2pzNs/maxresdefault.webp',
1832 'age_limit': 0,
1833 'channel_id': 'UC-pWHpBjdGG69N9mM2auIAA',
1834 'tags': 'count:11',
1835 'live_status': 'not_live',
6c73052c 1836 'channel_follower_count': int
5caabd3c 1837 },
1838 'params': {
1839 'skip_download': True,
1840 },
1841 },
66b48727
RA
1842 {
1843 'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',
1844 'only_matching': True,
1845 },
011e75e6
S
1846 {
1847 # invalid -> valid video id redirection
1848 'url': 'DJztXj2GPfl',
1849 'info_dict': {
1850 'id': 'DJztXj2GPfk',
1851 'ext': 'mp4',
1852 'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',
1853 'description': 'md5:bf577a41da97918e94fa9798d9228825',
1854 'upload_date': '20090125',
1855 'uploader': 'Prochorowka',
1856 'uploader_id': 'Prochorowka',
1857 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',
1858 'artist': 'Panjabi MC',
1859 'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',
1860 'album': 'Beware of the Boys (Mundian To Bach Ke)',
1861 },
1862 'params': {
1863 'skip_download': True,
1864 },
545cc85d 1865 'skip': 'Video unavailable',
ea74e00b
DP
1866 },
1867 {
1868 # empty description results in an empty string
1869 'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',
1870 'info_dict': {
1871 'id': 'x41yOUIvK2k',
1872 'ext': 'mp4',
1873 'title': 'IMG 3456',
1874 'description': '',
1875 'upload_date': '20170613',
1876 'uploader_id': 'ElevageOrVert',
1877 'uploader': 'ElevageOrVert',
976ae3ea 1878 'view_count': int,
1879 'thumbnail': 'https://i.ytimg.com/vi_webp/x41yOUIvK2k/maxresdefault.webp',
1880 'uploader_url': 'http://www.youtube.com/user/ElevageOrVert',
1881 'like_count': int,
1882 'channel_id': 'UCo03ZQPBW5U4UC3regpt1nw',
1883 'tags': [],
1884 'channel_url': 'https://www.youtube.com/channel/UCo03ZQPBW5U4UC3regpt1nw',
1885 'availability': 'public',
1886 'age_limit': 0,
1887 'categories': ['Pets & Animals'],
1888 'duration': 7,
1889 'playable_in_embed': True,
1890 'live_status': 'not_live',
1891 'channel': 'ElevageOrVert',
6c73052c 1892 'channel_follower_count': int
ea74e00b
DP
1893 },
1894 'params': {
1895 'skip_download': True,
1896 },
1897 },
a0566bbf 1898 {
29f7c58a 1899 # with '};' inside yt initial data (see [1])
1900 # see [2] for an example with '};' inside ytInitialPlayerResponse
1901 # 1. https://github.com/ytdl-org/youtube-dl/issues/27093
1902 # 2. https://github.com/ytdl-org/youtube-dl/issues/27216
a0566bbf 1903 'url': 'https://www.youtube.com/watch?v=CHqg6qOn4no',
1904 'info_dict': {
1905 'id': 'CHqg6qOn4no',
1906 'ext': 'mp4',
1907 'title': 'Part 77 Sort a list of simple types in c#',
1908 'description': 'md5:b8746fa52e10cdbf47997903f13b20dc',
1909 'upload_date': '20130831',
1910 'uploader_id': 'kudvenkat',
1911 'uploader': 'kudvenkat',
976ae3ea 1912 'channel_id': 'UCCTVrRB5KpIiK6V2GGVsR1Q',
1913 'like_count': int,
1914 'uploader_url': 'http://www.youtube.com/user/kudvenkat',
1915 'channel_url': 'https://www.youtube.com/channel/UCCTVrRB5KpIiK6V2GGVsR1Q',
1916 'live_status': 'not_live',
1917 'categories': ['Education'],
1918 'availability': 'public',
1919 'thumbnail': 'https://i.ytimg.com/vi/CHqg6qOn4no/sddefault.jpg',
1920 'tags': 'count:12',
1921 'playable_in_embed': True,
1922 'age_limit': 0,
1923 'view_count': int,
1924 'duration': 522,
1925 'channel': 'kudvenkat',
12a1b225 1926 'comment_count': int,
6c73052c 1927 'channel_follower_count': int
a0566bbf 1928 },
1929 'params': {
1930 'skip_download': True,
1931 },
1932 },
29f7c58a 1933 {
1934 # another example of '};' in ytInitialData
1935 'url': 'https://www.youtube.com/watch?v=gVfgbahppCY',
1936 'only_matching': True,
1937 },
1938 {
1939 'url': 'https://www.youtube.com/watch_popup?v=63RmMXCd_bQ',
1940 'only_matching': True,
1941 },
545cc85d 1942 {
cc2db878 1943 # https://github.com/ytdl-org/youtube-dl/pull/28094
1944 'url': 'OtqTfy26tG0',
1945 'info_dict': {
1946 'id': 'OtqTfy26tG0',
1947 'ext': 'mp4',
1948 'title': 'Burn Out',
1949 'description': 'md5:8d07b84dcbcbfb34bc12a56d968b6131',
1950 'upload_date': '20141120',
1951 'uploader': 'The Cinematic Orchestra - Topic',
1952 'uploader_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',
1953 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',
1954 'artist': 'The Cinematic Orchestra',
1955 'track': 'Burn Out',
1956 'album': 'Every Day',
976ae3ea 1957 'like_count': int,
1958 'live_status': 'not_live',
1959 'alt_title': 'Burn Out',
1960 'duration': 614,
1961 'age_limit': 0,
1962 'view_count': int,
1963 'channel_url': 'https://www.youtube.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',
1964 'creator': 'The Cinematic Orchestra',
1965 'channel': 'The Cinematic Orchestra',
1966 'tags': ['The Cinematic Orchestra', 'Every Day', 'Burn Out'],
1967 'channel_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',
1968 'availability': 'public',
1969 'thumbnail': 'https://i.ytimg.com/vi/OtqTfy26tG0/maxresdefault.jpg',
1970 'categories': ['Music'],
1971 'playable_in_embed': True,
6c73052c 1972 'channel_follower_count': int
cc2db878 1973 },
1974 'params': {
1975 'skip_download': True,
1976 },
545cc85d 1977 },
bc2ca1bb 1978 {
1979 # controversial video, only works with bpctr when authenticated with cookies
1980 'url': 'https://www.youtube.com/watch?v=nGC3D_FkCmg',
1981 'only_matching': True,
1982 },
a1a7907b 1983 {
1984 # controversial video, requires bpctr/contentCheckOk
1985 'url': 'https://www.youtube.com/watch?v=SZJvDhaSDnc',
1986 'info_dict': {
1987 'id': 'SZJvDhaSDnc',
1988 'ext': 'mp4',
1989 'title': 'San Diego teen commits suicide after bullying over embarrassing video',
1990 'channel_id': 'UC-SJ6nODDmufqBzPBwCvYvQ',
976ae3ea 1991 'uploader': 'CBS Mornings',
11f9be09 1992 'uploader_id': 'CBSThisMorning',
a1a7907b 1993 'upload_date': '20140716',
976ae3ea 1994 'description': 'md5:acde3a73d3f133fc97e837a9f76b53b7',
1995 'duration': 170,
1996 'categories': ['News & Politics'],
1997 'uploader_url': 'http://www.youtube.com/user/CBSThisMorning',
1998 'view_count': int,
1999 'channel': 'CBS Mornings',
2000 'tags': ['suicide', 'bullying', 'video', 'cbs', 'news'],
2001 'thumbnail': 'https://i.ytimg.com/vi/SZJvDhaSDnc/hqdefault.jpg',
2002 'age_limit': 18,
2003 'availability': 'needs_auth',
2004 'channel_url': 'https://www.youtube.com/channel/UC-SJ6nODDmufqBzPBwCvYvQ',
2005 'like_count': int,
2006 'live_status': 'not_live',
2007 'playable_in_embed': True,
6c73052c 2008 'channel_follower_count': int
a1a7907b 2009 }
2010 },
f7ad7160 2011 {
2012 # restricted location, https://github.com/ytdl-org/youtube-dl/issues/28685
2013 'url': 'cBvYw8_A0vQ',
2014 'info_dict': {
2015 'id': 'cBvYw8_A0vQ',
2016 'ext': 'mp4',
2017 'title': '4K Ueno Okachimachi Street Scenes 上野御徒町歩き',
2018 'description': 'md5:ea770e474b7cd6722b4c95b833c03630',
2019 'upload_date': '20201120',
2020 'uploader': 'Walk around Japan',
2021 'uploader_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',
2022 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',
976ae3ea 2023 'duration': 1456,
2024 'categories': ['Travel & Events'],
2025 'channel_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',
2026 'view_count': int,
2027 'channel': 'Walk around Japan',
2028 'tags': ['Ueno Tokyo', 'Okachimachi Tokyo', 'Ameyoko Street', 'Tokyo attraction', 'Travel in Tokyo'],
2029 'thumbnail': 'https://i.ytimg.com/vi_webp/cBvYw8_A0vQ/hqdefault.webp',
2030 'age_limit': 0,
2031 'availability': 'public',
2032 'channel_url': 'https://www.youtube.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',
2033 'live_status': 'not_live',
2034 'playable_in_embed': True,
6c73052c 2035 'channel_follower_count': int
f7ad7160 2036 },
2037 'params': {
2038 'skip_download': True,
2039 },
0fb983f6 2040 }, {
2041 # Has multiple audio streams
2042 'url': 'WaOKSUlf4TM',
2043 'only_matching': True
9297939e 2044 }, {
2045 # Requires Premium: has format 141 when requested using YTM url
2046 'url': 'https://music.youtube.com/watch?v=XclachpHxis',
2047 'only_matching': True
2048 }, {
120916da 2049 # multiple subtitles with same lang_code
2050 'url': 'https://www.youtube.com/watch?v=wsQiKKfKxug',
2051 'only_matching': True,
109dd3b2 2052 }, {
2053 # Force use android client fallback
2054 'url': 'https://www.youtube.com/watch?v=YOelRv7fMxY',
2055 'info_dict': {
2056 'id': 'YOelRv7fMxY',
11f9be09 2057 'title': 'DIGGING A SECRET TUNNEL Part 1',
109dd3b2 2058 'ext': '3gp',
2059 'upload_date': '20210624',
2060 'channel_id': 'UCp68_FLety0O-n9QU6phsgw',
2061 'uploader': 'colinfurze',
11f9be09 2062 'uploader_id': 'colinfurze',
109dd3b2 2063 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCp68_FLety0O-n9QU6phsgw',
976ae3ea 2064 'description': 'md5:5d5991195d599b56cd0c4148907eec50',
2065 'duration': 596,
2066 'categories': ['Entertainment'],
2067 'uploader_url': 'http://www.youtube.com/user/colinfurze',
2068 'view_count': int,
2069 'channel': 'colinfurze',
2070 'tags': ['Colin', 'furze', 'Terry', 'tunnel', 'underground', 'bunker'],
2071 'thumbnail': 'https://i.ytimg.com/vi/YOelRv7fMxY/maxresdefault.jpg',
2072 'age_limit': 0,
2073 'availability': 'public',
2074 'like_count': int,
2075 'live_status': 'not_live',
2076 'playable_in_embed': True,
6c73052c 2077 'channel_follower_count': int
109dd3b2 2078 },
2079 'params': {
2080 'format': '17', # 3gp format available on android
2081 'extractor_args': {'youtube': {'player_client': ['android']}},
2082 },
120916da 2083 },
109dd3b2 2084 {
2085 # Skip download of additional client configs (remix client config in this case)
2086 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
2087 'only_matching': True,
2088 'params': {
2089 'extractor_args': {'youtube': {'player_skip': ['configs']}},
2090 },
8fc54b12 2091 }, {
2092 # shorts
2093 'url': 'https://www.youtube.com/shorts/BGQWPY4IigY',
2094 'only_matching': True,
9222c381 2095 }, {
2096 'note': 'Storyboards',
2097 'url': 'https://www.youtube.com/watch?v=5KLPxDtMqe8',
2098 'info_dict': {
2099 'id': '5KLPxDtMqe8',
2100 'ext': 'mhtml',
2101 'format_id': 'sb0',
2102 'title': 'Your Brain is Plastic',
2103 'uploader_id': 'scishow',
2104 'description': 'md5:89cd86034bdb5466cd87c6ba206cd2bc',
2105 'upload_date': '20140324',
2106 'uploader': 'SciShow',
976ae3ea 2107 'like_count': int,
2108 'channel_id': 'UCZYTClx2T1of7BRZ86-8fow',
2109 'channel_url': 'https://www.youtube.com/channel/UCZYTClx2T1of7BRZ86-8fow',
2110 'view_count': int,
2111 'thumbnail': 'https://i.ytimg.com/vi/5KLPxDtMqe8/maxresdefault.jpg',
2112 'playable_in_embed': True,
2113 'tags': 'count:12',
2114 'uploader_url': 'http://www.youtube.com/user/scishow',
2115 'availability': 'public',
2116 'channel': 'SciShow',
2117 'live_status': 'not_live',
2118 'duration': 248,
2119 'categories': ['Education'],
2120 'age_limit': 0,
6c73052c 2121 'channel_follower_count': int
9222c381 2122 }, 'params': {'format': 'mhtml', 'skip_download': True}
992f9a73 2123 }, {
2124 # Ensure video upload_date is in UTC timezone (video was uploaded 1641170939)
2125 'url': 'https://www.youtube.com/watch?v=2NUZ8W2llS4',
2126 'info_dict': {
2127 'id': '2NUZ8W2llS4',
2128 'ext': 'mp4',
2129 'title': 'The NP that test your phone performance 🙂',
2130 'description': 'md5:144494b24d4f9dfacb97c1bbef5de84d',
2131 'uploader': 'Leon Nguyen',
2132 'uploader_id': 'VNSXIII',
2133 'uploader_url': 'http://www.youtube.com/user/VNSXIII',
2134 'channel_id': 'UCRqNBSOHgilHfAczlUmlWHA',
2135 'channel_url': 'https://www.youtube.com/channel/UCRqNBSOHgilHfAczlUmlWHA',
2136 'duration': 21,
2137 'view_count': int,
2138 'age_limit': 0,
2139 'categories': ['Gaming'],
2140 'tags': 'count:23',
2141 'playable_in_embed': True,
2142 'live_status': 'not_live',
2143 'upload_date': '20220103',
2144 'like_count': int,
2145 'availability': 'public',
2146 'channel': 'Leon Nguyen',
2147 'thumbnail': 'https://i.ytimg.com/vi_webp/2NUZ8W2llS4/maxresdefault.webp',
12a1b225 2148 'comment_count': int,
992f9a73 2149 'channel_follower_count': int
2150 }
2151 }, {
2152 # date text is premiered video, ensure upload date in UTC (published 1641172509)
2153 'url': 'https://www.youtube.com/watch?v=mzZzzBU6lrM',
2154 'info_dict': {
2155 'id': 'mzZzzBU6lrM',
2156 'ext': 'mp4',
2157 'title': 'I Met GeorgeNotFound In Real Life...',
2158 'description': 'md5:cca98a355c7184e750f711f3a1b22c84',
2159 'uploader': 'Quackity',
2160 'uploader_id': 'QuackityHQ',
2161 'uploader_url': 'http://www.youtube.com/user/QuackityHQ',
2162 'channel_id': 'UC_8NknAFiyhOUaZqHR3lq3Q',
2163 'channel_url': 'https://www.youtube.com/channel/UC_8NknAFiyhOUaZqHR3lq3Q',
2164 'duration': 955,
2165 'view_count': int,
2166 'age_limit': 0,
2167 'categories': ['Entertainment'],
2168 'tags': 'count:26',
2169 'playable_in_embed': True,
2170 'live_status': 'not_live',
2171 'release_timestamp': 1641172509,
2172 'release_date': '20220103',
2173 'upload_date': '20220103',
2174 'like_count': int,
2175 'availability': 'public',
2176 'channel': 'Quackity',
2177 'thumbnail': 'https://i.ytimg.com/vi/mzZzzBU6lrM/maxresdefault.jpg',
2178 'channel_follower_count': int
2179 }
2180 },
2181 { # continuous livestream. Microformat upload date should be preferred.
2182 # Upload date was 2021-06-19 (not UTC), while stream start is 2021-11-27
2183 'url': 'https://www.youtube.com/watch?v=kgx4WGK0oNU',
2184 'info_dict': {
2185 'id': 'kgx4WGK0oNU',
2186 'title': r're:jazz\/lofi hip hop radio🌱chill beats to relax\/study to \[LIVE 24\/7\] \d{4}-\d{2}-\d{2} \d{2}:\d{2}',
2187 'ext': 'mp4',
2188 'channel_id': 'UC84whx2xxsiA1gXHXXqKGOA',
2189 'availability': 'public',
2190 'age_limit': 0,
2191 'release_timestamp': 1637975704,
2192 'upload_date': '20210619',
2193 'channel_url': 'https://www.youtube.com/channel/UC84whx2xxsiA1gXHXXqKGOA',
2194 'live_status': 'is_live',
2195 'thumbnail': 'https://i.ytimg.com/vi/kgx4WGK0oNU/maxresdefault.jpg',
2196 'uploader': '阿鲍Abao',
2197 'uploader_url': 'http://www.youtube.com/channel/UC84whx2xxsiA1gXHXXqKGOA',
2198 'channel': 'Abao in Tokyo',
2199 'channel_follower_count': int,
2200 'release_date': '20211127',
2201 'tags': 'count:39',
2202 'categories': ['People & Blogs'],
2203 'like_count': int,
2204 'uploader_id': 'UC84whx2xxsiA1gXHXXqKGOA',
2205 'view_count': int,
2206 'playable_in_embed': True,
2207 'description': 'md5:2ef1d002cad520f65825346e2084e49d',
2208 },
2209 'params': {'skip_download': True}
6e634cbe 2210 }, {
2211 # Story. Requires specific player params to work.
ee27297f 2212 'url': 'https://www.youtube.com/watch?v=vv8qTUWmulI',
6e634cbe 2213 'info_dict': {
ee27297f 2214 'id': 'vv8qTUWmulI',
6e634cbe 2215 'ext': 'mp4',
ee27297f 2216 'availability': 'unlisted',
2217 'view_count': int,
2218 'channel_id': 'UCzIZ8HrzDgc-pNQDUG6avBA',
2219 'upload_date': '20220526',
2220 'categories': ['Education'],
2221 'title': 'Story',
2222 'channel': 'IT\'S HISTORY',
2223 'description': '',
2224 'uploader_id': 'BlastfromthePast',
2225 'duration': 12,
2226 'uploader': 'IT\'S HISTORY',
6e634cbe 2227 'playable_in_embed': True,
6e634cbe 2228 'age_limit': 0,
6e634cbe 2229 'live_status': 'not_live',
ee27297f 2230 'tags': [],
2231 'thumbnail': 'https://i.ytimg.com/vi_webp/vv8qTUWmulI/maxresdefault.webp',
2232 'uploader_url': 'http://www.youtube.com/user/BlastfromthePast',
2233 'channel_url': 'https://www.youtube.com/channel/UCzIZ8HrzDgc-pNQDUG6avBA',
12a1b225
A
2234 },
2235 'skip': 'stories get removed after some period of time',
ee27297f 2236 }, {
2237 'url': 'https://www.youtube.com/watch?v=tjjjtzRLHvA',
2238 'info_dict': {
2239 'id': 'tjjjtzRLHvA',
2240 'ext': 'mp4',
2241 'title': 'ハッシュタグ無し };if window.ytcsi',
2242 'upload_date': '20220323',
2243 'like_count': int,
2244 'availability': 'unlisted',
2245 'channel': 'nao20010128nao',
2246 'thumbnail': 'https://i.ytimg.com/vi_webp/tjjjtzRLHvA/maxresdefault.webp',
2247 'age_limit': 0,
2248 'uploader': 'nao20010128nao',
2249 'uploader_id': 'nao20010128nao',
2250 'categories': ['Music'],
6e634cbe 2251 'view_count': int,
2252 'description': '',
ee27297f 2253 'channel_url': 'https://www.youtube.com/channel/UCdqltm_7iv1Vs6kp6Syke5A',
2254 'channel_id': 'UCdqltm_7iv1Vs6kp6Syke5A',
2255 'live_status': 'not_live',
2256 'playable_in_embed': True,
2257 'channel_follower_count': int,
2258 'duration': 6,
2259 'tags': [],
2260 'uploader_url': 'http://www.youtube.com/user/nao20010128nao',
6e634cbe 2261 }
a4166234 2262 }, {
2263 'note': '6 channel audio',
2264 'url': 'https://www.youtube.com/watch?v=zgdo7-RRjgo',
2265 'only_matching': True,
6e634cbe 2266 }
2eb88d95
PH
2267 ]
2268
# Test fixtures for embed detection on third-party pages: each entry pairs a
# page URL with the metadata expected for the YouTube video embedded in it.
# NOTE(review): presumably consumed by the project's test harness - confirm.
_WEBPAGE_TESTS = [
    # YouTube <object> embed
    {
        'url': 'http://www.improbable.com/2017/04/03/untrained-modern-youths-and-ancient-masters-in-selfie-portraits/',
        'md5': '873c81d308b979f0e23ee7e620b312a3',
        'info_dict': {
            'id': 'msN87y-iEx0',
            'ext': 'mp4',
            'title': 'Feynman: Mirrors FUN TO IMAGINE 6',
            'upload_date': '20080526',
            'description': 'md5:873c81d308b979f0e23ee7e620b312a3',
            'uploader': 'Christopher Sykes',
            'uploader_id': 'ChristopherJSykes',
            'age_limit': 0,
            'tags': ['feynman', 'mirror', 'science', 'physics', 'imagination', 'fun', 'cool', 'puzzle'],
            'channel_id': 'UCCeo--lls1vna5YJABWAcVA',
            'playable_in_embed': True,
            'thumbnail': 'https://i.ytimg.com/vi/msN87y-iEx0/hqdefault.jpg',
            'like_count': int,
            'comment_count': int,
            'channel': 'Christopher Sykes',
            'live_status': 'not_live',
            'channel_url': 'https://www.youtube.com/channel/UCCeo--lls1vna5YJABWAcVA',
            'availability': 'public',
            'duration': 195,
            'view_count': int,
            'categories': ['Science & Technology'],
            'channel_follower_count': int,
            'uploader_url': 'http://www.youtube.com/user/ChristopherJSykes',
        },
        'params': {
            'skip_download': True,
        }
    },
]
2304
@classmethod
def suitable(cls, url):
    """Reject URLs carrying a non-empty ``list`` query parameter; otherwise defer to the parent class."""
    from ..utils import parse_qs

    playlist_id = parse_qs(url).get('list', [None])[0]
    return False if playlist_id else super().suitable(url)
201c1459 2313
def __init__(self, *args, **kwargs):
    """Initialise the parent extractor and create the empty per-instance caches."""
    super().__init__(*args, **kwargs)
    # _code_cache: player id -> downloaded player JS
    # _player_cache: cache-id tuple -> memoised result (or exception) of _cached()
    self._code_cache, self._player_cache = {}, {}
e0df6211 2318
def _prepare_live_from_start_formats(self, formats, video_id, live_start_time, url, webpage_url, smuggled_data):
    """
    Turn every format flagged 'is_from_start' into a generator-backed live format.

    The selected format dicts are mutated in place: 'is_live', 'protocol' and
    'fragments' are set, where 'fragments' is a partial of _live_dash_fragments
    bound to the format id, the live start time and the mpd_feed closure below.
    Nothing is returned.
    """
    # Serialises manifest refetches requested through mpd_feed
    lock = threading.Lock()

    # State shared (via nonlocal) between the closures below
    is_live = True
    start_time = time.time()
    formats = [f for f in formats if f.get('is_from_start')]

    def refetch_manifest(format_id, delay):
        # Re-download the player responses and rebuild the format list, but only
        # once more than `delay` seconds have passed since the last (re)fetch
        nonlocal formats, start_time, is_live
        if time.time() <= start_time + delay:
            return

        _, _, prs, player_url = self._download_player_responses(url, smuggled_data, video_id, webpage_url)
        video_details = traverse_obj(
            prs, (..., 'videoDetails'), expected_type=dict, default=[])
        microformats = traverse_obj(
            prs, (..., 'microformat', 'playerMicroformatRenderer'),
            expected_type=dict, default=[])
        _, is_live, _, formats, _ = self._list_formats(video_id, microformats, video_details, prs, player_url)
        start_time = time.time()

    def mpd_feed(format_id, delay):
        """
        @returns (manifest_url, manifest_stream_number, is_live) or None
        """
        with lock:
            refetch_manifest(format_id, delay)

        # Look the requested format up in the (possibly refreshed) list
        f = next((f for f in formats if f['format_id'] == format_id), None)
        if not f:
            if not is_live:
                self.to_screen(f'{video_id}: Video is no longer live')
            else:
                self.report_warning(
                    f'Cannot find refreshed manifest for format {format_id}{bug_reports_message()}')
            return None
        return f['manifest_url'], f['manifest_stream_number'], is_live

    for f in formats:
        f['is_live'] = True
        f['protocol'] = 'http_dash_segments_generator'
        f['fragments'] = functools.partial(
            self._live_dash_fragments, f['format_id'], live_start_time, mpd_feed)
2362
2363 def _live_dash_fragments(self, format_id, live_start_time, mpd_feed, ctx):
2364 FETCH_SPAN, MAX_DURATION = 5, 432000
2365
2366 mpd_url, stream_number, is_live = None, None, True
2367
2368 begin_index = 0
2369 download_start_time = ctx.get('start') or time.time()
2370
2371 lack_early_segments = download_start_time - (live_start_time or download_start_time) > MAX_DURATION
2372 if lack_early_segments:
2373 self.report_warning(bug_reports_message(
2374 'Starting download from the last 120 hours of the live stream since '
2375 'YouTube does not have data before that. If you think this is wrong,'), only_once=True)
2376 lack_early_segments = True
2377
2378 known_idx, no_fragment_score, last_segment_url = begin_index, 0, None
2379 fragments, fragment_base_url = None, None
2380
a539f065 2381 def _extract_sequence_from_mpd(refresh_sequence, immediate):
adbc4ec4
THD
2382 nonlocal mpd_url, stream_number, is_live, no_fragment_score, fragments, fragment_base_url
2383 # Obtain from MPD's maximum seq value
2384 old_mpd_url = mpd_url
185bf310 2385 last_error = ctx.pop('last_error', None)
14f25df2 2386 expire_fast = immediate or last_error and isinstance(last_error, urllib.error.HTTPError) and last_error.code == 403
185bf310 2387 mpd_url, stream_number, is_live = (mpd_feed(format_id, 5 if expire_fast else 18000)
2388 or (mpd_url, stream_number, False))
2389 if not refresh_sequence:
2390 if expire_fast and not is_live:
2391 return False, last_seq
2392 elif old_mpd_url == mpd_url:
2393 return True, last_seq
adbc4ec4
THD
2394 try:
2395 fmts, _ = self._extract_mpd_formats_and_subtitles(
2396 mpd_url, None, note=False, errnote=False, fatal=False)
2397 except ExtractorError:
2398 fmts = None
2399 if not fmts:
a539f065 2400 no_fragment_score += 2
adbc4ec4
THD
2401 return False, last_seq
2402 fmt_info = next(x for x in fmts if x['manifest_stream_number'] == stream_number)
2403 fragments = fmt_info['fragments']
2404 fragment_base_url = fmt_info['fragment_base_url']
2405 assert fragment_base_url
2406
2407 _last_seq = int(re.search(r'(?:/|^)sq/(\d+)', fragments[-1]['path']).group(1))
2408 return True, _last_seq
2409
2410 while is_live:
2411 fetch_time = time.time()
2412 if no_fragment_score > 30:
2413 return
2414 if last_segment_url:
2415 # Obtain from "X-Head-Seqnum" header value from each segment
2416 try:
2417 urlh = self._request_webpage(
2418 last_segment_url, None, note=False, errnote=False, fatal=False)
2419 except ExtractorError:
2420 urlh = None
2421 last_seq = try_get(urlh, lambda x: int_or_none(x.headers['X-Head-Seqnum']))
2422 if last_seq is None:
a539f065 2423 no_fragment_score += 2
adbc4ec4
THD
2424 last_segment_url = None
2425 continue
2426 else:
a539f065
LNO
2427 should_continue, last_seq = _extract_sequence_from_mpd(True, no_fragment_score > 15)
2428 no_fragment_score += 2
185bf310 2429 if not should_continue:
adbc4ec4
THD
2430 continue
2431
2432 if known_idx > last_seq:
2433 last_segment_url = None
2434 continue
2435
2436 last_seq += 1
2437
2438 if begin_index < 0 and known_idx < 0:
2439 # skip from the start when it's negative value
2440 known_idx = last_seq + begin_index
2441 if lack_early_segments:
2442 known_idx = max(known_idx, last_seq - int(MAX_DURATION // fragments[-1]['duration']))
2443 try:
2444 for idx in range(known_idx, last_seq):
2445 # do not update sequence here or you'll get skipped some part of it
a539f065 2446 should_continue, _ = _extract_sequence_from_mpd(False, False)
185bf310 2447 if not should_continue:
adbc4ec4
THD
2448 known_idx = idx - 1
2449 raise ExtractorError('breaking out of outer loop')
2450 last_segment_url = urljoin(fragment_base_url, 'sq/%d' % idx)
2451 yield {
2452 'url': last_segment_url,
36195c44 2453 'fragment_count': last_seq,
adbc4ec4
THD
2454 }
2455 if known_idx == last_seq:
2456 no_fragment_score += 5
2457 else:
2458 no_fragment_score = 0
2459 known_idx = last_seq
2460 except ExtractorError:
2461 continue
2462
2463 time.sleep(max(0, FETCH_SPAN + fetch_time - time.time()))
2464
def _extract_player_url(self, *ytcfgs, webpage=None):
    """Return the absolute player JS URL found in the given ytcfgs, or None if there is none."""
    candidate = traverse_obj(
        ytcfgs, (..., 'PLAYER_JS_URL'), (..., 'WEB_PLAYER_CONTEXT_CONFIGS', ..., 'jsUrl'),
        get_all=False, expected_type=str)
    # The configured URL may be relative, so resolve it against the site root
    return urljoin('https://www.youtube.com', candidate) if candidate else None
109dd3b2 2472
b6de707d 2473 def _download_player_url(self, video_id, fatal=False):
2474 res = self._download_webpage(
2475 'https://www.youtube.com/iframe_api',
2476 note='Downloading iframe API JS', video_id=video_id, fatal=fatal)
2477 if res:
2478 player_version = self._search_regex(
2479 r'player\\?/([0-9a-fA-F]{8})\\?/', res, 'player version', fatal=fatal)
2480 if player_version:
2481 return f'https://www.youtube.com/s/player/{player_version}/player_ias.vflset/en_US/base.js'
2482
60064c53
PH
2483 def _signature_cache_id(self, example_sig):
2484 """ Return a string representation of a signature """
14f25df2 2485 return '.'.join(str(len(part)) for part in example_sig.split('.'))
60064c53 2486
e40c758c
S
2487 @classmethod
2488 def _extract_player_info(cls, player_url):
2489 for player_re in cls._PLAYER_INFO_RE:
2490 id_m = re.search(player_re, player_url)
2491 if id_m:
2492 break
2493 else:
c081b35c 2494 raise ExtractorError('Cannot identify player %r' % player_url)
545cc85d 2495 return id_m.group('id')
e40c758c 2496
404f611f 2497 def _load_player(self, video_id, player_url, fatal=True):
109dd3b2 2498 player_id = self._extract_player_info(player_url)
2499 if player_id not in self._code_cache:
1276a43a 2500 code = self._download_webpage(
109dd3b2 2501 player_url, video_id, fatal=fatal,
2502 note='Downloading player ' + player_id,
2503 errnote='Download of %s failed' % player_url)
1276a43a 2504 if code:
2505 self._code_cache[player_id] = code
404f611f 2506 return self._code_cache.get(player_id)
109dd3b2 2507
e40c758c 2508 def _extract_signature_function(self, video_id, player_url, example_sig):
545cc85d 2509 player_id = self._extract_player_info(player_url)
e0df6211 2510
c4417ddb 2511 # Read from filesystem cache
86e5f3ed 2512 func_id = f'js_{player_id}_{self._signature_cache_id(example_sig)}'
c4417ddb 2513 assert os.path.basename(func_id) == func_id
a0e07d31 2514
ae61d108 2515 self.write_debug(f'Extracting signature function {func_id}')
580ce007 2516 cache_spec, code = self.cache.load('youtube-sigfuncs', func_id), None
83799698 2517
580ce007 2518 if not cache_spec:
2519 code = self._load_player(video_id, player_url)
404f611f 2520 if code:
109dd3b2 2521 res = self._parse_sig_js(code)
ac668111 2522 test_string = ''.join(map(chr, range(len(example_sig))))
580ce007 2523 cache_spec = [ord(c) for c in res(test_string)]
9809740b 2524 self.cache.store('youtube-sigfuncs', func_id, cache_spec)
580ce007 2525
2526 return lambda s: ''.join(s[i] for i in cache_spec)
83799698 2527
60064c53 2528 def _print_sig_code(self, func, example_sig):
404f611f 2529 if not self.get_param('youtube_print_sig_code'):
2530 return
2531
edf3e38e
PH
2532 def gen_sig_code(idxs):
2533 def _genslice(start, end, step):
78caa52a 2534 starts = '' if start == 0 else str(start)
8bcc8756 2535 ends = (':%d' % (end + step)) if end + step >= 0 else ':'
69ea8ca4 2536 steps = '' if step == 1 else (':%d' % step)
86e5f3ed 2537 return f's[{starts}{ends}{steps}]'
edf3e38e
PH
2538
2539 step = None
7af808a5
PH
2540 # Quelch pyflakes warnings - start will be set when step is set
2541 start = '(Never used)'
edf3e38e
PH
2542 for i, prev in zip(idxs[1:], idxs[:-1]):
2543 if step is not None:
2544 if i - prev == step:
2545 continue
2546 yield _genslice(start, prev, step)
2547 step = None
2548 continue
2549 if i - prev in [-1, 1]:
2550 step = i - prev
2551 start = prev
2552 continue
2553 else:
78caa52a 2554 yield 's[%d]' % prev
edf3e38e 2555 if step is None:
78caa52a 2556 yield 's[%d]' % i
edf3e38e
PH
2557 else:
2558 yield _genslice(start, i, step)
2559
ac668111 2560 test_string = ''.join(map(chr, range(len(example_sig))))
c705320f 2561 cache_res = func(test_string)
edf3e38e 2562 cache_spec = [ord(c) for c in cache_res]
78caa52a 2563 expr_code = ' + '.join(gen_sig_code(cache_spec))
60064c53 2564 signature_id_tuple = '(%s)' % (
14f25df2 2565 ', '.join(str(len(p)) for p in example_sig.split('.')))
69ea8ca4 2566 code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
78caa52a 2567 ' return %s\n') % (signature_id_tuple, expr_code)
69ea8ca4 2568 self.to_screen('Extracted signature function:\n' + code)
edf3e38e 2569
e0df6211
PH
def _parse_sig_js(self, jscode):
    """
    Locate the signature function in the player JS and wrap it for Python use.

    The patterns below are tried through _search_regex, with the 'sig' group
    giving the function name. The returned callable takes the signature string
    and passes it to the extracted JS function as its single argument.
    """
    funcname = self._search_regex(
        (r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\bm=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(h\.s\)\)',
         r'\bc&&\(c=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(c\)\)',
         r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\);[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\)',
         r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
         r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
         # Obsolete patterns
         r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
         r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\('),
        jscode, 'Initial JS player signature function name', group='sig')

    jsi = JSInterpreter(jscode)
    initial_function = jsi.extract_function(funcname)
    # The extracted function takes its arguments as a list
    return lambda s: initial_function([s])
2593
580ce007 2594 def _cached(self, func, *cache_id):
2595 def inner(*args, **kwargs):
2596 if cache_id not in self._player_cache:
2597 try:
2598 self._player_cache[cache_id] = func(*args, **kwargs)
2599 except ExtractorError as e:
2600 self._player_cache[cache_id] = e
2601 except Exception as e:
2602 self._player_cache[cache_id] = ExtractorError(traceback.format_exc(), cause=e)
2603
2604 ret = self._player_cache[cache_id]
2605 if isinstance(ret, Exception):
2606 raise ret
2607 return ret
2608 return inner
2609
545cc85d 2610 def _decrypt_signature(self, s, video_id, player_url):
257a2501 2611 """Turn the encrypted s field into a working signature"""
580ce007 2612 extract_sig = self._cached(
2613 self._extract_signature_function, 'sig', player_url, self._signature_cache_id(s))
2614 func = extract_sig(video_id, player_url, s)
2615 self._print_sig_code(func, s)
2616 return func(s)
404f611f 2617
def _decrypt_nsig(self, s, video_id, player_url):
    """
    Turn the encrypted n field into a working signature

    Raises ExtractorError when player_url is None. If the native interpreter
    fails with JSInterpreter.Exception, the function code is executed through
    PhantomJSwrapper instead; when that wrapper cannot be created, the original
    interpreter error is re-raised.
    """
    if player_url is None:
        raise ExtractorError('Cannot decrypt nsig without player_url')
    player_url = urljoin('https://www.youtube.com', player_url)

    jsi, player_id, func_code = self._extract_n_function_code(video_id, player_url)
    if self.get_param('youtube_print_sig_code'):
        self.to_screen(f'Extracted nsig function from {player_id}:\n{func_code[1]}\n')

    try:
        # The compiled function is memoised per player URL
        extract_nsig = self._cached(self._extract_n_function_from_code, 'nsig func', player_url)
        ret = extract_nsig(jsi, func_code)(s)
    except JSInterpreter.Exception as e:
        try:
            jsi = PhantomJSwrapper(self)
        except ExtractorError:
            # No fallback available: surface the interpreter error
            raise e
        self.report_warning(
            f'Native nsig extraction failed: Trying with PhantomJS\n'
            f' n = {s} ; player = {player_url}', video_id)
        self.write_debug(e)

        # func_code is an (argument names, function body) pair
        args, func_body = func_code
        ret = jsi.execute(
            f'console.log(function({", ".join(args)}) {{ {func_body} }}({s!r}));',
            video_id=video_id, note='Executing signature code').strip()

    self.write_debug(f'Decrypted nsig {s} => {ret}')
    return ret
2648
def _extract_n_function_code(self, video_id, player_url):
    """
    Return (interpreter, player id, n-function code) for the given player.

    The function code is taken from the 'youtube-nsig' cache section when
    present; otherwise it is located in the player JS and stored there.
    """
    player_id = self._extract_player_info(player_url)
    cached_code = self.cache.load('youtube-nsig', player_id)
    jscode = cached_code or self._load_player(video_id, player_url)
    jsi = JSInterpreter(jscode)

    if cached_code:
        return jsi, player_id, cached_code

    name_and_index = self._search_regex(
        r'\.get\("n"\)\)&&\(b=(?P<nfunc>[a-zA-Z0-9$]+)(?:\[(?P<idx>\d+)\])?\([a-zA-Z0-9]\)',
        jscode, 'Initial JS player n function name', group=('nfunc', 'idx'))
    funcname, idx = name_and_index
    if idx:
        # The name refers to an array of functions; resolve the indexed entry
        array_literal = self._search_regex(
            rf'var {re.escape(funcname)}\s*=\s*(\[.+?\]);', jscode,
            f'Initial JS player n function list ({funcname}.{idx})')
        funcname = json.loads(js_to_json(array_literal))[int(idx)]

    extracted_code = jsi.extract_function_code(funcname)
    self.cache.store('youtube-nsig', player_id, extracted_code)
    return jsi, player_id, extracted_code
2669
def _extract_n_function_from_code(self, jsi, func_code):
    """
    Build a Python callable from the n-function code.

    The callable raises JSInterpreter.Exception for any failure of the JS
    function, including a result starting with 'enhanced_except_'.
    """
    js_func = jsi.extract_function_from_code(*func_code)

    def extract_nsig(s):
        try:
            result = js_func([s])
        except JSInterpreter.Exception:
            raise
        except Exception as err:
            # Normalise unexpected failures to the interpreter's exception type
            raise JSInterpreter.Exception(traceback.format_exc(), cause=err)

        if result.startswith('enhanced_except_'):
            raise JSInterpreter.Exception('Signature function returned an exception')
        return result

    return extract_nsig
e0df6211 2686
def _extract_signature_timestamp(self, video_id, player_url, ytcfg=None, fatal=False):
    """
    Extract signatureTimestamp (sts)
    Required to tell API what sig/player version is in use.

    ytcfg['STS'] is preferred; otherwise the value is searched for in the player JS.
    """
    sts = int_or_none(ytcfg.get('STS')) if isinstance(ytcfg, dict) else None
    if sts:
        return sts

    # Attempt to extract from player
    if player_url is None:
        error_msg = 'Cannot extract signature timestamp without player_url.'
        if fatal:
            raise ExtractorError(error_msg)
        self.report_warning(error_msg)
        return

    code = self._load_player(video_id, player_url, fatal=fatal)
    if not code:
        return sts
    return int_or_none(self._search_regex(
        r'(?:signatureTimestamp|sts)\s*:\s*(?P<sts>[0-9]{5})', code,
        'JS player signature timestamp', group='sts', fatal=fatal))
2710
def _mark_watched(self, video_id, player_responses):
    """
    Request the playback and watchtime tracking URLs found in the player responses.

    Stops with a warning as soon as one of the two tracking URLs is missing.
    """
    # cpn generation algorithm is reverse engineered from base.js.
    # In fact it works even with dummy cpn.
    CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
    tracking_keys = ('videostatsPlaybackUrl', 'videostatsWatchtimeUrl')

    # The index doubles as a flag: 0 for the playback URL, 1 for the watchtime URL
    for is_full, key in enumerate(tracking_keys):
        label = 'fully ' if is_full else ''
        url = get_first(player_responses, ('playbackTracking', key, 'baseUrl'),
                        expected_type=url_or_none)
        if not url:
            self.report_warning(f'Unable to mark {label}watched')
            return
        parsed_url = urllib.parse.urlparse(url)
        qs = urllib.parse.parse_qs(parsed_url.query)

        cpn = ''.join([CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(0, 16)])

        # more consistent results setting it to right before the end
        reported_length = (qs.get('len') or ['1.5'])[0]
        video_length = [str(float(reported_length) - 1)]

        qs['ver'] = ['2']
        qs['cpn'] = [cpn]
        qs['cmt'] = video_length
        qs['el'] = 'detailpage'  # otherwise defaults to "shorts"

        if is_full:
            # these seem to mark watchtime "history" in the real world
            # they're required, so send in a single value
            qs['st'] = video_length
            qs['et'] = video_length

        new_query = urllib.parse.urlencode(qs, True)
        url = urllib.parse.urlunparse(parsed_url._replace(query=new_query))

        self._download_webpage(
            url, video_id, f'Marking {label}watched',
            'Unable to mark watched', fatal=False)
d77ab8e2 2751
@classmethod
def _extract_from_webpage(cls, url, webpage):
    """
    Yield url_result entries for YouTube videos referenced by a third-party webpage.

    A canonical watch link in a <link rel="alternate"> tag short-circuits
    everything else by raising StopExtraction right after being yielded.
    """
    # Invidious Instances
    # https://github.com/yt-dlp/yt-dlp/issues/195
    # https://github.com/iv-org/invidious/pull/1730
    mobj = re.search(
        r'<link rel="alternate" href="(?P<url>https://www\.youtube\.com/watch\?v=[0-9A-Za-z_-]{11})"',
        webpage)
    if mobj:
        yield cls.url_result(mobj.group('url'), cls)
        raise cls.StopExtraction()

    # Embeds detected by the parent class implementation
    yield from super()._extract_from_webpage(url, webpage)

    # lazyYT YouTube embed
    for id_ in re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage):
        yield cls.url_result(unescapeHTML(id_), cls, id_)

    # Wordpress "YouTube Video Importer" plugin
    # findall returns one tuple per match; its last element is the data-video_id value
    for m in re.findall(r'''(?x)<div[^>]+
            class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
            data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage):
        yield cls.url_result(m[-1], cls, m[-1])
66c9fa36 2775
97665381
PH
@classmethod
def extract_id(cls, url):
    """Return the video id for url as given by get_temp_id; raise ExtractorError if there is none."""
    video_id = cls.get_temp_id(url)
    if video_id:
        return video_id
    raise ExtractorError(f'Invalid URL: {url}')
c5e8d7af 2782
def _extract_chapters_from_json(self, data, duration):
    """Extract chapters from the chaptered player bar renderer in data."""
    chapters_path = (
        'playerOverlays', 'playerOverlayRenderer', 'decoratedPlayerBarRenderer',
        'decoratedPlayerBarRenderer', 'playerBar', 'chapteredPlayerBarRenderer', 'chapters'
    )

    def start_seconds(chapter):
        # timeRangeStartMillis is scaled down by 1000
        millis = traverse_obj(chapter, ('chapterRenderer', 'timeRangeStartMillis'))
        return float_or_none(millis, scale=1000)

    def title_of(chapter):
        return traverse_obj(
            chapter, ('chapterRenderer', 'title', 'simpleText'), expected_type=str)

    chapter_list = traverse_obj(data, chapters_path, expected_type=list)
    return self._extract_chapters(
        chapter_list, chapter_time=start_seconds, chapter_title=title_of, duration=duration)
2797
def _extract_chapters_from_engagement_panel(self, data, duration):
    """Return the first non-empty chapter list found in the macro-marker engagement panels, else []."""
    panel_path = (
        'engagementPanels', ..., 'engagementPanelSectionListRenderer',
        'content', 'macroMarkersListRenderer', 'contents')
    content_list = traverse_obj(data, panel_path, expected_type=list, default=[])

    def chapter_time(chapter):
        return parse_duration(self._get_text(chapter, 'timeDescription'))

    def chapter_title(chapter):
        return self._get_text(chapter, 'title')

    for contents in content_list:
        markers = traverse_obj(contents, (..., 'macroMarkersListItemRenderer'))
        chapters = self._extract_chapters(markers, chapter_time, chapter_title, duration)
        if chapters:
            return chapters
    return []
7c365c21 2810
def _extract_chapters_from_description(self, description, duration):
    """Extract chapters from description lines that start with a timestamp followed by a title."""
    # Each match is a (timestamp, title) pair
    timestamped_lines = re.findall(
        r'(?m)^((?:\d+:)?\d{1,2}:\d{2})\b\W*\s(.+?)\s*$', description or '')
    return self._extract_chapters(
        timestamped_lines,
        chapter_time=lambda x: parse_duration(x[0]),
        chapter_title=lambda x: x[1],
        duration=duration, strict=False)
84213ea8 2816
1890fc63 2817 def _extract_chapters(self, chapter_list, chapter_time, chapter_title, duration, strict=True):
2818 if not duration:
2819 return
2820 chapter_list = [{
2821 'start_time': chapter_time(chapter),
2822 'title': chapter_title(chapter),
2823 } for chapter in chapter_list or []]
2824 if not strict:
2825 chapter_list.sort(key=lambda c: c['start_time'] or 0)
2826
a3976e07 2827 chapters = [{'start_time': 0}]
1890fc63 2828 for idx, chapter in enumerate(chapter_list):
a3976e07 2829 if chapter['start_time'] is None:
1890fc63 2830 self.report_warning(f'Incomplete chapter {idx}')
2831 elif chapters[-1]['start_time'] <= chapter['start_time'] <= duration:
1890fc63 2832 chapters.append(chapter)
2833 else:
2834 self.report_warning(f'Invalid start time for chapter "{chapter["title"]}"')
a3976e07 2835 return chapters[1:]
84213ea8 2836
a1c5d2ca
M
def _extract_comment(self, comment_renderer, parent=None):
    """Build a comment dict from a comment renderer; returns None when it has no commentId."""
    comment_id = comment_renderer.get('commentId')
    if not comment_id:
        return

    text = self._get_text(comment_renderer, 'contentText')

    # note: timestamp is an estimate calculated from the current time and time_text
    timestamp, time_text = self._extract_time_text(comment_renderer, 'publishedTimeText')
    author = self._get_text(comment_renderer, 'authorText')

    vote_text = try_get(
        comment_renderer,
        (lambda x: x['voteCount']['simpleText'], lambda x: x['likeCount']),
        str)
    action_buttons = try_get(
        comment_renderer, lambda x: x['actionButtons']['commentActionButtonsRenderer'], dict) or {}

    return {
        'id': comment_id,
        'text': text,
        'timestamp': timestamp,
        'time_text': time_text,
        'like_count': parse_count(vote_text) or 0,
        'is_favorited': 'creatorHeart' in action_buttons,
        'author': author,
        'author_id': try_get(
            comment_renderer, lambda x: x['authorEndpoint']['browseEndpoint']['browseId'], str),
        'author_thumbnail': try_get(
            comment_renderer, lambda x: x['authorThumbnail']['thumbnails'][-1]['url'], str),
        'author_is_uploader': try_get(
            comment_renderer, lambda x: x['authorIsChannelOwner'], bool),
        'parent': parent or 'root'
    }
2871
def _comment_entries(self, root_continuation_data, ytcfg, video_id, parent=None, tracker=None):
    """
    Generate comment dicts by following the comment continuations.

    Called without `parent` for the top-level comment section and recursively,
    with `parent` set to a comment id, for the replies of that comment.
    `tracker` is a dict of running counters shared across the recursive calls.
    """

    get_single_config_arg = lambda c: self._configuration_arg(c, [''])[0]

    def extract_header(contents):
        # Reads the expected comment count and returns the continuation of the
        # sort order selected through the comment_sort extractor argument
        _continuation = None
        for content in contents:
            comments_header_renderer = traverse_obj(content, 'commentsHeaderRenderer')
            expected_comment_count = self._get_count(
                comments_header_renderer, 'countText', 'commentsCount')

            if expected_comment_count:
                tracker['est_total'] = expected_comment_count
                self.to_screen(f'Downloading ~{expected_comment_count} comments')
            comment_sort_index = int(get_single_config_arg('comment_sort') != 'top')  # 1 = new, 0 = top

            sort_menu_item = try_get(
                comments_header_renderer,
                lambda x: x['sortMenu']['sortFilterSubMenuRenderer']['subMenuItems'][comment_sort_index], dict) or {}
            sort_continuation_ep = sort_menu_item.get('serviceEndpoint') or {}

            _continuation = self._extract_continuation_ep_data(sort_continuation_ep) or self._extract_continuation(sort_menu_item)
            if not _continuation:
                continue

            sort_text = str_or_none(sort_menu_item.get('title'))
            if not sort_text:
                sort_text = 'top comments' if comment_sort_index == 0 else 'newest first'
            self.to_screen('Sorting comments by %s' % sort_text.lower())
            break
        return _continuation

    def extract_thread(contents):
        if not parent:
            tracker['current_page_thread'] = 0
        for content in contents:
            if not parent and tracker['total_parent_comments'] >= max_parents:
                # A bare yield (None) tells the consumer below to stop entirely
                yield
            comment_thread_renderer = try_get(content, lambda x: x['commentThreadRenderer'])
            comment_renderer = get_first(
                (comment_thread_renderer, content), [['commentRenderer', ('comment', 'commentRenderer')]],
                expected_type=dict, default={})

            comment = self._extract_comment(comment_renderer, parent)
            if not comment:
                continue

            tracker['running_total'] += 1
            tracker['total_reply_comments' if parent else 'total_parent_comments'] += 1
            yield comment

            # Attempt to get the replies
            comment_replies_renderer = try_get(
                comment_thread_renderer, lambda x: x['replies']['commentRepliesRenderer'], dict)

            if comment_replies_renderer:
                tracker['current_page_thread'] += 1
                comment_entries_iter = self._comment_entries(
                    comment_replies_renderer, ytcfg, video_id,
                    parent=comment.get('id'), tracker=tracker)
                # Cap replies both per thread and by the remaining overall reply budget
                yield from itertools.islice(comment_entries_iter, min(
                    max_replies_per_thread, max(0, max_replies - tracker['total_reply_comments'])))

    # Keeps track of counts across recursive calls
    if not tracker:
        tracker = dict(
            running_total=0,
            est_total=0,
            current_page_thread=0,
            total_parent_comments=0,
            total_reply_comments=0)

    # TODO: Deprecated
    # YouTube comments have a max depth of 2
    max_depth = int_or_none(get_single_config_arg('max_comment_depth'))
    if max_depth:
        self._downloader.deprecation_warning(
            '[youtube] max_comment_depth extractor argument is deprecated. Set max replies in the max-comments extractor argument instead.')
    if max_depth == 1 and parent:
        return

    # Missing or non-numeric entries of the max_comments argument fall back to sys.maxsize
    max_comments, max_parents, max_replies, max_replies_per_thread, *_ = map(
        lambda p: int_or_none(p, default=sys.maxsize), self._configuration_arg('max_comments', ) + [''] * 4)

    continuation = self._extract_continuation(root_continuation_data)

    response = None
    is_forced_continuation = False
    is_first_continuation = parent is None
    if is_first_continuation and not continuation:
        # Sometimes you can get comments by generating the continuation yourself,
        # even if YouTube initially reports them being disabled - e.g. stories comments.
        # Note: if the comment section is actually disabled, YouTube may return a response with
        # required check_get_keys missing. So we will disable that check initially in this case.
        continuation = self._build_api_continuation_query(self._generate_comment_continuation(video_id))
        is_forced_continuation = True

    for page_num in itertools.count(0):
        if not continuation:
            break
        headers = self.generate_api_headers(ytcfg=ytcfg, visitor_data=self._extract_visitor_data(response))
        comment_prog_str = f"({tracker['running_total']}/{tracker['est_total']})"
        if page_num == 0:
            if is_first_continuation:
                note_prefix = 'Downloading comment section API JSON'
            else:
                note_prefix = ' Downloading comment API JSON reply thread %d %s' % (
                    tracker['current_page_thread'], comment_prog_str)
        else:
            note_prefix = '%sDownloading comment%s API JSON page %d %s' % (
                ' ' if parent else '', ' replies' if parent else '',
                page_num, comment_prog_str)

        response = self._extract_response(
            item_id=None, query=continuation,
            ep='next', ytcfg=ytcfg, headers=headers, note=note_prefix,
            check_get_keys='onResponseReceivedEndpoints' if not is_forced_continuation else None)
        is_forced_continuation = False
        continuation_contents = traverse_obj(
            response, 'onResponseReceivedEndpoints', expected_type=list, default=[])

        continuation = None
        for continuation_section in continuation_contents:
            continuation_items = traverse_obj(
                continuation_section,
                (('reloadContinuationItemsCommand', 'appendContinuationItemsAction'), 'continuationItems'),
                get_all=False, expected_type=list) or []
            if is_first_continuation:
                # The first response only carries the header; comments start with the next request
                continuation = extract_header(continuation_items)
                is_first_continuation = False
                if continuation:
                    break
                continue

            for entry in extract_thread(continuation_items):
                if not entry:
                    return
                yield entry
            continuation = self._extract_continuation({'contents': continuation_items})
            if continuation:
                break

    message = self._get_text(root_continuation_data, ('contents', ..., 'messageRenderer', 'text'), max_runs=1)
    if message and not parent and tracker['running_total'] == 0:
        self.report_warning(f'Youtube said: {message}', video_id=video_id, only_once=True)
3017
3018 @staticmethod
3019 def _generate_comment_continuation(video_id):
3020 """
3021 Generates initial comment section continuation token from given video id
3022 """
3023 token = f'\x12\r\x12\x0b{video_id}\x18\x062\'"\x11"\x0b{video_id}0\x00x\x020\x00B\x10comments-section'
3024 return base64.b64encode(token.encode()).decode()
3025
def _get_comments(self, ytcfg, video_id, contents, webpage):
    """Entry for comment extraction"""
    def _entries_from_comment_section(sections):
        # Only the item section identified as the comment section is used
        renderer = None
        for item in traverse_obj(sections, (..., 'itemSectionRenderer'), default={}):
            if item.get('sectionIdentifier') == 'comment-item-section':
                renderer = item
                break
        yield from self._comment_entries(renderer, ytcfg, video_id)

    # First entry of the max_comments extractor argument; None means no overall limit
    max_comments = int_or_none(self._configuration_arg('max_comments', [''])[0])
    return itertools.islice(_entries_from_comment_section(contents), 0, max_comments)
a1c5d2ca 3036
109dd3b2 3037 @staticmethod
99e9e001 3038 def _get_checkok_params():
3039 return {'contentCheckOk': True, 'racyCheckOk': True}
3040
@classmethod
def _generate_player_context(cls, sts=None):
    """
    Build the player context part of a player API query.

    The result holds a `playbackContext` entry plus the parameters from
    `_get_checkok_params()`. `signatureTimestamp` is only included when
    `sts` is not None.
    """
    playback_context = {'html5Preference': 'HTML5_PREF_WANTS'}
    if sts is not None:
        playback_context['signatureTimestamp'] = sts

    player_context = {
        'playbackContext': {
            'contentPlaybackContext': playback_context,
        },
    }
    player_context.update(cls._get_checkok_params())
    return player_context
3054
e7e94f2a
D
@staticmethod
def _is_agegated(player_response):
    """
    Tell whether the player response indicates an age-gated video.

    True when `playabilityStatus` has a truthy `desktopLegacyAgeGateReason`,
    or when its `status` or `reason` contains one of the known age-gate
    markers.
    """
    if traverse_obj(player_response, ('playabilityStatus', 'desktopLegacyAgeGateReason')):
        return True

    reasons = traverse_obj(player_response, ('playabilityStatus', ('status', 'reason')), default=[])
    AGE_GATE_REASONS = (
        'confirm your age', 'age-restricted', 'inappropriate',  # reason
        'age_verification_required', 'age_check_required',  # status
    )
    # Compare case-insensitively: status values are upper-case (e.g.
    # AGE_VERIFICATION_REQUIRED) while the markers above are lower-case.
    # Non-string values are skipped instead of raising TypeError.
    return any(
        expected in reason.lower()
        for reason in reasons if isinstance(reason, str)
        for expected in AGE_GATE_REASONS)
3066
@staticmethod
def _is_unplayable(player_response):
    """Tell whether the `playabilityStatus` of the response has the status `UNPLAYABLE`."""
    status = traverse_obj(player_response, ('playabilityStatus', 'status'))
    return status == 'UNPLAYABLE'
9275f62c 3070
def _extract_player_response(self, client, video_id, master_ytcfg, player_ytcfg, player_url, initial_pr):
    """
    Request the `player` API endpoint for `video_id` using the given client.

    The signature timestamp is only looked up when `player_url` is given.
    Returns the response, or None if the response is falsy.
    """
    session_index = self._extract_session_index(player_ytcfg, master_ytcfg)
    syncid = self._extract_account_syncid(player_ytcfg, master_ytcfg, initial_pr)
    sts = None
    if player_url:
        sts = self._extract_signature_timestamp(video_id, player_url, master_ytcfg, fatal=False)
    api_headers = self.generate_api_headers(
        ytcfg=player_ytcfg, account_syncid=syncid, session_index=session_index, default_client=client)

    yt_query = {
        'videoId': video_id,
        'params': '8AEB',  # enable stories
        **self._generate_player_context(sts),
    }

    # Underscores in the client name are shown as spaces in the progress note
    client_label = client.replace('_', ' ').strip()
    response = self._extract_response(
        item_id=video_id, ep='player', query=yt_query,
        ytcfg=player_ytcfg, headers=api_headers, fatal=True,
        default_client=client,
        note=f'Downloading {client_label} player API JSON')
    return response or None
3090
def _get_requested_clients(self, url, smuggled_data):
    """
    Resolve the `player_client` extractor argument into a list of client names.

    Each argument value is either a known client (names starting with `_`
    cannot be requested), `default` or `all`; anything else is skipped with
    a warning. When nothing valid was requested, the default clients are
    used. For music URLs, the `<client>_music` variant of every selected
    client is added if such a client exists. The result is passed through
    `orderedSet`.
    """
    default = ['android', 'web']
    allowed_clients = sorted(
        (client for client in INNERTUBE_CLIENTS if client[:1] != '_'),
        key=lambda client: INNERTUBE_CLIENTS[client]['priority'], reverse=True)

    requested_clients = []
    for client in self._configuration_arg('player_client'):
        if client in allowed_clients:
            requested_clients.append(client)
        elif client == 'default':
            requested_clients.extend(default)
        elif client == 'all':
            requested_clients.extend(allowed_clients)
        else:
            self.report_warning(f'Skipping unsupported client {client}')
    if not requested_clients:
        # Copy, so that later additions do not modify `default`
        requested_clients = list(default)

    if smuggled_data.get('is_music_url') or self.is_music_url(url):
        # The list comprehension is fully built before the list is extended,
        # so the list is never modified while it is being iterated
        requested_clients += [
            f'{client}_music' for client in requested_clients
            if f'{client}_music' in INNERTUBE_CLIENTS]

    return orderedSet(requested_clients)
cf7e015f 3114
def _extract_player_responses(self, clients, video_id, webpage, master_ytcfg):
    """
    Collect the player responses of the given clients.

    Clients are processed in the given order; fallback clients may be added
    while iterating (see the end of the loop). Returns the tuple
    `(player_responses, player_url)`.

    An ExtractorError raised while fetching a response does not abort the
    loop. The most recent one is re-raised at the end only if no response
    was collected at all; otherwise errors are reported as warnings.
    """
    initial_pr = None
    if webpage:
        initial_pr = self._search_json(
            self._YT_INITIAL_PLAYER_RESPONSE_RE, webpage, 'initial player response', video_id, fatal=False)

    all_clients = set(clients)
    # Reversed copy used as a stack: pop() gives the clients in their original order
    clients = clients[::-1]
    prs = []

    def append_client(*client_names):
        """ Append the first client name that exists but not already used """
        for client_name in client_names:
            actual_client = _split_innertube_client(client_name)[0]
            if actual_client not in INNERTUBE_CLIENTS or actual_client in all_clients:
                continue
            clients.append(client_name)
            all_clients.add(actual_client)
            return

    # Android player_response does not have microFormats which are needed for
    # extraction of some data. So we return the initial_pr with formats
    # stripped out even if not requested by the user
    # See: https://github.com/yt-dlp/yt-dlp/issues/501
    if initial_pr:
        prs.append({**initial_pr, 'streamingData': None})

    last_error = None
    tried_iframe_fallback = False
    player_url = None
    while clients:
        client, base_client, variant = _split_innertube_client(clients.pop())
        is_web = client == 'web'
        player_ytcfg = master_ytcfg if is_web else {}
        if 'configs' not in self._configuration_arg('player_skip') and not is_web:
            player_ytcfg = self._download_ytcfg(client, video_id) or player_ytcfg

        player_url = player_url or self._extract_player_url(master_ytcfg, player_ytcfg, webpage=webpage)
        require_js_player = self._get_default_ytcfg(client).get('REQUIRE_JS_PLAYER')
        if 'js' in self._configuration_arg('player_skip'):
            require_js_player = False
            player_url = None

        # The player URL download is attempted at most once across all clients
        if require_js_player and not player_url and not tried_iframe_fallback:
            player_url = self._download_player_url(video_id)
            tried_iframe_fallback = True

        try:
            if is_web and initial_pr:
                pr = initial_pr
            else:
                pr = self._extract_player_response(
                    client, video_id, player_ytcfg or master_ytcfg, player_ytcfg,
                    player_url if require_js_player else None, initial_pr)
        except ExtractorError as e:
            # Only the most recent error is kept; the previous one becomes a warning
            if last_error:
                self.report_warning(last_error)
            last_error = e
            continue

        if pr:
            # YouTube may return a different video player response than expected.
            # See: https://github.com/TeamNewPipe/NewPipe/issues/8713
            pr_video_id = traverse_obj(pr, ('videoDetails', 'videoId'))
            if not pr_video_id or pr_video_id == video_id:
                prs.append(pr)
            else:
                self.report_warning(
                    f'Skipping player response from {client} client (got player response for video "{pr_video_id}" instead of "{video_id}")' + bug_reports_message())

        # creator clients can bypass AGE_VERIFICATION_REQUIRED if logged in
        if variant == 'embedded' and self._is_unplayable(pr) and self.is_authenticated:
            append_client(f'{base_client}_creator')
        elif self._is_agegated(pr):
            if variant == 'tv_embedded':
                append_client(f'{base_client}_embedded')
            elif not variant:
                append_client(f'tv_embedded.{base_client}', f'{base_client}_embedded')

    if last_error:
        if not prs:
            raise last_error
        self.report_warning(last_error)
    return prs, player_url
11f9be09 3196
def _extract_formats_and_subtitles(self, streaming_data, video_id, player_url, is_live, duration):
    """
    Generate the format dicts found in `streaming_data`, followed by the subtitles.

    First yields one dict per usable entry of `formats`/`adaptiveFormats`,
    then one per format of the HLS and DASH manifests referenced by the
    streaming data (unless skipped through options). The LAST item yielded
    is not a format: it is the subtitles dict merged from the manifests.

    Entries that need a signature are skipped when `player_url` is falsy or
    when the signature cannot be decrypted. A format is flagged as damaged
    when `approxDurationMs / duration < 500`; with a falsy `duration` this
    check never flags anything.
    """
    itags, stream_ids = {}, []
    itag_qualities, res_qualities = {}, {0: -1}
    q = qualities([
        # Normally tiny is the smallest video-only formats. But
        # audio-only formats with unknown quality may get tagged as tiny
        'tiny',
        'audio_quality_ultralow', 'audio_quality_low', 'audio_quality_medium', 'audio_quality_high',  # Audio only formats
        'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres'
    ])
    streaming_formats = traverse_obj(streaming_data, (..., ('formats', 'adaptiveFormats'), ...), default=[])

    for fmt in streaming_formats:
        if fmt.get('targetDurationSec'):
            continue

        itag = str_or_none(fmt.get('itag'))
        audio_track = fmt.get('audioTrack') or {}
        stream_id = '%s.%s' % (itag or '', audio_track.get('id', ''))
        if stream_id in stream_ids:
            continue

        quality = fmt.get('quality')
        height = int_or_none(fmt.get('height'))
        if quality == 'tiny' or not quality:
            # `or ''` also covers an explicit null value of audioQuality
            quality = (fmt.get('audioQuality') or '').lower() or quality
        # The 3gp format (17) in android client has a quality of "small",
        # but is actually worse than other formats
        if itag == '17':
            quality = 'tiny'
        if quality:
            if itag:
                itag_qualities[itag] = quality
            if height:
                res_qualities[height] = quality
        # FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment
        # (adding `&sq=0` to the URL) and parsing emsg box to determine the
        # number of fragments that would subsequently be requested with (`&sq=N`)
        if fmt.get('type') == 'FORMAT_STREAM_TYPE_OTF':
            continue

        fmt_url = fmt.get('url')
        if not fmt_url:
            sc = urllib.parse.parse_qs(fmt.get('signatureCipher'))
            fmt_url = url_or_none(try_get(sc, lambda x: x['url'][0]))
            encrypted_sig = try_get(sc, lambda x: x['s'][0])
            if not all((sc, fmt_url, player_url, encrypted_sig)):
                continue
            try:
                fmt_url += '&%s=%s' % (
                    traverse_obj(sc, ('sp', -1)) or 'signature',
                    self._decrypt_signature(encrypted_sig, video_id, player_url)
                )
            except ExtractorError as e:
                self.report_warning('Signature extraction failed: Some formats may be missing',
                                    video_id=video_id, only_once=True)
                self.write_debug(e, only_once=True)
                continue

        query = parse_qs(fmt_url)
        throttled = False
        if query.get('n'):
            try:
                decrypt_nsig = self._cached(self._decrypt_nsig, 'nsig', query['n'][0])
                fmt_url = update_url_query(fmt_url, {
                    'n': decrypt_nsig(query['n'][0], video_id, player_url)
                })
            except ExtractorError as e:
                phantomjs_hint = ''
                if isinstance(e, JSInterpreter.Exception):
                    phantomjs_hint = f' Install {self._downloader._format_err("PhantomJS", self._downloader.Styles.EMPHASIS)} to workaround the issue\n'
                self.report_warning(
                    f'nsig extraction failed: You may experience throttling for some formats\n{phantomjs_hint}'
                    f' n = {query["n"][0]} ; player = {player_url}', video_id=video_id, only_once=True)
                self.write_debug(e, only_once=True)
                # The format is still yielded, but marked and deprioritized
                throttled = True

        if itag:
            itags[itag] = 'https'
            stream_ids.append(stream_id)

        tbr = float_or_none(fmt.get('averageBitrate') or fmt.get('bitrate'), 1000)
        language_preference = (
            10 if audio_track.get('audioIsDefault')
            else -10 if 'descriptive' in (audio_track.get('displayName') or '').lower()
            else -1)
        # Some formats may have much smaller duration than others (possibly damaged during encoding)
        # E.g. 2-nOtRESiUc Ref: https://github.com/yt-dlp/yt-dlp/issues/2823
        # Make sure to avoid false positives with small duration differences.
        # E.g. __2ABJjxzNo, ySuUZEjARPY
        is_damaged = try_get(fmt, lambda x: float(x['approxDurationMs']) / duration < 500)
        if is_damaged:
            self.report_warning(
                f'{video_id}: Some formats are possibly damaged. They will be deprioritized', only_once=True)
        dct = {
            'asr': int_or_none(fmt.get('audioSampleRate')),
            'filesize': int_or_none(fmt.get('contentLength')),
            'format_id': itag,
            'format_note': join_nonempty(
                '%s%s' % (audio_track.get('displayName') or '',
                          ' (default)' if language_preference > 0 else ''),
                # `quality` is None when the format carries no quality information at all
                fmt.get('qualityLabel') or (quality or '').replace('audio_quality_', ''),
                try_get(fmt, lambda x: x['projectionType'].replace('RECTANGULAR', '').lower()),
                try_get(fmt, lambda x: x['spatialAudioType'].replace('SPATIAL_AUDIO_TYPE_', '').lower()),
                throttled and 'THROTTLED', is_damaged and 'DAMAGED', delim=', '),
            # Format 22 is likely to be damaged. See https://github.com/yt-dlp/yt-dlp/issues/3372
            'source_preference': -10 if throttled else -5 if itag == '22' else -1,
            'fps': int_or_none(fmt.get('fps')) or None,
            'audio_channels': fmt.get('audioChannels'),
            'height': height,
            'quality': q(quality),
            'has_drm': bool(fmt.get('drmFamilies')),
            'tbr': tbr,
            'url': fmt_url,
            'width': int_or_none(fmt.get('width')),
            'language': join_nonempty(audio_track.get('id', '').split('.')[0],
                                      'desc' if language_preference < -1 else ''),
            'language_preference': language_preference,
            # Strictly de-prioritize damaged and 3gp formats
            'preference': -10 if is_damaged else -2 if itag == '17' else None,
        }
        mime_mobj = re.match(
            r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', fmt.get('mimeType') or '')
        if mime_mobj:
            dct['ext'] = mimetype2ext(mime_mobj.group(1))
            dct.update(parse_codecs(mime_mobj.group(2)))
        no_audio = dct.get('acodec') == 'none'
        no_video = dct.get('vcodec') == 'none'
        if no_audio:
            dct['vbr'] = tbr
        if no_video:
            dct['abr'] = tbr
        if no_audio or no_video:
            dct['downloader_options'] = {
                # Youtube throttles chunks >~10M
                'http_chunk_size': 10485760,
            }
            if dct.get('ext'):
                dct['container'] = dct['ext'] + '_dash'
        yield dct

    live_from_start = is_live and self.get_param('live_from_start')
    skip_manifests = self._configuration_arg('skip')
    if not self.get_param('youtube_include_hls_manifest', True):
        skip_manifests.append('hls')
    if not self.get_param('youtube_include_dash_manifest', True):
        skip_manifests.append('dash')
    get_dash = 'dash' not in skip_manifests and (
        not is_live or live_from_start or self._configuration_arg('include_live_dash'))
    get_hls = not live_from_start and 'hls' not in skip_manifests

    def process_manifest_format(f, proto, itag):
        """Set id and quality of a manifest format; return False if it duplicates a known one"""
        if itag in itags:
            if itags[itag] == proto or f'{itag}-{proto}' in itags:
                return False
            itag = f'{itag}-{proto}'
        if itag:
            f['format_id'] = itag
            itags[itag] = proto

        # itag_qualities holds quality *names*; convert to the numeric rank
        # like for all other formats. An unknown itag/name gives -1
        f['quality'] = q(itag_qualities.get(try_get(f, lambda f: f['format_id'].split('-')[0]), -1))
        if f['quality'] == -1 and f.get('height'):
            # Fall back to the quality of the known format with the closest height
            f['quality'] = q(res_qualities[min(res_qualities, key=lambda x: abs(x - f['height']))])
        return True

    subtitles = {}
    for sd in streaming_data:
        hls_manifest_url = get_hls and sd.get('hlsManifestUrl')
        if hls_manifest_url:
            fmts, subs = self._extract_m3u8_formats_and_subtitles(hls_manifest_url, video_id, 'mp4', fatal=False, live=is_live)
            subtitles = self._merge_subtitles(subs, subtitles)
            for f in fmts:
                if process_manifest_format(f, 'hls', self._search_regex(
                        r'/itag/(\d+)', f['url'], 'itag', default=None)):
                    yield f

        dash_manifest_url = get_dash and sd.get('dashManifestUrl')
        if dash_manifest_url:
            formats, subs = self._extract_mpd_formats_and_subtitles(dash_manifest_url, video_id, fatal=False)
            subtitles = self._merge_subtitles(subs, subtitles)  # Prioritize HLS subs over DASH
            for f in formats:
                if process_manifest_format(f, 'dash', f['format_id']):
                    f['filesize'] = int_or_none(self._search_regex(
                        r'/clen/(\d+)', f.get('fragment_base_url') or f['url'], 'file size', default=None))
                    if live_from_start:
                        f['is_from_start'] = True

                    yield f
    # The subtitles are always the final item; callers split them off
    yield subtitles
11f9be09 3386
def _extract_storyboard(self, player_responses, duration):
    """
    Generate one `mhtml` format dict per storyboard level of the video.

    The storyboard spec is a `|`-separated string: a base URL followed by
    one `#`-separated entry per level. Entries that do not have exactly 8
    fields, or whose first five fields are not non-zero integers, are
    reported and skipped.

    Nothing is yielded when there is no usable base URL or when `duration`
    is unknown/zero, since the fps and the fragment durations are computed
    by dividing by it.
    """
    spec = get_first(
        player_responses, ('storyboards', 'playerStoryboardSpecRenderer', 'spec'), default='').split('|')[::-1]
    # The spec was reversed above, so pop() removes its first element: the base URL
    base_url = url_or_none(urljoin('https://i.ytimg.com/', spec.pop() or None))
    if not base_url:
        return
    if not duration:
        # Avoid TypeError/ZeroDivisionError in the divisions below
        return
    L = len(spec) - 1
    for i, args in enumerate(spec):
        args = args.split('#')
        counts = list(map(int_or_none, args[:5]))
        if len(args) != 8 or not all(counts):
            self.report_warning(f'Malformed storyboard {i}: {"#".join(args)}{bug_reports_message()}')
            continue
        width, height, frame_count, cols, rows = counts
        N, sigh = args[6:]

        url = base_url.replace('$L', str(L - i)).replace('$N', N) + f'&sigh={sigh}'
        # Each fragment is a grid of cols x rows frames; the last one may be partial
        fragment_count = frame_count / (cols * rows)
        fragment_duration = duration / fragment_count
        yield {
            'format_id': f'sb{i}',
            'format_note': 'storyboard',
            'ext': 'mhtml',
            'protocol': 'mhtml',
            'acodec': 'none',
            'vcodec': 'none',
            'url': url,
            'width': width,
            'height': height,
            'fps': frame_count / duration,
            'rows': rows,
            'columns': cols,
            'fragments': [{
                'url': url.replace('$M', str(j)),
                # The final fragment only covers the remaining time
                'duration': min(fragment_duration, duration - (j * fragment_duration)),
            } for j in range(math.ceil(fragment_count))],
        }
3424
def _download_player_responses(self, url, smuggled_data, video_id, webpage_url):
    """
    Download the watch page (unless skipped) and the player responses.

    The webpage is not downloaded when `webpage` is listed in the
    `player_skip` extractor argument. Returns the tuple
    `(webpage, master_ytcfg, player_responses, player_url)`.
    """
    skip_webpage = 'webpage' in self._configuration_arg('player_skip')
    webpage = None if skip_webpage else self._download_webpage(
        webpage_url + '&bpctr=9999999999&has_verified=1&pp=8AEB', video_id, fatal=False)

    # Fall back to the default config when none can be extracted from the webpage
    master_ytcfg = self.extract_ytcfg(video_id, webpage) or self._get_default_ytcfg()

    requested_clients = self._get_requested_clients(url, smuggled_data)
    player_responses, player_url = self._extract_player_responses(
        requested_clients, video_id, webpage, master_ytcfg)

    return webpage, master_ytcfg, player_responses, player_url
3438
def _list_formats(self, video_id, microformats, video_details, player_responses, player_url, duration=None):
    """
    Determine the live state and extract formats and subtitles.

    `is_live` is taken from the video details, falling back to the live
    broadcast details of the microformats when it is None there.

    Returns the tuple
    `(live_broadcast_details, is_live, streaming_data, formats, subtitles)`.
    """
    broadcast_details = traverse_obj(microformats, (..., 'liveBroadcastDetails'))
    live_now = get_first(video_details, 'isLive')
    if live_now is None:
        live_now = get_first(broadcast_details, 'isLiveNow')

    stream_sources = traverse_obj(player_responses, (..., 'streamingData'), default=[])
    # The generator yields all formats and, as its final item, the subtitles
    *found_formats, found_subtitles = self._extract_formats_and_subtitles(
        stream_sources, video_id, player_url, live_now, duration)

    return broadcast_details, live_now, stream_sources, found_formats, found_subtitles
adbc4ec4
THD
3449
3450 def _real_extract(self, url):
3451 url, smuggled_data = unsmuggle_url(url, {})
3452 video_id = self._match_id(url)
3453
3454 base_url = self.http_scheme() + '//www.youtube.com/'
3455 webpage_url = base_url + 'watch?v=' + video_id
3456
3457 webpage, master_ytcfg, player_responses, player_url = self._download_player_responses(url, smuggled_data, video_id, webpage_url)
3458
11f9be09 3459 playability_statuses = traverse_obj(
3460 player_responses, (..., 'playabilityStatus'), expected_type=dict, default=[])
3461
3462 trailer_video_id = get_first(
3463 playability_statuses,
3464 ('errorScreen', 'playerLegacyDesktopYpcTrailerRenderer', 'trailerVideoId'),
3465 expected_type=str)
3466 if trailer_video_id:
3467 return self.url_result(
3468 trailer_video_id, self.ie_key(), trailer_video_id)
3469
3470 search_meta = ((lambda x: self._html_search_meta(x, webpage, default=None))
3471 if webpage else (lambda x: None))
3472
3473 video_details = traverse_obj(
3474 player_responses, (..., 'videoDetails'), expected_type=dict, default=[])
3475 microformats = traverse_obj(
3476 player_responses, (..., 'microformat', 'playerMicroformatRenderer'),
3477 expected_type=dict, default=[])
3478 video_title = (
3479 get_first(video_details, 'title')
3480 or self._get_text(microformats, (..., 'title'))
3481 or search_meta(['og:title', 'twitter:title', 'title']))
3482 video_description = get_first(video_details, 'shortDescription')
3483
d89257f3 3484 multifeed_metadata_list = get_first(
3485 player_responses,
3486 ('multicamera', 'playerLegacyMulticameraRenderer', 'metadataList'),
3487 expected_type=str)
3488 if multifeed_metadata_list and not smuggled_data.get('force_singlefeed'):
3489 if self.get_param('noplaylist'):
11f9be09 3490 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
d89257f3 3491 else:
3492 entries = []
3493 feed_ids = []
3494 for feed in multifeed_metadata_list.split(','):
3495 # Unquote should take place before split on comma (,) since textual
3496 # fields may contain comma as well (see
3497 # https://github.com/ytdl-org/youtube-dl/issues/8536)
14f25df2 3498 feed_data = urllib.parse.parse_qs(
ac668111 3499 urllib.parse.unquote_plus(feed))
d89257f3 3500
3501 def feed_entry(name):
3502 return try_get(
14f25df2 3503 feed_data, lambda x: x[name][0], str)
d89257f3 3504
3505 feed_id = feed_entry('id')
3506 if not feed_id:
3507 continue
3508 feed_title = feed_entry('title')
3509 title = video_title
3510 if feed_title:
3511 title += ' (%s)' % feed_title
3512 entries.append({
3513 '_type': 'url_transparent',
3514 'ie_key': 'Youtube',
3515 'url': smuggle_url(
3516 '%swatch?v=%s' % (base_url, feed_data['id'][0]),
3517 {'force_singlefeed': True}),
3518 'title': title,
3519 })
3520 feed_ids.append(feed_id)
3521 self.to_screen(
3522 'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
3523 % (', '.join(feed_ids), video_id))
3524 return self.playlist_result(
3525 entries, video_id, video_title, video_description)
11f9be09 3526
a1b2d843 3527 duration = int_or_none(
3528 get_first(video_details, 'lengthSeconds')
3529 or get_first(microformats, 'lengthSeconds')
3530 or parse_duration(search_meta('duration'))) or None
3531
c646d76f 3532 live_broadcast_details, is_live, streaming_data, formats, automatic_captions = \
3533 self._list_formats(video_id, microformats, video_details, player_responses, player_url)
bf1317d2 3534
545cc85d 3535 if not formats:
11f9be09 3536 if not self.get_param('allow_unplayable_formats') and traverse_obj(streaming_data, (..., 'licenseInfos')):
88acdbc2 3537 self.report_drm(video_id)
11f9be09 3538 pemr = get_first(
3539 playability_statuses,
3540 ('errorScreen', 'playerErrorMessageRenderer'), expected_type=dict) or {}
3541 reason = self._get_text(pemr, 'reason') or get_first(playability_statuses, 'reason')
3542 subreason = clean_html(self._get_text(pemr, 'subreason') or '')
545cc85d 3543 if subreason:
545cc85d 3544 if subreason == 'The uploader has not made this video available in your country.':
11f9be09 3545 countries = get_first(microformats, 'availableCountries')
545cc85d 3546 if not countries:
3547 regions_allowed = search_meta('regionsAllowed')
3548 countries = regions_allowed.split(',') if regions_allowed else None
b7da73eb 3549 self.raise_geo_restricted(subreason, countries, metadata_available=True)
11f9be09 3550 reason += f'. {subreason}'
545cc85d 3551 if reason:
b7da73eb 3552 self.raise_no_formats(reason, expected=True)
bf1317d2 3553
11f9be09 3554 keywords = get_first(video_details, 'keywords', expected_type=list) or []
545cc85d 3555 if not keywords and webpage:
3556 keywords = [
3557 unescapeHTML(m.group('content'))
3558 for m in re.finditer(self._meta_regex('og:video:tag'), webpage)]
3559 for keyword in keywords:
3560 if keyword.startswith('yt:stretch='):
201c1459 3561 mobj = re.search(r'(\d+)\s*:\s*(\d+)', keyword)
3562 if mobj:
3563 # NB: float is intentional for forcing float division
3564 w, h = (float(v) for v in mobj.groups())
3565 if w > 0 and h > 0:
3566 ratio = w / h
3567 for f in formats:
3568 if f.get('vcodec') != 'none':
3569 f['stretched_ratio'] = ratio
3570 break
a709d873 3571 thumbnails = self._extract_thumbnails((video_details, microformats), (..., ..., 'thumbnail'))
ff2751ac 3572 thumbnail_url = search_meta(['og:image', 'twitter:image'])
3573 if thumbnail_url:
3574 thumbnails.append({
3575 'url': thumbnail_url,
ff2751ac 3576 })
fccf5021 3577 original_thumbnails = thumbnails.copy()
3578
0ba692ac 3579 # The best resolution thumbnails sometimes does not appear in the webpage
bfec31be 3580 # See: https://github.com/yt-dlp/yt-dlp/issues/340
cca80fe6 3581 # List of possible thumbnails - Ref: <https://stackoverflow.com/a/20542029>
e820fbaa 3582 thumbnail_names = [
962ffcf8 3583 # While the *1,*2,*3 thumbnails are just below their corresponding "*default" variants
bfec31be 3584 # in resolution, these are not the custom thumbnail. So de-prioritize them
3585 'maxresdefault', 'hq720', 'sddefault', 'hqdefault', '0', 'mqdefault', 'default',
3586 'sd1', 'sd2', 'sd3', 'hq1', 'hq2', 'hq3', 'mq1', 'mq2', 'mq3', '1', '2', '3'
cca80fe6 3587 ]
cca80fe6 3588 n_thumbnail_names = len(thumbnail_names)
0ba692ac 3589 thumbnails.extend({
3590 'url': 'https://i.ytimg.com/vi{webp}/{video_id}/{name}{live}.{ext}'.format(
3591 video_id=video_id, name=name, ext=ext,
3592 webp='_webp' if ext == 'webp' else '', live='_live' if is_live else ''),
cca80fe6 3593 } for name in thumbnail_names for ext in ('webp', 'jpg'))
0ba692ac 3594 for thumb in thumbnails:
cca80fe6 3595 i = next((i for i, t in enumerate(thumbnail_names) if f'/{video_id}/{t}' in thumb['url']), n_thumbnail_names)
0ba692ac 3596 thumb['preference'] = (0 if '.webp' in thumb['url'] else -1) - (2 * i)
ff2751ac 3597 self._remove_duplicate_formats(thumbnails)
fccf5021 3598 self._downloader._sort_thumbnails(original_thumbnails)
545cc85d 3599
7ea65411 3600 category = get_first(microformats, 'category') or search_meta('genre')
3601 channel_id = str_or_none(
3602 get_first(video_details, 'channelId')
3603 or get_first(microformats, 'externalChannelId')
3604 or search_meta('channelId'))
7ea65411 3605 owner_profile_url = get_first(microformats, 'ownerProfileUrl')
3606
3607 live_content = get_first(video_details, 'isLiveContent')
3608 is_upcoming = get_first(video_details, 'isUpcoming')
3609 if is_live is None:
3610 if is_upcoming or live_content is False:
3611 is_live = False
3612 if is_upcoming is None and (live_content or is_live):
3613 is_upcoming = False
adbc4ec4
THD
3614 live_start_time = parse_iso8601(get_first(live_broadcast_details, 'startTimestamp'))
3615 live_end_time = parse_iso8601(get_first(live_broadcast_details, 'endTimestamp'))
3616 if not duration and live_end_time and live_start_time:
3617 duration = live_end_time - live_start_time
3618
3619 if is_live and self.get_param('live_from_start'):
3620 self._prepare_live_from_start_formats(formats, video_id, live_start_time, url, webpage_url, smuggled_data)
7ea65411 3621
720c3099 3622 formats.extend(self._extract_storyboard(player_responses, duration))
3623
31b532a1 3624 # source_preference is lower for throttled/potentially damaged formats
7e798d72 3625 self._sort_formats(formats, (
3626 'quality', 'res', 'fps', 'hdr:12', 'source', 'vcodec:vp9.2', 'channels', 'acodec', 'lang', 'proto'))
720c3099 3627
545cc85d 3628 info = {
3629 'id': video_id,
39ca3b5c 3630 'title': video_title,
545cc85d 3631 'formats': formats,
3632 'thumbnails': thumbnails,
fccf5021 3633 # The best thumbnail that we are sure exists. Prevents unnecessary
3634 # URL checking if user don't care about getting the best possible thumbnail
3635 'thumbnail': traverse_obj(original_thumbnails, (-1, 'url')),
545cc85d 3636 'description': video_description,
11f9be09 3637 'uploader': get_first(video_details, 'author'),
545cc85d 3638 'uploader_id': self._search_regex(r'/(?:channel|user)/([^/?&#]+)', owner_profile_url, 'uploader id') if owner_profile_url else None,
3639 'uploader_url': owner_profile_url,
3640 'channel_id': channel_id,
a70635b8 3641 'channel_url': format_field(channel_id, None, 'https://www.youtube.com/channel/%s'),
545cc85d 3642 'duration': duration,
3643 'view_count': int_or_none(
11f9be09 3644 get_first((video_details, microformats), (..., 'viewCount'))
545cc85d 3645 or search_meta('interactionCount')),
11f9be09 3646 'average_rating': float_or_none(get_first(video_details, 'averageRating')),
545cc85d 3647 'age_limit': 18 if (
11f9be09 3648 get_first(microformats, 'isFamilySafe') is False
545cc85d 3649 or search_meta('isFamilyFriendly') == 'false'
3650 or search_meta('og:restrictions:age') == '18+') else 0,
3651 'webpage_url': webpage_url,
3652 'categories': [category] if category else None,
3653 'tags': keywords,
11f9be09 3654 'playable_in_embed': get_first(playability_statuses, 'playableInEmbed'),
7ea65411 3655 'is_live': is_live,
3656 'was_live': (False if is_live or is_upcoming or live_content is False
3657 else None if is_live is None or is_upcoming is None
3658 else live_content),
3659 'live_status': 'is_upcoming' if is_upcoming else None, # rest will be set by YoutubeDL
adbc4ec4 3660 'release_timestamp': live_start_time,
545cc85d 3661 }
b477fc13 3662
e325a21a 3663 if get_first(video_details, 'isPostLiveDvr'):
3664 self.write_debug('Video is in Post-Live Manifestless mode')
3665 info['live_status'] = 'post_live'
3666 if (duration or 0) > 4 * 3600:
3667 self.report_warning(
3668 'The livestream has not finished processing. Only 4 hours of the video can be currently downloaded. '
3669 'This is a known issue and patches are welcome')
3670
c646d76f 3671 subtitles = {}
3944e7af 3672 pctr = traverse_obj(player_responses, (..., 'captions', 'playerCaptionsTracklistRenderer'), expected_type=dict)
545cc85d 3673 if pctr:
ecdc9049 3674 def get_lang_code(track):
3675 return (remove_start(track.get('vssId') or '', '.').replace('.', '-')
3676 or track.get('languageCode'))
3677
3678 # Converted into dicts to remove duplicates
3679 captions = {
3680 get_lang_code(sub): sub
3681 for sub in traverse_obj(pctr, (..., 'captionTracks', ...), default=[])}
3682 translation_languages = {
3683 lang.get('languageCode'): self._get_text(lang.get('languageName'), max_runs=1)
3684 for lang in traverse_obj(pctr, (..., 'translationLanguages', ...), default=[])}
3685
774d79cc 3686 def process_language(container, base_url, lang_code, sub_name, query):
120916da 3687 lang_subs = container.setdefault(lang_code, [])
545cc85d 3688 for fmt in self._SUBTITLE_FORMATS:
3689 query.update({
3690 'fmt': fmt,
3691 })
3692 lang_subs.append({
3693 'ext': fmt,
60f393e4 3694 'url': urljoin('https://www.youtube.com', update_url_query(base_url, query)),
774d79cc 3695 'name': sub_name,
545cc85d 3696 })
7e72694b 3697
07b47084 3698 # NB: Constructing the full subtitle dictionary is slow
3699 get_translated_subs = 'translated_subs' not in self._configuration_arg('skip') and (
3700 self.get_param('writeautomaticsub', False) or self.get_param('listsubtitles'))
ecdc9049 3701 for lang_code, caption_track in captions.items():
3702 base_url = caption_track.get('baseUrl')
1235d333 3703 orig_lang = parse_qs(base_url).get('lang', [None])[-1]
545cc85d 3704 if not base_url:
3705 continue
ecdc9049 3706 lang_name = self._get_text(caption_track, 'name', max_runs=1)
545cc85d 3707 if caption_track.get('kind') != 'asr':
545cc85d 3708 if not lang_code:
3709 continue
3710 process_language(
ecdc9049 3711 subtitles, base_url, lang_code, lang_name, {})
3712 if not caption_track.get('isTranslatable'):
3713 continue
3944e7af 3714 for trans_code, trans_name in translation_languages.items():
3715 if not trans_code:
545cc85d 3716 continue
1235d333 3717 orig_trans_code = trans_code
ecdc9049 3718 if caption_track.get('kind') != 'asr':
07b47084 3719 if not get_translated_subs:
18e49408 3720 continue
ecdc9049 3721 trans_code += f'-{lang_code}'
a70635b8 3722 trans_name += format_field(lang_name, None, ' from %s')
d49669ac 3723 # Add an "-orig" label to the original language so that it can be distinguished.
3724 # The subs are returned without "-orig" as well for compatibility
1235d333 3725 if lang_code == f'a-{orig_trans_code}':
0c8d9e5f 3726 process_language(
d49669ac 3727 automatic_captions, base_url, f'{trans_code}-orig', f'{trans_name} (Original)', {})
3728 # Setting tlang=lang returns damaged subtitles.
d49669ac 3729 process_language(automatic_captions, base_url, trans_code, trans_name,
1235d333 3730 {} if orig_lang == orig_trans_code else {'tlang': trans_code})
c646d76f 3731
3732 info['automatic_captions'] = automatic_captions
3733 info['subtitles'] = subtitles
7e72694b 3734
14f25df2 3735 parsed_url = urllib.parse.urlparse(url)
545cc85d 3736 for component in [parsed_url.fragment, parsed_url.query]:
14f25df2 3737 query = urllib.parse.parse_qs(component)
545cc85d 3738 for k, v in query.items():
3739 for d_k, s_ks in [('start', ('start', 't')), ('end', ('end',))]:
3740 d_k += '_time'
3741 if d_k not in info and k in s_ks:
3742 info[d_k] = parse_duration(query[k][0])
822b9d9c
RA
3743
3744 # Youtube Music Auto-generated description
822b9d9c 3745 if video_description:
1890fc63 3746 mobj = re.search(
3747 r'''(?xs)
3748 (?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+
3749 (?P<album>[^\n]+)
3750 (?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?
3751 (?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?
3752 (.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?
3753 .+\nAuto-generated\ by\ YouTube\.\s*$
3754 ''', video_description)
822b9d9c 3755 if mobj:
822b9d9c
RA
3756 release_year = mobj.group('release_year')
3757 release_date = mobj.group('release_date')
3758 if release_date:
3759 release_date = release_date.replace('-', '')
3760 if not release_year:
545cc85d 3761 release_year = release_date[:4]
3762 info.update({
3763 'album': mobj.group('album'.strip()),
3764 'artist': mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·')),
3765 'track': mobj.group('track').strip(),
3766 'release_date': release_date,
cc2db878 3767 'release_year': int_or_none(release_year),
545cc85d 3768 })
7e72694b 3769
545cc85d 3770 initial_data = None
3771 if webpage:
56ba69e4 3772 initial_data = self.extract_yt_initial_data(video_id, webpage, fatal=False)
545cc85d 3773 if not initial_data:
99e9e001 3774 query = {'videoId': video_id}
3775 query.update(self._get_checkok_params())
109dd3b2 3776 initial_data = self._extract_response(
3777 item_id=video_id, ep='next', fatal=False,
99e9e001 3778 ytcfg=master_ytcfg, query=query,
3779 headers=self.generate_api_headers(ytcfg=master_ytcfg),
109dd3b2 3780 note='Downloading initial data API JSON')
545cc85d 3781
0df111a3 3782 info['comment_count'] = traverse_obj(initial_data, (
3783 'contents', 'twoColumnWatchNextResults', 'results', 'results', 'contents', ..., 'itemSectionRenderer',
3784 'contents', ..., 'commentsEntryPointHeaderRenderer', 'commentCount', 'simpleText'
3785 ), (
3786 'engagementPanels', lambda _, v: v['engagementPanelSectionListRenderer']['panelIdentifier'] == 'comment-item-section',
3787 'engagementPanelSectionListRenderer', 'header', 'engagementPanelTitleHeaderRenderer', 'contextualInfo', 'runs', ..., 'text'
3788 ), expected_type=int_or_none, get_all=False)
3789
19a03940 3790 try: # This will error if there is no livechat
c60ee3a2 3791 initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
19a03940 3792 except (KeyError, IndexError, TypeError):
3793 pass
3794 else:
ecdc9049 3795 info.setdefault('subtitles', {})['live_chat'] = [{
4ce05f57 3796 # url is needed to set cookies
3797 'url': f'https://www.youtube.com/watch?v={video_id}&bpctr=9999999999&has_verified=1',
c60ee3a2 3798 'video_id': video_id,
3799 'ext': 'json',
f6745c49 3800 'protocol': 'youtube_live_chat' if is_live or is_upcoming else 'youtube_live_chat_replay',
c60ee3a2 3801 }]
545cc85d 3802
3803 if initial_data:
7c365c21 3804 info['chapters'] = (
3805 self._extract_chapters_from_json(initial_data, duration)
3806 or self._extract_chapters_from_engagement_panel(initial_data, duration)
0fe51254 3807 or self._extract_chapters_from_description(video_description, duration)
7c365c21 3808 or None)
545cc85d 3809
17322130 3810 contents = traverse_obj(
3811 initial_data, ('contents', 'twoColumnWatchNextResults', 'results', 'results', 'contents'),
3812 expected_type=list, default=[])
3813
3814 vpir = get_first(contents, 'videoPrimaryInfoRenderer')
3815 if vpir:
3816 stl = vpir.get('superTitleLink')
3817 if stl:
3818 stl = self._get_text(stl)
3819 if try_get(
3820 vpir,
3821 lambda x: x['superTitleIcon']['iconType']) == 'LOCATION_PIN':
3822 info['location'] = stl
3823 else:
affc4fef 3824 mobj = re.search(r'(.+?)\s*S(\d+)\s*•?\s*E(\d+)', stl)
17322130 3825 if mobj:
545cc85d 3826 info.update({
17322130 3827 'series': mobj.group(1),
3828 'season_number': int(mobj.group(2)),
3829 'episode_number': int(mobj.group(3)),
545cc85d 3830 })
17322130 3831 for tlb in (try_get(
3832 vpir,
3833 lambda x: x['videoActions']['menuRenderer']['topLevelButtons'],
3834 list) or []):
3835 tbr = tlb.get('toggleButtonRenderer') or {}
3836 for getter, regex in [(
3837 lambda x: x['defaultText']['accessibility']['accessibilityData'],
3838 r'(?P<count>[\d,]+)\s*(?P<type>(?:dis)?like)'), ([
3839 lambda x: x['accessibility'],
3840 lambda x: x['accessibilityData']['accessibilityData'],
3841 ], r'(?P<type>(?:dis)?like) this video along with (?P<count>[\d,]+) other people')]:
3842 label = (try_get(tbr, getter, dict) or {}).get('label')
3843 if label:
3844 mobj = re.match(regex, label)
3845 if mobj:
3846 info[mobj.group('type') + '_count'] = str_to_int(mobj.group('count'))
545cc85d 3847 break
17322130 3848 sbr_tooltip = try_get(
3849 vpir, lambda x: x['sentimentBar']['sentimentBarRenderer']['tooltip'])
3850 if sbr_tooltip:
3851 like_count, dislike_count = sbr_tooltip.split(' / ')
3852 info.update({
3853 'like_count': str_to_int(like_count),
3854 'dislike_count': str_to_int(dislike_count),
3855 })
3856 vsir = get_first(contents, 'videoSecondaryInfoRenderer')
3857 if vsir:
3858 vor = traverse_obj(vsir, ('owner', 'videoOwnerRenderer'))
3859 info.update({
3860 'channel': self._get_text(vor, 'title'),
3861 'channel_follower_count': self._get_count(vor, 'subscriberCountText')})
3862
3863 rows = try_get(
3864 vsir,
3865 lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows'],
3866 list) or []
3867 multiple_songs = False
3868 for row in rows:
3869 if try_get(row, lambda x: x['metadataRowRenderer']['hasDividerLine']) is True:
3870 multiple_songs = True
3871 break
3872 for row in rows:
3873 mrr = row.get('metadataRowRenderer') or {}
3874 mrr_title = mrr.get('title')
3875 if not mrr_title:
3876 continue
3877 mrr_title = self._get_text(mrr, 'title')
3878 mrr_contents_text = self._get_text(mrr, ('contents', 0))
3879 if mrr_title == 'License':
3880 info['license'] = mrr_contents_text
3881 elif not multiple_songs:
3882 if mrr_title == 'Album':
3883 info['album'] = mrr_contents_text
3884 elif mrr_title == 'Artist':
3885 info['artist'] = mrr_contents_text
3886 elif mrr_title == 'Song':
3887 info['track'] = mrr_contents_text
545cc85d 3888
3889 fallbacks = {
3890 'channel': 'uploader',
3891 'channel_id': 'uploader_id',
3892 'channel_url': 'uploader_url',
3893 }
992f9a73 3894
17322130 3895 # The upload date for scheduled, live and past live streams / premieres in microformats
3896 # may be different from the stream date. Although not in UTC, we will prefer it in this case.
992f9a73 3897 # See: https://github.com/yt-dlp/yt-dlp/pull/2223#issuecomment-1008485139
17322130 3898 upload_date = (
3899 unified_strdate(get_first(microformats, 'uploadDate'))
3900 or unified_strdate(search_meta('uploadDate')))
3901 if not upload_date or (not info.get('is_live') and not info.get('was_live') and info.get('live_status') != 'is_upcoming'):
6e634cbe 3902 upload_date = strftime_or_none(self._extract_time_text(vpir, 'dateText')[0], '%Y%m%d') or upload_date
17322130 3903 info['upload_date'] = upload_date
992f9a73 3904
545cc85d 3905 for to, frm in fallbacks.items():
3906 if not info.get(to):
3907 info[to] = info.get(frm)
3908
3909 for s_k, d_k in [('artist', 'creator'), ('track', 'alt_title')]:
3910 v = info.get(s_k)
3911 if v:
3912 info[d_k] = v
b84071c0 3913
11f9be09 3914 is_private = get_first(video_details, 'isPrivate', expected_type=bool)
3915 is_unlisted = get_first(microformats, 'isUnlisted', expected_type=bool)
c224251a 3916 is_membersonly = None
b28f8d24 3917 is_premium = None
c224251a
M
3918 if initial_data and is_private is not None:
3919 is_membersonly = False
b28f8d24 3920 is_premium = False
47193e02 3921 contents = try_get(initial_data, lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'], list) or []
3922 badge_labels = set()
3923 for content in contents:
3924 if not isinstance(content, dict):
3925 continue
3926 badge_labels.update(self._extract_badges(content.get('videoPrimaryInfoRenderer')))
3927 for badge_label in badge_labels:
3928 if badge_label.lower() == 'members only':
3929 is_membersonly = True
3930 elif badge_label.lower() == 'premium':
3931 is_premium = True
3932 elif badge_label.lower() == 'unlisted':
3933 is_unlisted = True
c224251a 3934
c224251a
M
3935 info['availability'] = self._availability(
3936 is_private=is_private,
b28f8d24 3937 needs_premium=is_premium,
c224251a
M
3938 needs_subscription=is_membersonly,
3939 needs_auth=info['age_limit'] >= 18,
3940 is_unlisted=None if is_private is None else is_unlisted)
3941
a2160aa4 3942 info['__post_extractor'] = self.extract_comments(master_ytcfg, video_id, contents, webpage)
4ea3be0a 3943
11f9be09 3944 self.mark_watched(video_id, player_responses)
d77ab8e2 3945
545cc85d 3946 return info
c5e8d7af 3947
a61fd4cf 3948
a6213a49 3949class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):
5f6a1245 3950
@staticmethod
def passthrough_smuggled_data(func):
    """Decorator that unsmuggles the URL and re-smuggles the data into the entries.

    The decorated function is invoked as ``func(self, url, smuggled_data)`` with
    the data unsmuggled from ``url``. ``is_music_url`` is added to that data when
    ``self.is_music_url(url)`` is true. If any smuggled data exists and the
    returned dict has entries, each entry URL gets the data smuggled back in.
    """

    def _resmuggle(entries, data):
        # Stays a generator so the entries are only consumed on demand
        # TODO: Convert URL to music.youtube instead.
        # Do we need to passthrough any other smuggled_data?
        for item in entries:
            item['url'] = smuggle_url(item['url'], data)
            yield item

    @functools.wraps(func)
    def wrapper(self, url):
        url, smuggled_data = unsmuggle_url(url, {})
        if self.is_music_url(url):
            smuggled_data['is_music_url'] = True
        result = func(self, url, smuggled_data)
        # Entries are only looked up when there is something to pass along
        entries = result.get('entries') if smuggled_data else None
        if entries:
            result['entries'] = _resmuggle(entries, smuggled_data)
        return result

    return wrapper
3970
def _extract_channel_id(self, webpage):
    """Return the channel ID advertised in the meta tags of ``webpage``.

    The ``channelId`` meta tag is preferred. Otherwise the ID is parsed out of
    the first available channel URL meta tag (``og:url``, ``twitter:url``, ...).
    Both fallback lookups are made without a default.
    """
    channel_id = self._html_search_meta(
        'channelId', webpage, 'channel id', default=None)
    if channel_id:
        return channel_id
    channel_url = self._html_search_meta(
        ('og:url', 'al:ios:url', 'al:android:url', 'al:web:url',
         'twitter:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad',
         'twitter:app:url:googleplay'), webpage, 'channel url')
    # The quantifier must sit inside the group: with `(...)+` a repeated group
    # only keeps its last repetition, i.e. the final character of the ID
    return self._search_regex(
        r'https?://(?:www\.)?youtube\.com/channel/([^/?#&]+)',
        channel_url, 'channel id')
15f6397c 3983
8bdd16b4 3984 @staticmethod
cd7c66cf 3985 def _extract_basic_item_renderer(item):
3986 # Modified from _extract_grid_item_renderer
201c1459 3987 known_basic_renderers = (
a17526e4 3988 'playlistRenderer', 'videoRenderer', 'channelRenderer', 'showRenderer', 'reelItemRenderer'
cd7c66cf 3989 )
3990 for key, renderer in item.items():
201c1459 3991 if not isinstance(renderer, dict):
cd7c66cf 3992 continue
201c1459 3993 elif key in known_basic_renderers:
3994 return renderer
3995 elif key.startswith('grid') and key.endswith('Renderer'):
3996 return renderer
8bdd16b4 3997
8bdd16b4 3998 def _grid_entries(self, grid_renderer):
3999 for item in grid_renderer['items']:
4000 if not isinstance(item, dict):
39b62db1 4001 continue
cd7c66cf 4002 renderer = self._extract_basic_item_renderer(item)
8bdd16b4 4003 if not isinstance(renderer, dict):
4004 continue
052e1350 4005 title = self._get_text(renderer, 'title')
fe93e2c4 4006
8bdd16b4 4007 # playlist
4008 playlist_id = renderer.get('playlistId')
4009 if playlist_id:
4010 yield self.url_result(
4011 'https://www.youtube.com/playlist?list=%s' % playlist_id,
4012 ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
4013 video_title=title)
201c1459 4014 continue
8bdd16b4 4015 # video
4016 video_id = renderer.get('videoId')
4017 if video_id:
4018 yield self._extract_video(renderer)
201c1459 4019 continue
8bdd16b4 4020 # channel
4021 channel_id = renderer.get('channelId')
4022 if channel_id:
8bdd16b4 4023 yield self.url_result(
4024 'https://www.youtube.com/channel/%s' % channel_id,
4025 ie=YoutubeTabIE.ie_key(), video_title=title)
201c1459 4026 continue
4027 # generic endpoint URL support
4028 ep_url = urljoin('https://www.youtube.com/', try_get(
4029 renderer, lambda x: x['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'],
14f25df2 4030 str))
201c1459 4031 if ep_url:
4032 for ie in (YoutubeTabIE, YoutubePlaylistIE, YoutubeIE):
4033 if ie.suitable(ep_url):
4034 yield self.url_result(
4035 ep_url, ie=ie.ie_key(), video_id=ie._match_id(ep_url), video_title=title)
4036 break
8bdd16b4 4037
def _music_reponsive_list_entry(self, renderer):
    """Turn a music list item renderer into a URL result.

    Checked in order: the item's own video ID, the playlist of its watch
    endpoint (together with that endpoint's video when present), and the browse
    ID of its browse endpoint. Returns None when none of them is available.
    """
    base_url = 'https://music.youtube.com'

    item_video_id = traverse_obj(renderer, ('playlistItemData', 'videoId'))
    if item_video_id:
        return self.url_result(
            f'{base_url}/watch?v={item_video_id}',
            ie=YoutubeIE.ie_key(), video_id=item_video_id)

    watch_endpoint_path = ('navigationEndpoint', 'watchEndpoint')
    playlist_id = traverse_obj(renderer, watch_endpoint_path + ('playlistId',))
    if playlist_id:
        watch_video_id = traverse_obj(renderer, watch_endpoint_path + ('videoId',))
        if watch_video_id:
            playlist_url = f'{base_url}/watch?v={watch_video_id}&list={playlist_id}'
        else:
            playlist_url = f'{base_url}/playlist?list={playlist_id}'
        return self.url_result(
            playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)

    browse_id = traverse_obj(renderer, ('navigationEndpoint', 'browseEndpoint', 'browseId'))
    if browse_id:
        return self.url_result(
            f'{base_url}/browse/{browse_id}',
            ie=YoutubeTabIE.ie_key(), video_id=browse_id)
    return None
4055
3d3dddc9 4056 def _shelf_entries_from_content(self, shelf_renderer):
4057 content = shelf_renderer.get('content')
4058 if not isinstance(content, dict):
8bdd16b4 4059 return
cd7c66cf 4060 renderer = content.get('gridRenderer') or content.get('expandedShelfContentsRenderer')
3d3dddc9 4061 if renderer:
4062 # TODO: add support for nested playlists so each shelf is processed
4063 # as separate playlist
4064 # TODO: this includes only first N items
86e5f3ed 4065 yield from self._grid_entries(renderer)
3d3dddc9 4066 renderer = content.get('horizontalListRenderer')
4067 if renderer:
4068 # TODO
4069 pass
8bdd16b4 4070
def _shelf_entries(self, shelf_renderer, skip_channels=False):
    """Yield the shelf's own URL (if it has one) followed by its inline entries.

    With ``skip_channels`` set, a shelf whose URL contains ``/channels?`` yields
    nothing at all.
    """
    endpoint_url = try_get(
        shelf_renderer, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        str)
    shelf_url = urljoin('https://www.youtube.com', endpoint_url)

    # Skip links to other channels. Note that checking for
    # endpoint.commandMetadata.webCommandMetadata.webPageType == WEB_PAGE_TYPE_CHANNEL
    # will not work
    if shelf_url and skip_channels and '/channels?' in shelf_url:
        return
    if shelf_url:
        yield self.url_result(
            shelf_url, video_title=self._get_text(shelf_renderer, 'title'))
    # The shelf may not have a URL; fall back to extraction from its content
    yield from self._shelf_entries_from_content(shelf_renderer)
c5e8d7af 4086
8bdd16b4 4087 def _playlist_entries(self, video_list_renderer):
4088 for content in video_list_renderer['contents']:
4089 if not isinstance(content, dict):
4090 continue
4091 renderer = content.get('playlistVideoRenderer') or content.get('playlistPanelVideoRenderer')
4092 if not isinstance(renderer, dict):
4093 continue
4094 video_id = renderer.get('videoId')
4095 if not video_id:
4096 continue
4097 yield self._extract_video(renderer)
07aeced6 4098
def _rich_entries(self, rich_grid_renderer):
    """Yield the video found at ``content.videoRenderer``, if it has a video ID."""
    video_renderer = try_get(
        rich_grid_renderer, lambda x: x['content']['videoRenderer'], dict) or {}
    if video_renderer.get('videoId'):
        yield self._extract_video(video_renderer)
4106
8bdd16b4 4107 def _video_entry(self, video_renderer):
4108 video_id = video_renderer.get('videoId')
4109 if video_id:
4110 return self._extract_video(video_renderer)
dacb3a86 4111
def _hashtag_tile_entry(self, hashtag_tile_renderer):
    """Return a URL result for the tap target of a hashtag tile, or None without a URL."""
    tap_url = traverse_obj(
        hashtag_tile_renderer, ('onTapCommand', 'commandMetadata', 'webCommandMetadata', 'url'))
    url = urljoin('https://youtube.com', tap_url)
    if not url:
        return None
    return self.url_result(
        url, ie=YoutubeTabIE.ie_key(), title=self._get_text(hashtag_tile_renderer, 'hashtag'))
4118
def _post_thread_entries(self, post_thread_renderer):
    """Yield the media referenced by a backstage post.

    In order: the attached video, the attached playlist, and every link in the
    post text that YoutubeIE accepts. A link to the attached video itself is
    not yielded a second time.
    """
    post_renderer = try_get(
        post_thread_renderer, lambda x: x['post']['backstagePostRenderer'], dict)
    if not post_renderer:
        return

    # video attachment
    attached_video = try_get(
        post_renderer, lambda x: x['backstageAttachment']['videoRenderer'], dict) or {}
    video_id = attached_video.get('videoId')
    if video_id:
        video_entry = self._extract_video(attached_video)
        if video_entry:
            yield video_entry

    # playlist attachment
    playlist_id = try_get(
        post_renderer, lambda x: x['backstageAttachment']['playlistRenderer']['playlistId'], str)
    if playlist_id:
        yield self.url_result(
            'https://www.youtube.com/playlist?list=%s' % playlist_id,
            ie=YoutubeTabIE.ie_key(), video_id=playlist_id)

    # inline video links
    for run in try_get(post_renderer, lambda x: x['contentText']['runs'], list) or []:
        if not isinstance(run, dict):
            continue
        ep_url = try_get(
            run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'], str)
        if not ep_url or not YoutubeIE.suitable(ep_url):
            continue
        ep_video_id = YoutubeIE._match_id(ep_url)
        if ep_video_id != video_id:
            yield self.url_result(ep_url, ie=YoutubeIE.ie_key(), video_id=ep_video_id)
dacb3a86 4154
8bdd16b4 4155 def _post_thread_continuation_entries(self, post_thread_continuation):
4156 contents = post_thread_continuation.get('contents')
4157 if not isinstance(contents, list):
4158 return
4159 for content in contents:
4160 renderer = content.get('backstagePostThreadRenderer')
6b0b0a28 4161 if isinstance(renderer, dict):
4162 yield from self._post_thread_entries(renderer)
8bdd16b4 4163 continue
6b0b0a28 4164 renderer = content.get('videoRenderer')
4165 if isinstance(renderer, dict):
4166 yield self._video_entry(renderer)
07aeced6 4167
39ed931e 4168 r''' # unused
4169 def _rich_grid_entries(self, contents):
4170 for content in contents:
4171 video_renderer = try_get(content, lambda x: x['richItemRenderer']['content']['videoRenderer'], dict)
4172 if video_renderer:
4173 entry = self._video_entry(video_renderer)
4174 if entry:
4175 yield entry
4176 '''
52efa4b3 4177
def _extract_entries(self, parent_renderer, continuation_list):
    """Yield the entries under ``parent_renderer['contents']``.

    ``continuation_list`` is an output parameter: it is reset in place to
    ``[None]`` and its single slot receives the continuation found while
    extracting (from the handled renderer, else its section renderer, else
    ``parent_renderer``).
    """
    # continuation_list is modified in-place with continuation_list = [continuation_token]
    continuation_list[:] = [None]

    # Renderer key -> callable returning an iterable of entries
    known_renderers = {
        'playlistVideoListRenderer': self._playlist_entries,
        'gridRenderer': self._grid_entries,
        'reelShelfRenderer': self._grid_entries,
        'shelfRenderer': self._shelf_entries,
        'musicResponsiveListItemRenderer': lambda x: [self._music_reponsive_list_entry(x)],
        'backstagePostThreadRenderer': self._post_thread_entries,
        'videoRenderer': lambda x: [self._video_entry(x)],
        'playlistRenderer': lambda x: self._grid_entries({'items': [{'playlistRenderer': x}]}),
        'channelRenderer': lambda x: self._grid_entries({'items': [{'channelRenderer': x}]}),
        'hashtagTileRenderer': lambda x: [self._hashtag_tile_entry(x)]
    }

    for content in try_get(parent_renderer, lambda x: x['contents'], list) or []:
        if not isinstance(content, dict):
            continue
        is_renderer = traverse_obj(
            content, 'itemSectionRenderer', 'musicShelfRenderer', 'musicShelfContinuation',
            expected_type=dict)
        if not is_renderer:
            # Not a section: the only other supported item is a rich item
            rich_item = content.get('richItemRenderer')
            if rich_item:
                yield from self._rich_entries(rich_item)
                continuation_list[0] = self._extract_continuation(parent_renderer)
            continue

        for isr_content in try_get(is_renderer, lambda x: x['contents'], list) or []:
            if not isinstance(isr_content, dict):
                continue

            # Only the first known renderer of each item is processed
            handled_key = next((key for key in isr_content if key in known_renderers), None)
            if handled_key is not None:
                renderer = isr_content[handled_key]
                # Handlers may produce empty entries; those are dropped
                yield from filter(None, known_renderers[handled_key](renderer))
                continuation_list[0] = self._extract_continuation(renderer)

            if not continuation_list[0]:
                continuation_list[0] = self._extract_continuation(is_renderer)

    if not continuation_list[0]:
        continuation_list[0] = self._extract_continuation(parent_renderer)
4226
def _entries(self, tab, item_id, ytcfg, account_syncid, visitor_data):
    """Yield all entries of ``tab``, following continuations page by page.

    The first page comes from the tab's ``sectionListRenderer`` or
    ``richGridRenderer``. Every further page is requested with the current
    continuation and parsed either from ``continuationContents`` or from the
    ``appendContinuationItemsAction`` of the response. Extraction stops when
    there is no continuation, no response, or no recognised renderer.
    """
    # NB: the lambda looks this name up at call time, so rebinding it to a
    # fresh list below is seen by every later extract_entries call
    continuation_list = [None]
    extract_entries = lambda x: self._extract_entries(x, continuation_list)

    tab_content = try_get(tab, lambda x: x['content'], dict)
    if not tab_content:
        return
    parent_renderer = (
        try_get(tab_content, lambda x: x['sectionListRenderer'], dict)
        or try_get(tab_content, lambda x: x['richGridRenderer'], dict) or {})
    yield from extract_entries(parent_renderer)
    continuation = continuation_list[0]

    # 'continuationContents' key -> handler
    continuation_handlers = {
        'playlistVideoListContinuation': self._playlist_entries,
        'gridContinuation': self._grid_entries,
        'itemSectionContinuation': self._post_thread_continuation_entries,
        'sectionListContinuation': extract_entries,  # for feeds
    }
    # Key of the first continuation item -> (handler, key the handler reads the items from)
    item_handlers = {
        'videoRenderer': (self._grid_entries, 'items'),  # for membership tab
        'gridPlaylistRenderer': (self._grid_entries, 'items'),
        'gridVideoRenderer': (self._grid_entries, 'items'),
        'gridChannelRenderer': (self._grid_entries, 'items'),
        'playlistVideoRenderer': (self._playlist_entries, 'contents'),
        'itemSectionRenderer': (extract_entries, 'contents'),  # for feeds
        'richItemRenderer': (extract_entries, 'contents'),  # for hashtag
        'backstagePostThreadRenderer': (self._post_thread_continuation_entries, 'contents')
    }

    page_num = 0
    while continuation:
        page_num += 1
        headers = self.generate_api_headers(
            ytcfg=ytcfg, account_syncid=account_syncid, visitor_data=visitor_data)
        response = self._extract_response(
            item_id=f'{item_id} page {page_num}',
            query=continuation, headers=headers, ytcfg=ytcfg,
            check_get_keys=('continuationContents', 'onResponseReceivedActions', 'onResponseReceivedEndpoints'))

        if not response:
            break
        # Extracting updated visitor data is required to prevent an infinite extraction loop in some cases
        # See: https://github.com/ytdl-org/youtube-dl/issues/28702
        visitor_data = self._extract_visitor_data(response) or visitor_data

        # First form: a known renderer directly under 'continuationContents'
        continuation_contents = try_get(
            response, lambda x: x['continuationContents'], dict) or {}
        matched_key = next(
            (key for key in continuation_contents if key in continuation_handlers), None)
        if matched_key is not None:
            continuation_renderer = continuation_contents[matched_key]
            continuation_list = [None]
            yield from continuation_handlers[matched_key](continuation_renderer)
            continuation = continuation_list[0] or self._extract_continuation(continuation_renderer)
            # An empty renderer falls through to the second form
            if continuation_renderer:
                continue

        # Second form: items appended through the first received action/endpoint
        on_response_received = dict_get(response, ('onResponseReceivedActions', 'onResponseReceivedEndpoints'))
        continuation_items = try_get(
            on_response_received, lambda x: x[0]['appendContinuationItemsAction']['continuationItems'], list)
        first_item = try_get(continuation_items, lambda x: x[0], dict) or {}
        matched_key = next((key for key in first_item if key in item_handlers), None)
        if matched_key is None:
            break
        handler, container_key = item_handlers[matched_key]
        # Wrap the items under the key the handler expects
        video_items_renderer = {container_key: continuation_items}
        continuation_list = [None]
        yield from handler(video_items_renderer)
        continuation = continuation_list[0] or self._extract_continuation(video_items_renderer)
9558dcec 4301
8bdd16b4 4302 @staticmethod
7c219ea6 4303 def _extract_selected_tab(tabs, fatal=True):
8bdd16b4 4304 for tab in tabs:
cd684175 4305 renderer = dict_get(tab, ('tabRenderer', 'expandableTabRenderer')) or {}
4306 if renderer.get('selected') is True:
4307 return renderer
2b3c2546 4308 else:
7c219ea6 4309 if fatal:
4310 raise ExtractorError('Unable to find selected tab')
b82f815f 4311
def _extract_uploader(self, data):
    """Return the uploader fields found in the playlist's secondary sidebar.

    The result contains ``uploader``, ``uploader_id`` and ``uploader_url``,
    minus any that are None. It is empty when the sidebar names no owner.
    """
    sidebar = self._extract_sidebar_info_renderer(data, 'playlistSidebarSecondaryInfoRenderer') or {}
    owner = try_get(
        sidebar, lambda x: x['videoOwner']['videoOwnerRenderer']['title']['runs'][0], dict)
    if not owner:
        return {}

    owner_text = owner.get('text')
    fields = {
        # Text of the form "by X and N others" is reduced to X
        'uploader': self._search_regex(
            r'^by (.+) and \d+ others?$', owner_text, 'uploader', default=owner_text),
        'uploader_id': try_get(
            owner, lambda x: x['navigationEndpoint']['browseEndpoint']['browseId'], str),
        'uploader_url': urljoin(
            'https://www.youtube.com/',
            try_get(owner, lambda x: x['navigationEndpoint']['browseEndpoint']['canonicalBaseUrl'], str)),
    }
    return {name: value for name, value in fields.items() if value is not None}
8bdd16b4 4327
def _extract_from_tabs(self, item_id, ytcfg, data, tabs):
    """Build the playlist result for the selected tab of ``tabs``.

    Metadata is read from the channel metadata renderer, or from the playlist
    metadata renderer when there is no channel one. Uploader fields are filled
    from the sidebar owner when no channel ID is known. Entries come from
    ``_entries`` on the selected tab.
    """
    playlist_id = title = description = channel_url = channel_name = channel_id = None
    tags = []

    selected_tab = self._extract_selected_tab(tabs)
    primary_sidebar_renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer')

    renderer = try_get(
        data, lambda x: x['metadata']['channelMetadataRenderer'], dict)
    if renderer:
        channel_name, channel_url, channel_id = (
            renderer.get(key) for key in ('title', 'channelUrl', 'externalId'))
    else:
        renderer = try_get(
            data, lambda x: x['metadata']['playlistMetadataRenderer'], dict)

    if renderer:
        title = renderer.get('title')
        description = renderer.get('description', '')
        playlist_id = channel_id
        tags = renderer.get('keywords', '').split()

    # We can get the uncropped banner/avatar by replacing the crop params with '=s0'
    # See: https://github.com/yt-dlp/yt-dlp/issues/2237#issuecomment-1013694714
    def _add_uncropped(thumbnails, uncropped_id, preference):
        # Derived from the first thumbnail; appended in place when the URL is valid
        if not thumbnails:
            return
        uncropped_url = url_or_none((thumbnails[0]['url'] or '').split('=')[0] + '=s0')
        if uncropped_url:
            thumbnails.append({
                'url': uncropped_url,
                'id': uncropped_id,
                'preference': preference
            })

    avatar_thumbnails = self._extract_thumbnails(renderer, 'avatar')
    _add_uncropped(avatar_thumbnails, 'avatar_uncropped', 1)

    channel_banners = self._extract_thumbnails(
        data, ('header', ..., ['banner', 'mobileBanner', 'tvBanner']))
    for banner in channel_banners:
        banner['preference'] = -10
    _add_uncropped(channel_banners, 'banner_uncropped', -5)

    primary_thumbnails = self._extract_thumbnails(
        primary_sidebar_renderer, ('thumbnailRenderer', ('playlistVideoThumbnailRenderer', 'playlistCustomThumbnailRenderer'), 'thumbnail'))

    if playlist_id is None:
        playlist_id = item_id

    playlist_stats = traverse_obj(primary_sidebar_renderer, 'stats')
    last_updated_unix, _ = self._extract_time_text(playlist_stats, 2)
    if title is None:
        title = self._get_text(data, ('header', 'hashtagHeaderRenderer', 'hashtag')) or playlist_id
    for tab_field in ('title', 'expandedText'):
        title += format_field(selected_tab, tab_field, ' - %s')

    metadata = {
        'playlist_id': playlist_id,
        'playlist_title': title,
        'playlist_description': description,
        'uploader': channel_name,
        'uploader_id': channel_id,
        'uploader_url': channel_url,
        'thumbnails': primary_thumbnails + avatar_thumbnails + channel_banners,
        'tags': tags,
        'view_count': self._get_count(playlist_stats, 1),
        'availability': self._extract_availability(data),
        'modified_date': strftime_or_none(last_updated_unix, '%Y%m%d'),
        'playlist_count': self._get_count(playlist_stats, 0),
        'channel_follower_count': self._get_count(data, ('header', ..., 'subscriberCountText')),
    }
    if not channel_id:
        metadata.update(self._extract_uploader(data))
    # The channel fields mirror the (possibly just updated) uploader fields
    for suffix in ('', '_id', '_url'):
        metadata['channel' + suffix] = metadata['uploader' + suffix]

    entries = self._entries(
        selected_tab, playlist_id, ytcfg,
        self._extract_account_syncid(ytcfg, data),
        self._extract_visitor_data(data, ytcfg))
    return self.playlist_result(entries, **metadata)
73c4ac2c 4419
def _extract_inline_playlist(self, playlist, playlist_id, data, ytcfg):
    """Yield the videos of an inline playlist, requesting further pages via ``next``.

    Each page is requested relative to the last video of the current one. Only
    the videos after the previously yielded last video are emitted, and
    extraction ends when a page adds nothing new or contains no playlist.
    """
    last_id = response = None
    for page_num in itertools.count(1):
        videos = list(self._playlist_entries(playlist))
        if not videos:
            return
        # Resume right after the last video already yielded (index 0 if it is absent)
        start = next((i for i, v in enumerate(videos) if v['id'] == last_id), -1) + 1
        if start >= len(videos):
            return
        yield from videos[start:]
        last_id = videos[-1]['id']
        # The last item need not be a playlistPanelVideoRenderer; fall back to
        # an empty endpoint so the defaults below apply instead of crashing
        watch_endpoint = try_get(
            playlist, lambda x: x['contents'][-1]['playlistPanelVideoRenderer']['navigationEndpoint']['watchEndpoint'],
            dict) or {}
        headers = self.generate_api_headers(
            ytcfg=ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
            visitor_data=self._extract_visitor_data(response, data, ytcfg))
        query = {
            'playlistId': playlist_id,
            'videoId': watch_endpoint.get('videoId') or last_id,
            'index': watch_endpoint.get('index') or len(videos),
            'params': watch_endpoint.get('params') or 'OAE%3D'
        }
        response = self._extract_response(
            item_id='%s page %d' % (playlist_id, page_num),
            query=query, ep='next', headers=headers, ytcfg=ytcfg,
            check_get_keys='contents'
        )
        playlist = try_get(
            response, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
        # A response without a playlist ends the extraction
        if not playlist:
            return
cd7c66cf 4450
def _extract_from_playlist(self, item_id, url, data, playlist, ytcfg):
    """
    Build the result for a playlist renderer: either a url_result pointing at the
    playlist's own page (handled by YoutubeTabIE) or an inline playlist result.
    @param item_id: fallback ID when the renderer has no 'playlistId'
    @param url: URL being extracted; base for resolving the playlist URL
    @param data: response used as a fallback source for the title
    @param playlist: playlist renderer dict
    @param ytcfg: passed on to the inline playlist extraction
    """
    playlist_id = playlist.get('playlistId') or item_id
    title = playlist.get('title')
    if not title:
        title = try_get(data, lambda x: x['titleText']['simpleText'], str)

    # Delegating everything except mix playlists to regular tab-based playlist URL
    web_path = try_get(
        playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'], str)
    playlist_url = urljoin(url, web_path)

    # Some playlists are unviewable but YouTube still provides a link to the (broken) playlist page [1]
    # [1] MLCT, RLTDwFCb4jeqaKWnciAYM-ZVHg
    is_known_unviewable = re.fullmatch(r'MLCT|RLTD[\w-]{22}', playlist_id)

    # Extract inline when there is no distinct playlist page to delegate to
    if not playlist_url or playlist_url == url or is_known_unviewable:
        entries = self._extract_inline_playlist(playlist, playlist_id, data, ytcfg)
        return self.playlist_result(entries, playlist_id=playlist_id, playlist_title=title)

    return self.url_result(
        playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id, video_title=title)
c5e8d7af 4473
def _extract_availability(self, data):
    """
    Gets the availability of a given playlist/tab.
    Note: Unless YouTube tells us explicitly, we do not assume it is public
    @param data: response
    """
    sidebar = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer') or {}
    labels = self._extract_badges(sidebar)

    # Personal playlists, when authenticated, have a dropdown visibility selector instead of a badge
    dropdown_entries = try_get(
        sidebar,
        lambda x: x['privacyForm']['dropdownFormFieldRenderer']['dropdown']['dropdownRenderer']['entries'],
        list) or []
    for entry in dropdown_entries:
        selected = try_get(entry, lambda x: x['privacyDropdownItemRenderer']['isSelected'], bool)
        if selected:
            text = self._get_text(entry, ('privacyDropdownItemRenderer', 'label'))
            if text:
                # Treat the selected dropdown label like a badge and stop looking
                labels.add(text.lower())
                break

    is_private = None
    is_unlisted = None
    for name in labels:
        if name == 'unlisted':
            is_unlisted = True
        elif name == 'private':
            is_private = True
        elif name == 'public':
            is_private = False
            is_unlisted = False
    return self._availability(is_private, False, False, False, is_unlisted)
4505
@staticmethod
def _extract_sidebar_info_renderer(data, info_renderer, expected_type=dict):
    """
    Return the first truthy `info_renderer` value (of `expected_type`) found among
    the playlist sidebar items of `data`, or None if there is none.
    """
    items = try_get(
        data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list) or []
    # Lazily evaluated, so lookup stops at the first match
    matches = (try_get(item, lambda x: x[info_renderer], expected_type) for item in items)
    return next(filter(None, matches), None)
4514
def _reload_with_unavailable_videos(self, item_id, data, ytcfg):
    """
    Get playlist with unavailable videos if the 'show unavailable videos' button exists.
    Returns None when the playlist sidebar renderer is missing.
    """
    sidebar = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer')
    if not sidebar:
        return

    # Stays empty when no matching menu item exists, so the query defaults below are used
    endpoint = {}
    menu_items = try_get(sidebar, lambda x: x['menu']['menuRenderer']['items'], list) or []
    for entry in menu_items:
        if not isinstance(entry, dict):
            continue
        nav_renderer = entry.get('menuNavigationItemRenderer')
        label = try_get(nav_renderer, lambda x: x['text']['simpleText'], str)
        if label and label.lower() == 'show unavailable videos':
            endpoint = try_get(
                nav_renderer, lambda x: x['navigationEndpoint']['browseEndpoint'], dict) or {}
            break

    headers = self.generate_api_headers(
        ytcfg=ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
        visitor_data=self._extract_visitor_data(data, ytcfg))
    query = {
        'params': endpoint.get('params') or 'wgYCCAA=',
        'browseId': endpoint.get('browseId') or 'VL%s' % item_id
    }
    return self._extract_response(
        item_id=item_id, headers=headers, query=query,
        check_get_keys='contents', fatal=False, ytcfg=ytcfg,
        note='Downloading API JSON with unavailable videos')
358de58c 4550
@functools.cached_property
def skip_webpage(self):
    """Whether 'webpage' is listed in the 'skip' extractor argument of YoutubeTabIE"""
    skip_values = self._configuration_arg('skip', ie_key=YoutubeTabIE.ie_key())
    return 'webpage' in skip_values
4554
def _extract_webpage(self, url, item_id, fatal=True):
    """
    Download the webpage and extract its initial data, retrying via RetryManager.
    @returns (webpage, data); either may be None if the download never succeeded
    """
    webpage = data = None
    for retry in self.RetryManager(fatal=fatal):
        try:
            webpage = self._download_webpage(url, item_id, note='Downloading webpage')
            data = self.extract_yt_initial_data(item_id, webpage or '', fatal=fatal) or {}
        except ExtractorError as e:
            cause = e.cause
            # Network errors are retried, except for HTTP 403 and 429 responses
            is_rejected_http = isinstance(cause, urllib.error.HTTPError) and cause.code in (403, 429)
            if isinstance(cause, network_exceptions) and not is_rejected_http:
                retry.error = e
                continue
            self._error_or_warning(e, fatal=fatal)
            break

        try:
            self._extract_and_report_alerts(data)
        except ExtractorError as e:
            self._error_or_warning(e, fatal=fatal)
            break

        # Sometimes youtube returns a webpage with incomplete ytInitialData
        # See: https://github.com/yt-dlp/yt-dlp/issues/116
        if not traverse_obj(data, 'contents', 'currentVideoEndpoint', 'onResponseReceivedActions'):
            # Setting the error as the last statement of the body requests another attempt
            retry.error = ExtractorError('Incomplete yt initial data received')

    return webpage, data
4582
def _report_playlist_authcheck(self, ytcfg, fatal=True):
    """Use if failed to extract ytcfg (and data) from initial webpage"""
    # Nothing to report when ytcfg is available or the session is not authenticated
    if ytcfg or not self.is_authenticated:
        return

    msg = 'Playlists that require authentication may not extract correctly without a successful webpage download'
    skipped_checks = self._configuration_arg('skip', ie_key=YoutubeTabIE.ie_key())
    if fatal and 'authcheck' not in skipped_checks:
        raise ExtractorError(
            f'{msg}. If you are not downloading private content, or '
            'your cookies are only for the first account and channel,'
            ' pass "--extractor-args youtubetab:skip=authcheck" to skip this check',
            expected=True)
    self.report_warning(msg, only_once=True)
4594
def _extract_data(self, url, item_id, ytcfg=None, fatal=True, webpage_fatal=False, default_client='web'):
    """
    Get the initial data for `url`, from the webpage unless it is skipped,
    falling back to the API (_extract_tab_endpoint) when the webpage gave no data.
    @returns (data, ytcfg)
    """
    data = None
    if not self.skip_webpage:
        webpage, data = self._extract_webpage(url, item_id, fatal=webpage_fatal)
        ytcfg = ytcfg or self.extract_ytcfg(item_id, webpage)

        # Reject webpage data if redirected to home page without explicitly requesting
        tabs = traverse_obj(
            data, ('contents', 'twoColumnBrowseResultsRenderer', 'tabs'), expected_type=list, default=[])
        selected_tab = self._extract_selected_tab(tabs, fatal=False) or {}
        redirected_to_home = (
            url != 'https://www.youtube.com/feed/recommended'
            and selected_tab.get('tabIdentifier') == 'FEwhat_to_watch'  # Home page
            and 'no-youtube-channel-redirect' not in self.get_param('compat_opts', []))
        if redirected_to_home:
            msg = 'The channel/playlist does not exist and the URL redirected to youtube.com home page'
            if fatal:
                raise ExtractorError(msg, expected=True)
            self.report_warning(msg, only_once=True)

    if data:
        return data, ytcfg

    self._report_playlist_authcheck(ytcfg, fatal=fatal)
    data = self._extract_tab_endpoint(url, item_id, ytcfg, fatal=fatal, default_client=default_client)
    return data, ytcfg
4614
def _extract_tab_endpoint(self, url, item_id, ytcfg=None, fatal=True, default_client='web'):
    """
    Resolve `url` through the 'navigation/resolve_url' endpoint and request the
    endpoint it resolves to ('browse' or 'next').
    Raises ExtractorError if nothing could be resolved and `fatal`, else warns and returns None.
    """
    headers = self.generate_api_headers(ytcfg=ytcfg, default_client=default_client)
    resolve_response = self._extract_response(
        item_id=item_id, query={'url': url}, check_get_keys='endpoint', headers=headers, ytcfg=ytcfg, fatal=fatal,
        ep='navigation/resolve_url', note='Downloading API parameters API JSON', default_client=default_client)

    # (key in the resolved endpoint, API endpoint to query), tried in this order
    for endpoint_key, api_ep in (('browseEndpoint', 'browse'), ('watchEndpoint', 'next')):
        endpoint_params = try_get(resolve_response, lambda x: x['endpoint'][endpoint_key], dict)
        if not endpoint_params:
            continue
        return self._extract_response(
            item_id=item_id, query=endpoint_params, ep=api_ep, headers=headers,
            ytcfg=ytcfg, fatal=fatal, default_client=default_client,
            check_get_keys=('contents', 'currentVideoEndpoint', 'onResponseReceivedActions'))

    err_note = 'Failed to resolve url (does the playlist exist?)'
    if not fatal:
        self.report_warning(err_note, item_id)
        return
    raise ExtractorError(err_note, expected=True)
4632
# Default 'params' value for search requests; used when _search_results gets no explicit params
_SEARCH_PARAMS = None

def _search_results(self, query, params=NO_DEFAULT, default_client='web'):
    """
    Yield the entries of a search for `query`, page by page.
    @param params: 'params' value for the request; defaults to self._SEARCH_PARAMS
    @param default_client: client passed to the ytcfg download, header generation and API calls
    """
    if params is NO_DEFAULT:
        params = self._SEARCH_PARAMS
    data = {'query': query}
    if params:
        data['params'] = params

    # Paths at which result contents may be found in a response
    content_keys = (
        ('contents', 'twoColumnSearchResultsRenderer', 'primaryContents', 'sectionListRenderer', 'contents'),
        ('onResponseReceivedCommands', 0, 'appendContinuationItemsAction', 'continuationItems'),
        # ytmusic search
        ('contents', 'tabbedSearchResultsRenderer', 'tabs', 0, 'tabRenderer', 'content', 'sectionListRenderer', 'contents'),
        ('continuationContents', ),
    )
    display_id = f'query "{query}"'
    # Unique top-level keys of the paths above
    check_get_keys = tuple({path[0] for path in content_keys})
    ytcfg = {} if self.skip_webpage else self._download_ytcfg(default_client, display_id)
    self._report_playlist_authcheck(ytcfg, fatal=False)

    # Single-element list handed to _extract_entries; its first item is read back
    # after each page as the continuation for the next request
    continuation_list = [None]
    search = None
    page_num = 0
    while True:
        page_num += 1
        data.update(continuation_list[0] or {})
        headers = self.generate_api_headers(
            ytcfg=ytcfg, visitor_data=self._extract_visitor_data(search), default_client=default_client)
        search = self._extract_response(
            item_id=f'{display_id} page {page_num}', ep='search', query=data,
            default_client=default_client, check_get_keys=check_get_keys, ytcfg=ytcfg, headers=headers)
        slr_contents = traverse_obj(search, *content_keys)
        yield from self._extract_entries({'contents': list(variadic(slr_contents))}, continuation_list)
        if not continuation_list[0]:
            return
4667
4668
4669class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
4670 IE_DESC = 'YouTube Tabs'
4671 _VALID_URL = r'''(?x:
4672 https?://
4673 (?:\w+\.)?
4674 (?:
4675 youtube(?:kids)?\.com|
4676 %(invidious)s
4677 )/
4678 (?:
4679 (?P<channel_type>channel|c|user|browse)/|
4680 (?P<not_channel>
4681 feed/|hashtag/|
4682 (?:playlist|watch)\?.*?\blist=
4683 )|
4684 (?!(?:%(reserved_names)s)\b) # Direct URLs
4685 )
4686 (?P<id>[^/?\#&]+)
4687 )''' % {
4688 'reserved_names': YoutubeBaseInfoExtractor._RESERVED_NAMES,
4689 'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
4690 }
4691 IE_NAME = 'youtube:tab'
4692
4693 _TESTS = [{
4694 'note': 'playlists, multipage',
4695 'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid',
4696 'playlist_mincount': 94,
4697 'info_dict': {
4698 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
976ae3ea 4699 'title': 'Igor Kleiner - Playlists',
a6213a49 4700 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
976ae3ea 4701 'uploader': 'Igor Kleiner',
a6213a49 4702 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
976ae3ea 4703 'channel': 'Igor Kleiner',
4704 'channel_id': 'UCqj7Cz7revf5maW9g5pgNcg',
4705 'tags': ['"критическое', 'мышление"', '"наука', 'просто"', 'математика', '"анализ', 'данных"'],
4706 'channel_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',
4707 'uploader_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',
6c73052c 4708 'channel_follower_count': int
a6213a49 4709 },
4710 }, {
4711 'note': 'playlists, multipage, different order',
4712 'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
4713 'playlist_mincount': 94,
4714 'info_dict': {
4715 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
976ae3ea 4716 'title': 'Igor Kleiner - Playlists',
a6213a49 4717 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
4718 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
976ae3ea 4719 'uploader': 'Igor Kleiner',
4720 'uploader_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',
4721 'tags': ['"критическое', 'мышление"', '"наука', 'просто"', 'математика', '"анализ', 'данных"'],
4722 'channel_id': 'UCqj7Cz7revf5maW9g5pgNcg',
4723 'channel': 'Igor Kleiner',
4724 'channel_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',
6c73052c 4725 'channel_follower_count': int
a6213a49 4726 },
4727 }, {
4728 'note': 'playlists, series',
4729 'url': 'https://www.youtube.com/c/3blue1brown/playlists?view=50&sort=dd&shelf_id=3',
4730 'playlist_mincount': 5,
4731 'info_dict': {
4732 'id': 'UCYO_jab_esuFRV4b17AJtAw',
4733 'title': '3Blue1Brown - Playlists',
4734 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
4735 'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
4736 'uploader': '3Blue1Brown',
976ae3ea 4737 'channel_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',
4738 'uploader_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',
4739 'channel': '3Blue1Brown',
4740 'channel_id': 'UCYO_jab_esuFRV4b17AJtAw',
4741 'tags': ['Mathematics'],
6c73052c 4742 'channel_follower_count': int
a6213a49 4743 },
4744 }, {
4745 'note': 'playlists, singlepage',
4746 'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
4747 'playlist_mincount': 4,
4748 'info_dict': {
4749 'id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
4750 'title': 'ThirstForScience - Playlists',
4751 'description': 'md5:609399d937ea957b0f53cbffb747a14c',
4752 'uploader': 'ThirstForScience',
4753 'uploader_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
976ae3ea 4754 'uploader_url': 'https://www.youtube.com/channel/UCAEtajcuhQ6an9WEzY9LEMQ',
4755 'channel_url': 'https://www.youtube.com/channel/UCAEtajcuhQ6an9WEzY9LEMQ',
4756 'channel_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
4757 'tags': 'count:13',
4758 'channel': 'ThirstForScience',
6c73052c 4759 'channel_follower_count': int
a6213a49 4760 }
4761 }, {
4762 'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',
4763 'only_matching': True,
4764 }, {
4765 'note': 'basic, single video playlist',
4766 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
4767 'info_dict': {
4768 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
4769 'uploader': 'Sergey M.',
4770 'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
4771 'title': 'youtube-dl public playlist',
976ae3ea 4772 'description': '',
4773 'tags': [],
4774 'view_count': int,
4775 'modified_date': '20201130',
4776 'channel': 'Sergey M.',
4777 'channel_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
4778 'uploader_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
4779 'channel_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
a6213a49 4780 },
4781 'playlist_count': 1,
4782 }, {
4783 'note': 'empty playlist',
4784 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
4785 'info_dict': {
4786 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
4787 'uploader': 'Sergey M.',
4788 'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
4789 'title': 'youtube-dl empty playlist',
976ae3ea 4790 'tags': [],
4791 'channel': 'Sergey M.',
4792 'description': '',
4793 'modified_date': '20160902',
4794 'channel_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
4795 'channel_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
4796 'uploader_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
a6213a49 4797 },
4798 'playlist_count': 0,
4799 }, {
4800 'note': 'Home tab',
4801 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/featured',
4802 'info_dict': {
4803 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4804 'title': 'lex will - Home',
4805 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
4806 'uploader': 'lex will',
4807 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
976ae3ea 4808 'channel': 'lex will',
4809 'tags': ['bible', 'history', 'prophesy'],
4810 'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
4811 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
4812 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
6c73052c 4813 'channel_follower_count': int
a6213a49 4814 },
4815 'playlist_mincount': 2,
4816 }, {
4817 'note': 'Videos tab',
4818 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos',
4819 'info_dict': {
4820 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4821 'title': 'lex will - Videos',
4822 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
4823 'uploader': 'lex will',
4824 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
976ae3ea 4825 'tags': ['bible', 'history', 'prophesy'],
4826 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
4827 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4828 'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
4829 'channel': 'lex will',
6c73052c 4830 'channel_follower_count': int
a6213a49 4831 },
4832 'playlist_mincount': 975,
4833 }, {
4834 'note': 'Videos tab, sorted by popular',
4835 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos?view=0&sort=p&flow=grid',
4836 'info_dict': {
4837 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4838 'title': 'lex will - Videos',
4839 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
4840 'uploader': 'lex will',
4841 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
976ae3ea 4842 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4843 'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
4844 'channel': 'lex will',
4845 'tags': ['bible', 'history', 'prophesy'],
4846 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
6c73052c 4847 'channel_follower_count': int
a6213a49 4848 },
4849 'playlist_mincount': 199,
4850 }, {
4851 'note': 'Playlists tab',
4852 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/playlists',
4853 'info_dict': {
4854 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4855 'title': 'lex will - Playlists',
4856 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
4857 'uploader': 'lex will',
4858 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
976ae3ea 4859 'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
4860 'channel': 'lex will',
4861 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
4862 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4863 'tags': ['bible', 'history', 'prophesy'],
6c73052c 4864 'channel_follower_count': int
a6213a49 4865 },
4866 'playlist_mincount': 17,
4867 }, {
4868 'note': 'Community tab',
4869 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community',
4870 'info_dict': {
4871 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4872 'title': 'lex will - Community',
4873 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
4874 'uploader': 'lex will',
4875 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
976ae3ea 4876 'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
4877 'channel': 'lex will',
4878 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
4879 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4880 'tags': ['bible', 'history', 'prophesy'],
6c73052c 4881 'channel_follower_count': int
a6213a49 4882 },
4883 'playlist_mincount': 18,
4884 }, {
4885 'note': 'Channels tab',
4886 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels',
4887 'info_dict': {
4888 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4889 'title': 'lex will - Channels',
4890 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
4891 'uploader': 'lex will',
4892 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
976ae3ea 4893 'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
4894 'channel': 'lex will',
4895 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
4896 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4897 'tags': ['bible', 'history', 'prophesy'],
6c73052c 4898 'channel_follower_count': int
a6213a49 4899 },
4900 'playlist_mincount': 12,
4901 }, {
4902 'note': 'Search tab',
4903 'url': 'https://www.youtube.com/c/3blue1brown/search?query=linear%20algebra',
4904 'playlist_mincount': 40,
4905 'info_dict': {
4906 'id': 'UCYO_jab_esuFRV4b17AJtAw',
4907 'title': '3Blue1Brown - Search - linear algebra',
4908 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
4909 'uploader': '3Blue1Brown',
4910 'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
976ae3ea 4911 'channel_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',
4912 'uploader_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',
4913 'tags': ['Mathematics'],
4914 'channel': '3Blue1Brown',
4915 'channel_id': 'UCYO_jab_esuFRV4b17AJtAw',
6c73052c 4916 'channel_follower_count': int
a6213a49 4917 },
4918 }, {
4919 'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',
4920 'only_matching': True,
4921 }, {
4922 'url': 'https://www.youtubekids.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
4923 'only_matching': True,
4924 }, {
4925 'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
4926 'only_matching': True,
4927 }, {
4928 'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
4929 'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
4930 'info_dict': {
4931 'title': '29C3: Not my department',
4932 'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
4933 'uploader': 'Christiaan008',
4934 'uploader_id': 'UCEPzS1rYsrkqzSLNp76nrcg',
4935 'description': 'md5:a14dc1a8ef8307a9807fe136a0660268',
976ae3ea 4936 'tags': [],
4937 'uploader_url': 'https://www.youtube.com/c/ChRiStIaAn008',
4938 'view_count': int,
4939 'modified_date': '20150605',
4940 'channel_id': 'UCEPzS1rYsrkqzSLNp76nrcg',
4941 'channel_url': 'https://www.youtube.com/c/ChRiStIaAn008',
4942 'channel': 'Christiaan008',
a6213a49 4943 },
4944 'playlist_count': 96,
4945 }, {
4946 'note': 'Large playlist',
4947 'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
4948 'info_dict': {
4949 'title': 'Uploads from Cauchemar',
4950 'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
4951 'uploader': 'Cauchemar',
4952 'uploader_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',
976ae3ea 4953 'channel_url': 'https://www.youtube.com/c/Cauchemar89',
4954 'tags': [],
4955 'modified_date': r're:\d{8}',
4956 'channel': 'Cauchemar',
4957 'uploader_url': 'https://www.youtube.com/c/Cauchemar89',
4958 'view_count': int,
4959 'description': '',
4960 'channel_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',
a6213a49 4961 },
4962 'playlist_mincount': 1123,
976ae3ea 4963 'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
a6213a49 4964 }, {
4965 'note': 'even larger playlist, 8832 videos',
4966 'url': 'http://www.youtube.com/user/NASAgovVideo/videos',
4967 'only_matching': True,
4968 }, {
4969 'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
4970 'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
4971 'info_dict': {
4972 'title': 'Uploads from Interstellar Movie',
4973 'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
4974 'uploader': 'Interstellar Movie',
4975 'uploader_id': 'UCXw-G3eDE9trcvY2sBMM_aA',
976ae3ea 4976 'uploader_url': 'https://www.youtube.com/c/InterstellarMovie',
4977 'tags': [],
4978 'view_count': int,
4979 'channel_id': 'UCXw-G3eDE9trcvY2sBMM_aA',
4980 'channel_url': 'https://www.youtube.com/c/InterstellarMovie',
4981 'channel': 'Interstellar Movie',
4982 'description': '',
4983 'modified_date': r're:\d{8}',
a6213a49 4984 },
4985 'playlist_mincount': 21,
4986 }, {
4987 'note': 'Playlist with "show unavailable videos" button',
4988 'url': 'https://www.youtube.com/playlist?list=UUTYLiWFZy8xtPwxFwX9rV7Q',
4989 'info_dict': {
4990 'title': 'Uploads from Phim Siêu Nhân Nhật Bản',
4991 'id': 'UUTYLiWFZy8xtPwxFwX9rV7Q',
4992 'uploader': 'Phim Siêu Nhân Nhật Bản',
4993 'uploader_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',
976ae3ea 4994 'view_count': int,
4995 'channel': 'Phim Siêu Nhân Nhật Bản',
4996 'tags': [],
4997 'uploader_url': 'https://www.youtube.com/channel/UCTYLiWFZy8xtPwxFwX9rV7Q',
4998 'description': '',
4999 'channel_url': 'https://www.youtube.com/channel/UCTYLiWFZy8xtPwxFwX9rV7Q',
5000 'channel_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',
5001 'modified_date': r're:\d{8}',
a6213a49 5002 },
5003 'playlist_mincount': 200,
976ae3ea 5004 'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
a6213a49 5005 }, {
5006 'note': 'Playlist with unavailable videos in page 7',
5007 'url': 'https://www.youtube.com/playlist?list=UU8l9frL61Yl5KFOl87nIm2w',
5008 'info_dict': {
5009 'title': 'Uploads from BlankTV',
5010 'id': 'UU8l9frL61Yl5KFOl87nIm2w',
5011 'uploader': 'BlankTV',
5012 'uploader_id': 'UC8l9frL61Yl5KFOl87nIm2w',
976ae3ea 5013 'channel': 'BlankTV',
5014 'channel_url': 'https://www.youtube.com/c/blanktv',
5015 'channel_id': 'UC8l9frL61Yl5KFOl87nIm2w',
5016 'view_count': int,
5017 'tags': [],
5018 'uploader_url': 'https://www.youtube.com/c/blanktv',
5019 'modified_date': r're:\d{8}',
5020 'description': '',
a6213a49 5021 },
5022 'playlist_mincount': 1000,
976ae3ea 5023 'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
a6213a49 5024 }, {
5025 'note': 'https://github.com/ytdl-org/youtube-dl/issues/21844',
5026 'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
5027 'info_dict': {
5028 'title': 'Data Analysis with Dr Mike Pound',
5029 'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
5030 'uploader_id': 'UC9-y-6csu5WGm29I7JiwpnA',
5031 'uploader': 'Computerphile',
5032 'description': 'md5:7f567c574d13d3f8c0954d9ffee4e487',
976ae3ea 5033 'uploader_url': 'https://www.youtube.com/user/Computerphile',
5034 'tags': [],
5035 'view_count': int,
5036 'channel_id': 'UC9-y-6csu5WGm29I7JiwpnA',
5037 'channel_url': 'https://www.youtube.com/user/Computerphile',
5038 'channel': 'Computerphile',
a6213a49 5039 },
5040 'playlist_mincount': 11,
5041 }, {
5042 'url': 'https://invidio.us/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
5043 'only_matching': True,
5044 }, {
5045 'note': 'Playlist URL that does not actually serve a playlist',
5046 'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
5047 'info_dict': {
5048 'id': 'FqZTN594JQw',
5049 'ext': 'webm',
5050 'title': "Smiley's People 01 detective, Adventure Series, Action",
5051 'uploader': 'STREEM',
5052 'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
5053 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
5054 'upload_date': '20150526',
5055 'license': 'Standard YouTube License',
5056 'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
5057 'categories': ['People & Blogs'],
5058 'tags': list,
5059 'view_count': int,
5060 'like_count': int,
a6213a49 5061 },
5062 'params': {
5063 'skip_download': True,
5064 },
5065 'skip': 'This video is not available.',
5066 'add_ie': [YoutubeIE.ie_key()],
5067 }, {
5068 'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',
5069 'only_matching': True,
5070 }, {
5071 'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
5072 'only_matching': True,
5073 }, {
5074 'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',
5075 'info_dict': {
12a1b225 5076 'id': 'Wq15eF5vCbI', # This will keep changing
a6213a49 5077 'ext': 'mp4',
976ae3ea 5078 'title': str,
a6213a49 5079 'uploader': 'Sky News',
5080 'uploader_id': 'skynews',
5081 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/skynews',
5082 'upload_date': r're:\d{8}',
976ae3ea 5083 'description': str,
a6213a49 5084 'categories': ['News & Politics'],
5085 'tags': list,
5086 'like_count': int,
6c73052c 5087 'release_timestamp': 1642502819,
976ae3ea 5088 'channel': 'Sky News',
5089 'channel_id': 'UCoMdktPbSTixAyNGwb-UYkQ',
5090 'age_limit': 0,
5091 'view_count': int,
6c73052c 5092 'thumbnail': 'https://i.ytimg.com/vi/GgL890LIznQ/maxresdefault_live.jpg',
976ae3ea 5093 'playable_in_embed': True,
6c73052c 5094 'release_date': '20220118',
976ae3ea 5095 'availability': 'public',
5096 'live_status': 'is_live',
5097 'channel_url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ',
6c73052c 5098 'channel_follower_count': int
a6213a49 5099 },
5100 'params': {
5101 'skip_download': True,
5102 },
976ae3ea 5103 'expected_warnings': ['Ignoring subtitle tracks found in '],
a6213a49 5104 }, {
5105 'url': 'https://www.youtube.com/user/TheYoungTurks/live',
5106 'info_dict': {
5107 'id': 'a48o2S1cPoo',
5108 'ext': 'mp4',
5109 'title': 'The Young Turks - Live Main Show',
5110 'uploader': 'The Young Turks',
5111 'uploader_id': 'TheYoungTurks',
5112 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
5113 'upload_date': '20150715',
5114 'license': 'Standard YouTube License',
5115 'description': 'md5:438179573adcdff3c97ebb1ee632b891',
5116 'categories': ['News & Politics'],
5117 'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
5118 'like_count': int,
a6213a49 5119 },
5120 'params': {
5121 'skip_download': True,
5122 },
5123 'only_matching': True,
5124 }, {
5125 'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
5126 'only_matching': True,
5127 }, {
5128 'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
5129 'only_matching': True,
5130 }, {
5131 'note': 'A channel that is not live. Should raise error',
5132 'url': 'https://www.youtube.com/user/numberphile/live',
5133 'only_matching': True,
5134 }, {
5135 'url': 'https://www.youtube.com/feed/trending',
5136 'only_matching': True,
5137 }, {
5138 'url': 'https://www.youtube.com/feed/library',
5139 'only_matching': True,
5140 }, {
5141 'url': 'https://www.youtube.com/feed/history',
5142 'only_matching': True,
5143 }, {
5144 'url': 'https://www.youtube.com/feed/subscriptions',
5145 'only_matching': True,
5146 }, {
5147 'url': 'https://www.youtube.com/feed/watch_later',
5148 'only_matching': True,
5149 }, {
5150 'note': 'Recommended - redirects to home page.',
5151 'url': 'https://www.youtube.com/feed/recommended',
5152 'only_matching': True,
5153 }, {
5154 'note': 'inline playlist with not always working continuations',
5155 'url': 'https://www.youtube.com/watch?v=UC6u0Tct-Fo&list=PL36D642111D65BE7C',
5156 'only_matching': True,
5157 }, {
5158 'url': 'https://www.youtube.com/course',
5159 'only_matching': True,
5160 }, {
5161 'url': 'https://www.youtube.com/zsecurity',
5162 'only_matching': True,
5163 }, {
5164 'url': 'http://www.youtube.com/NASAgovVideo/videos',
5165 'only_matching': True,
5166 }, {
5167 'url': 'https://www.youtube.com/TheYoungTurks/live',
5168 'only_matching': True,
5169 }, {
5170 'url': 'https://www.youtube.com/hashtag/cctv9',
5171 'info_dict': {
5172 'id': 'cctv9',
5173 'title': '#cctv9',
976ae3ea 5174 'tags': [],
a6213a49 5175 },
5176 'playlist_mincount': 350,
5177 }, {
5178 'url': 'https://www.youtube.com/watch?list=PLW4dVinRY435CBE_JD3t-0SRXKfnZHS1P&feature=youtu.be&v=M9cJMXmQ_ZU',
5179 'only_matching': True,
5180 }, {
5181 'note': 'Requires Premium: should request additional YTM-info webpage (and have format 141) for videos in playlist',
5182 'url': 'https://music.youtube.com/playlist?list=PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
5183 'only_matching': True
5184 }, {
5185 'note': '/browse/ should redirect to /channel/',
5186 'url': 'https://music.youtube.com/browse/UC1a8OFewdjuLq6KlF8M_8Ng',
5187 'only_matching': True
5188 }, {
5189 'note': 'VLPL, should redirect to playlist?list=PL...',
5190 'url': 'https://music.youtube.com/browse/VLPLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
5191 'info_dict': {
5192 'id': 'PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
5193 'uploader': 'NoCopyrightSounds',
5194 'description': 'Providing you with copyright free / safe music for gaming, live streaming, studying and more!',
5195 'uploader_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',
12a1b225 5196 'title': 'NCS : All Releases 💿',
976ae3ea 5197 'uploader_url': 'https://www.youtube.com/c/NoCopyrightSounds',
5198 'channel_url': 'https://www.youtube.com/c/NoCopyrightSounds',
5199 'modified_date': r're:\d{8}',
5200 'view_count': int,
5201 'channel_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',
5202 'tags': [],
5203 'channel': 'NoCopyrightSounds',
a6213a49 5204 },
5205 'playlist_mincount': 166,
976ae3ea 5206 'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
a6213a49 5207 }, {
5208 'note': 'Topic, should redirect to playlist?list=UU...',
5209 'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',
5210 'info_dict': {
5211 'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',
5212 'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
5213 'title': 'Uploads from Royalty Free Music - Topic',
5214 'uploader': 'Royalty Free Music - Topic',
976ae3ea 5215 'tags': [],
5216 'channel_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
5217 'channel': 'Royalty Free Music - Topic',
5218 'view_count': int,
5219 'channel_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',
5220 'channel_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',
5221 'modified_date': r're:\d{8}',
5222 'uploader_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',
5223 'description': '',
a6213a49 5224 },
5225 'expected_warnings': [
a6213a49 5226 'The URL does not have a videos tab',
976ae3ea 5227 r'[Uu]navailable videos (are|will be) hidden',
a6213a49 5228 ],
5229 'playlist_mincount': 101,
5230 }, {
5231 'note': 'Topic without a UU playlist',
5232 'url': 'https://www.youtube.com/channel/UCtFRv9O2AHqOZjjynzrv-xg',
5233 'info_dict': {
5234 'id': 'UCtFRv9O2AHqOZjjynzrv-xg',
5235 'title': 'UCtFRv9O2AHqOZjjynzrv-xg',
976ae3ea 5236 'tags': [],
a6213a49 5237 },
5238 'expected_warnings': [
976ae3ea 5239 'the playlist redirect gave error',
a6213a49 5240 ],
5241 'playlist_mincount': 9,
5242 }, {
5243 'note': 'Youtube music Album',
5244 'url': 'https://music.youtube.com/browse/MPREb_gTAcphH99wE',
5245 'info_dict': {
5246 'id': 'OLAK5uy_l1m0thk3g31NmIIz_vMIbWtyv7eZixlH0',
5247 'title': 'Album - Royalty Free Music Library V2 (50 Songs)',
976ae3ea 5248 'tags': [],
5249 'view_count': int,
5250 'description': '',
5251 'availability': 'unlisted',
5252 'modified_date': r're:\d{8}',
a6213a49 5253 },
5254 'playlist_count': 50,
5255 }, {
5256 'note': 'unlisted single video playlist',
5257 'url': 'https://www.youtube.com/playlist?list=PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
5258 'info_dict': {
5259 'uploader_id': 'UC9zHu_mHU96r19o-wV5Qs1Q',
5260 'uploader': 'colethedj',
5261 'id': 'PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
5262 'title': 'yt-dlp unlisted playlist test',
976ae3ea 5263 'availability': 'unlisted',
5264 'tags': [],
12a1b225 5265 'modified_date': '20220418',
976ae3ea 5266 'channel': 'colethedj',
5267 'view_count': int,
5268 'description': '',
5269 'uploader_url': 'https://www.youtube.com/channel/UC9zHu_mHU96r19o-wV5Qs1Q',
5270 'channel_id': 'UC9zHu_mHU96r19o-wV5Qs1Q',
5271 'channel_url': 'https://www.youtube.com/channel/UC9zHu_mHU96r19o-wV5Qs1Q',
a6213a49 5272 },
5273 'playlist_count': 1,
5274 }, {
5275 'note': 'API Fallback: Recommended - redirects to home page. Requires visitorData',
5276 'url': 'https://www.youtube.com/feed/recommended',
5277 'info_dict': {
5278 'id': 'recommended',
5279 'title': 'recommended',
6c73052c 5280 'tags': [],
a6213a49 5281 },
5282 'playlist_mincount': 50,
5283 'params': {
5284 'skip_download': True,
5285 'extractor_args': {'youtubetab': {'skip': ['webpage']}}
5286 },
5287 }, {
5288 'note': 'API Fallback: /videos tab, sorted by oldest first',
5289 'url': 'https://www.youtube.com/user/theCodyReeder/videos?view=0&sort=da&flow=grid',
5290 'info_dict': {
5291 'id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',
5292 'title': 'Cody\'sLab - Videos',
5293 'description': 'md5:d083b7c2f0c67ee7a6c74c3e9b4243fa',
5294 'uploader': 'Cody\'sLab',
5295 'uploader_id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',
976ae3ea 5296 'channel': 'Cody\'sLab',
5297 'channel_id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',
5298 'tags': [],
5299 'channel_url': 'https://www.youtube.com/channel/UCu6mSoMNzHQiBIOCkHUa2Aw',
5300 'uploader_url': 'https://www.youtube.com/channel/UCu6mSoMNzHQiBIOCkHUa2Aw',
6c73052c 5301 'channel_follower_count': int
a6213a49 5302 },
5303 'playlist_mincount': 650,
5304 'params': {
5305 'skip_download': True,
5306 'extractor_args': {'youtubetab': {'skip': ['webpage']}}
5307 },
5308 }, {
5309 'note': 'API Fallback: Topic, should redirect to playlist?list=UU...',
5310 'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',
5311 'info_dict': {
5312 'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',
5313 'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
5314 'title': 'Uploads from Royalty Free Music - Topic',
5315 'uploader': 'Royalty Free Music - Topic',
976ae3ea 5316 'modified_date': r're:\d{8}',
5317 'channel_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
5318 'description': '',
5319 'channel_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',
5320 'tags': [],
5321 'channel': 'Royalty Free Music - Topic',
5322 'view_count': int,
5323 'uploader_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',
a6213a49 5324 },
5325 'expected_warnings': [
976ae3ea 5326 'does not have a videos tab',
5327 r'[Uu]navailable videos (are|will be) hidden',
a6213a49 5328 ],
5329 'playlist_mincount': 101,
5330 'params': {
5331 'skip_download': True,
5332 'extractor_args': {'youtubetab': {'skip': ['webpage']}}
5333 },
7c219ea6 5334 }, {
5335 'note': 'non-standard redirect to regional channel',
5336 'url': 'https://www.youtube.com/channel/UCwVVpHQ2Cs9iGJfpdFngePQ',
5337 'only_matching': True
61d3665d 5338 }, {
5339 'note': 'collaborative playlist (uploader name in the form "by <uploader> and x other(s)")',
5340 'url': 'https://www.youtube.com/playlist?list=PLx-_-Kk4c89oOHEDQAojOXzEzemXxoqx6',
5341 'info_dict': {
5342 'id': 'PLx-_-Kk4c89oOHEDQAojOXzEzemXxoqx6',
5343 'modified_date': '20220407',
5344 'channel_url': 'https://www.youtube.com/channel/UCKcqXmCcyqnhgpA5P0oHH_Q',
5345 'tags': [],
5346 'uploader_id': 'UCKcqXmCcyqnhgpA5P0oHH_Q',
5347 'uploader': 'pukkandan',
5348 'availability': 'unlisted',
5349 'channel_id': 'UCKcqXmCcyqnhgpA5P0oHH_Q',
5350 'channel': 'pukkandan',
5351 'description': 'Test for collaborative playlist',
5352 'title': 'yt-dlp test - collaborative playlist',
12a1b225 5353 'view_count': int,
61d3665d 5354 'uploader_url': 'https://www.youtube.com/channel/UCKcqXmCcyqnhgpA5P0oHH_Q',
5355 },
5356 'playlist_mincount': 2
a6213a49 5357 }]
5358
@classmethod
def suitable(cls, url):
    """Tell whether this extractor should handle ``url``.

    Anything YoutubeIE accepts is left to YoutubeIE; every other URL is
    decided by the inherited ``suitable`` check.
    """
    if YoutubeIE.suitable(url):
        return False
    return super().suitable(url)
9297939e 5362
# Splits a URL into the named parts consumed by _real_extract:
#   pre  - the portion matched by _VALID_URL
#   tab  - an optional "/<word>" path segment; it is only attempted when the
#          "not_channel" group of _VALID_URL did not take part in the match
#   post - everything that remains up to the end of the string
_URL_RE = re.compile(rf'(?P<pre>{_VALID_URL})(?(not_channel)|(?P<tab>/\w+))?(?P<post>.*)$')
fe03a6cd 5364
@YoutubeTabBaseInfoExtractor.passthrough_smuggled_data
def _real_extract(self, url, smuggled_data):
    """Resolve a tab/channel/playlist URL to a playlist or to another extractor.

    The URL host is normalised to www.youtube.com and split with _URL_RE.
    Music URLs (flagged through ``smuggled_data['is_music_url']``) are mapped
    to their regular playlist/channel equivalents, and channel URLs without a
    tab are pointed at ``/videos`` unless the 'no-youtube-channel-redirect'
    compat option is set. After the page data is fetched the result is, in
    order of preference: tab contents, an inline watch-page playlist, or a
    hand-off of a single video to YoutubeIE.

    ``smuggled_data`` is read like a dict; presumably it is supplied by the
    ``passthrough_smuggled_data`` decorator - confirm against its definition.

    Raises:
        UserNotLive: a ``/live`` tab was requested but a tab page came back.
        ExtractorError: the page cannot be recognised, an album cannot be
            resolved to a playlist, or an explicitly requested tab is missing.
    """
    item_id = self._match_id(url)
    # Force the canonical host, whatever host the URL was given with
    url = urllib.parse.urlunparse(
        urllib.parse.urlparse(url)._replace(netloc='www.youtube.com'))
    compat_opts = self.get_param('compat_opts', [])

    def get_mobj(url):
        # Return the named groups of _URL_RE, with unmatched groups as '' instead of None
        mobj = self._URL_RE.match(url).groupdict()
        mobj.update((k, '') for k, v in mobj.items() if v is None)
        return mobj

    mobj, redirect_warning = get_mobj(url), None
    # Youtube returns incomplete data if tabname is not lower case
    pre, tab, post, is_channel = mobj['pre'], mobj['tab'].lower(), mobj['post'], not mobj['not_channel']
    if is_channel:
        if smuggled_data.get('is_music_url'):
            if item_id[:2] == 'VL':  # Youtube music VL channels have an equivalent playlist
                item_id = item_id[2:]
                pre, tab, post, is_channel = f'https://www.youtube.com/playlist?list={item_id}', '', '', False
            elif item_id[:2] == 'MP':  # Resolve albums (/[channel/browse]/MP...) to their equivalent playlist
                mdata = self._extract_tab_endpoint(
                    f'https://music.youtube.com/channel/{item_id}', item_id, default_client='web_music')
                murl = traverse_obj(mdata, ('microformat', 'microformatDataRenderer', 'urlCanonical'),
                                    get_all=False, expected_type=str)
                if not murl:
                    raise ExtractorError('Failed to resolve album to playlist')
                return self.url_result(murl, ie=YoutubeTabIE.ie_key())
            elif mobj['channel_type'] == 'browse':  # Youtube music /browse/ should be changed to /channel/
                pre = f'https://www.youtube.com/channel/{item_id}'

    # Remember what the user actually asked for before a default tab is applied
    original_tab_name = tab
    if is_channel and not tab and 'no-youtube-channel-redirect' not in compat_opts:
        # Home URLs should redirect to /videos/
        redirect_warning = ('A channel/user page was given. All the channel\'s videos will be downloaded. '
                            'To download only the videos in the home page, add a "/featured" to the URL')
        tab = '/videos'

    # Rebuild the URL from the (possibly rewritten) parts and re-split it
    url = ''.join((pre, tab, post))
    mobj = get_mobj(url)

    # Handle both video/playlist URLs
    qs = parse_qs(url)
    video_id, playlist_id = (qs.get(key, [None])[0] for key in ('v', 'list'))

    if not video_id and mobj['not_channel'].startswith('watch'):
        if not playlist_id:
            # If there is neither video or playlist ids, youtube redirects to home page, which is undesirable
            raise ExtractorError('Unable to recognize tab page')
        # Common mistake: https://www.youtube.com/watch?list=playlist_id
        self.report_warning(f'A video URL was given without video ID. Trying to download playlist {playlist_id}')
        url = f'https://www.youtube.com/playlist?list={playlist_id}'
        mobj = get_mobj(url)

    if video_id and playlist_id:
        if self.get_param('noplaylist'):
            self.to_screen(f'Downloading just video {video_id} because of --no-playlist')
            return self.url_result(f'https://www.youtube.com/watch?v={video_id}',
                                   ie=YoutubeIE.ie_key(), video_id=video_id)
        self.to_screen(f'Downloading playlist {playlist_id}; add --no-playlist to just download video {video_id}')

    data, ytcfg = self._extract_data(url, item_id)

    # YouTube may provide a non-standard redirect to the regional channel
    # See: https://github.com/yt-dlp/yt-dlp/issues/2694
    redirect_url = traverse_obj(
        data, ('onResponseReceivedActions', ..., 'navigateAction', 'endpoint', 'commandMetadata', 'webCommandMetadata', 'url'), get_all=False)
    if redirect_url and 'no-youtube-channel-redirect' not in compat_opts:
        # Keep the requested tab and trailing part when following the redirect
        redirect_url = ''.join((
            urljoin('https://www.youtube.com', redirect_url), mobj['tab'], mobj['post']))
        self.to_screen(f'This playlist is likely not available in your region. Following redirect to regional playlist {redirect_url}')
        return self.url_result(redirect_url, ie=YoutubeTabIE.ie_key())

    tabs = traverse_obj(data, ('contents', 'twoColumnBrowseResultsRenderer', 'tabs'), expected_type=list)
    if tabs:
        selected_tab = self._extract_selected_tab(tabs)
        selected_tab_name = selected_tab.get('title', '').lower()
        # The tab titled "home" is addressed as "featured" in URLs
        if selected_tab_name == 'home':
            selected_tab_name = 'featured'
        requested_tab_name = mobj['tab'][1:]  # drop the leading '/'
        if 'no-youtube-channel-redirect' not in compat_opts:
            if requested_tab_name == 'live':  # Live tab should have redirected to the video
                raise UserNotLive(video_id=mobj['id'])
            if requested_tab_name not in ('', selected_tab_name):
                redirect_warning = f'The channel does not have a {requested_tab_name} tab'
                if not original_tab_name:
                    # The tab was only implied by the /videos default, so fall back gracefully
                    if item_id[:2] == 'UC':
                        # Topic channels don't have /videos. Use the equivalent playlist instead
                        pl_id = f'UU{item_id[2:]}'
                        pl_url = f'https://www.youtube.com/playlist?list={pl_id}'
                        try:
                            data, ytcfg = self._extract_data(pl_url, pl_id, ytcfg=ytcfg, fatal=True, webpage_fatal=True)
                        except ExtractorError:
                            redirect_warning += ' and the playlist redirect gave error'
                        else:
                            item_id, url, selected_tab_name = pl_id, pl_url, requested_tab_name
                            redirect_warning += f'. Redirecting to playlist {pl_id} instead'
                    if selected_tab_name and selected_tab_name != requested_tab_name:
                        redirect_warning += f'. {selected_tab_name} tab is being downloaded instead'
                else:
                    # The user explicitly asked for a tab that does not exist
                    raise ExtractorError(redirect_warning, expected=True)

    if redirect_warning:
        self.to_screen(redirect_warning)
    self.write_debug(f'Final URL: {url}')

    # YouTube sometimes provides a button to reload playlist with unavailable videos.
    if 'no-youtube-unavailable-videos' not in compat_opts:
        data = self._reload_with_unavailable_videos(item_id, data, ytcfg) or data
    self._extract_and_report_alerts(data, only_once=True)
    # Look the tabs up again: data may have been replaced above
    tabs = traverse_obj(data, ('contents', 'twoColumnBrowseResultsRenderer', 'tabs'), expected_type=list)
    if tabs:
        return self._extract_from_tabs(item_id, ytcfg, data, tabs)

    playlist = traverse_obj(
        data, ('contents', 'twoColumnWatchNextResults', 'playlist', 'playlist'), expected_type=dict)
    if playlist:
        return self._extract_from_playlist(item_id, url, data, playlist, ytcfg)

    # Last resort: hand a single video over to YoutubeIE
    video_id = traverse_obj(
        data, ('currentVideoEndpoint', 'watchEndpoint', 'videoId'), expected_type=str) or video_id
    if video_id:
        if mobj['tab'] != '/live':  # live tab is expected to redirect to video
            self.report_warning(f'Unable to recognize playlist. Downloading just video {video_id}')
        return self.url_result(f'https://www.youtube.com/watch?v={video_id}',
                               ie=YoutubeIE.ie_key(), video_id=video_id)

    raise ExtractorError('Unable to recognize tab page')
c5e8d7af 5493
c5e8d7af 5494
# Accepts bare playlist IDs as well as youtube.com / youtubekids.com / Invidious
# URLs that carry a "list=" query parameter, and forwards them to YoutubeTabIE.
class YoutubePlaylistIE(InfoExtractor):
    IE_NAME = 'youtube:playlist'
    IE_DESC = 'YouTube playlists'
    # The whole "host + path + list=" prefix is optional, so a bare ID matches too
    _VALID_URL = r'''(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        (?:
                            (?:
                                youtube(?:kids)?\.com|
                                %(invidious)s
                            )
                            /.*?\?.*?\blist=
                        )?
                        (?P<id>%(playlist_id)s)
                     )''' % {
        'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE,
        'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
    }
    _TESTS = [{
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
            'uploader': 'Wickman',
            'uploader_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
            'description': 'md5:8fa6f52abb47a9552002fa3ddfc57fc2',
            'view_count': int,
            'uploader_url': 'https://www.youtube.com/user/Wickydoo',
            'modified_date': r're:\d{8}',
            'channel_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
            'channel': 'Wickman',
            'tags': [],
            'channel_url': 'https://www.youtube.com/user/Wickydoo',
        },
        'playlist_mincount': 29,
    }, {
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        },
        'playlist_count': 2,
        'skip': 'This playlist is private',
    }, {
        'note': 'embedded',
        'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA15',
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
            'uploader': 'milan',
            'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
            'description': '',
            'channel_url': 'https://www.youtube.com/channel/UCEI1-PVPcYXjB73Hfelbmaw',
            'tags': [],
            'modified_date': '20140919',
            'view_count': int,
            'channel': 'milan',
            'channel_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
            'uploader_url': 'https://www.youtube.com/channel/UCEI1-PVPcYXjB73Hfelbmaw',
        },
        'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
    }, {
        'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
        'playlist_mincount': 455,
        'info_dict': {
            'title': '2018 Chinese New Singles (11/6 updated)',
            'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
            'uploader': 'LBK',
            'uploader_id': 'UC21nz3_MesPLqtDqwdvnoxA',
            'description': 'md5:da521864744d60a198e3a88af4db0d9d',
            'channel': 'LBK',
            'view_count': int,
            'channel_url': 'https://www.youtube.com/c/愛低音的國王',
            'tags': [],
            'uploader_url': 'https://www.youtube.com/c/愛低音的國王',
            'channel_id': 'UC21nz3_MesPLqtDqwdvnoxA',
            'modified_date': r're:\d{8}',
        },
        'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
    }, {
        'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
        'only_matching': True,
    }, {
        # music album playlist
        'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Reject URLs owned by YoutubeTabIE and URLs that carry a video id (``v=``)."""
        if YoutubeTabIE.suitable(url):
            return False
        # NOTE(review): the function-local import looks deliberate (it keeps this
        # classmethod self-contained) - confirm before hoisting it to module level.
        from ..utils import parse_qs
        has_video_id = bool(parse_qs(url).get('v', [None])[0])
        return False if has_video_id else super().suitable(url)

    def _real_extract(self, url):
        """Rewrite the input as a www.youtube.com/playlist URL and delegate to YoutubeTabIE."""
        playlist_id = self._match_id(url)
        from_music = YoutubeBaseInfoExtractor.is_music_url(url)
        # Carry the original query over; a bare ID has none, so build one from the ID
        query = parse_qs(url) or {'list': playlist_id}
        playlist_url = update_url_query('https://www.youtube.com/playlist', query)
        if from_music:
            playlist_url = smuggle_url(playlist_url, {'is_music_url': True})
        return self.url_result(playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
29f7c58a 5604
5605
# Handles youtu.be short links that also carry a playlist ("list=") parameter.
class YoutubeYtBeIE(InfoExtractor):
    IE_DESC = 'youtu.be'
    _VALID_URL = r'https?://youtu\.be/(?P<id>[0-9A-Za-z_-]{11})/*?.*?\blist=(?P<playlist_id>%(playlist_id)s)' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TESTS = [{
        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
        'info_dict': {
            'id': 'yeWKywCrFtk',
            'ext': 'mp4',
            'title': 'Small Scale Baler and Braiding Rugs',
            'uploader': 'Backus-Page House Museum',
            'uploader_id': 'backuspagemuseum',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
            'upload_date': '20161008',
            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
            'categories': ['Nonprofits & Activism'],
            'tags': list,
            'like_count': int,
            'age_limit': 0,
            'playable_in_embed': True,
            'thumbnail': 'https://i.ytimg.com/vi_webp/yeWKywCrFtk/maxresdefault.webp',
            'channel': 'Backus-Page House Museum',
            'channel_id': 'UCEfMCQ9bs3tjvjy1s451zaw',
            'live_status': 'not_live',
            'view_count': int,
            'channel_url': 'https://www.youtube.com/channel/UCEfMCQ9bs3tjvjy1s451zaw',
            'availability': 'public',
            'duration': 59,
            'comment_count': int,
            'channel_follower_count': int
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
    }, {
        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Expand the short link into a full watch URL (video + playlist) for YoutubeTabIE."""
        video_id, playlist_id = self._match_valid_url(url).group('id', 'playlist_id')
        watch_url = update_url_query('https://www.youtube.com/watch', {
            'v': video_id,
            'list': playlist_id,
            'feature': 'youtu.be',
        })
        return self.url_result(watch_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
8bdd16b4 5655
5656
# Maps "/embed/live_stream?channel=<id>" embeds onto the channel's /live URL.
class YoutubeLivestreamEmbedIE(InfoExtractor):
    IE_DESC = 'YouTube livestream embeds'
    _VALID_URL = r'https?://(?:\w+\.)?youtube\.com/embed/live_stream/?\?(?:[^#]+&)?channel=(?P<id>[^&#]+)'
    _TESTS = [{
        'url': 'https://www.youtube.com/embed/live_stream?channel=UC2_KI6RB__jGdlnK6dvFEZA',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE using the /live URL of the embedded channel."""
        channel_id = self._match_id(url)
        live_url = f'https://www.youtube.com/channel/{channel_id}/live'
        return self.url_result(live_url, ie=YoutubeTabIE.ie_key(), video_id=channel_id)
5670
5671
# Expands the "ytuser:<name>" shorthand into the user's /videos URL.
class YoutubeYtUserIE(InfoExtractor):
    IE_NAME = 'youtube:user'
    IE_DESC = 'YouTube user videos; "ytuser:" prefix'
    _VALID_URL = r'ytuser:(?P<id>.+)'
    _TESTS = [{
        'url': 'ytuser:phihag',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the /user/<name>/videos URL."""
        user_id = self._match_id(url)
        videos_url = 'https://www.youtube.com/user/%s/videos' % user_id
        return self.url_result(videos_url, ie=YoutubeTabIE.ie_key(), video_id=user_id)
9558dcec 5686
b05654f0 5687
# ":ytfav" keyword (and its spelling variants): resolves to the "LL" playlist.
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube liked videos; ":ytfav" keyword (requires cookies)'
    _VALID_URL = r':ytfav(?:ou?rite)?s?'
    _LOGIN_REQUIRED = True
    _TESTS = [{
        'url': ':ytfav',
        'only_matching': True,
    }, {
        'url': ':ytfavorites',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the fixed ``list=LL`` playlist URL."""
        liked_url = 'https://www.youtube.com/playlist?list=LL'
        return self.url_result(liked_url, ie=YoutubeTabIE.ie_key())
5705
5706
# ":ytnotif" keyword: lists the entries of the account's notification menu.
class YoutubeNotificationsIE(YoutubeTabBaseInfoExtractor):
    IE_NAME = 'youtube:notif'
    IE_DESC = 'YouTube notifications; ":ytnotif" keyword (requires cookies)'
    _VALID_URL = r':ytnotif(?:ication)?s?'
    _LOGIN_REQUIRED = True
    _TESTS = [{
        'url': ':ytnotif',
        'only_matching': True,
    }, {
        'url': ':ytnotifications',
        'only_matching': True,
    }]

    def _extract_notification_menu(self, response, continuation_list):
        """Yield an entry for each notification found in one API response.

        ``continuation_list`` is a one-element list used as an out-parameter:
        slot 0 is cleared first and then set to the last continuation renderer
        found among the items, if there is one.
        """
        # Path used when the response opens the popup, then the path used when it appends items
        popup_items_path = (
            'actions', 0, 'openPopupAction', 'popup', 'multiPageMenuRenderer', 'sections', 0,
            'multiPageMenuNotificationSectionRenderer', 'items')
        appended_items_path = ('actions', 0, 'appendContinuationItemsAction', 'continuationItems')
        items = traverse_obj(response, popup_items_path, appended_items_path, expected_type=list) or []

        continuation_list[0] = None
        for item in items:
            entry = self._extract_notification_renderer(item.get('notificationRenderer'))
            if entry:
                yield entry
            next_page = item.get('continuationItemRenderer')
            if next_page:
                continuation_list[0] = next_page

    def _extract_notification_renderer(self, notification):
        """Build a ``url`` result for a single notification.

        Returns None when the notification points at neither a video nor a
        community post.
        """
        video_id = traverse_obj(
            notification, ('navigationEndpoint', 'watchEndpoint', 'videoId'), expected_type=str)
        channel_id = None
        if video_id:
            url = f'https://www.youtube.com/watch?v={video_id}'
        else:
            browse_ep = traverse_obj(
                notification, ('navigationEndpoint', 'browseEndpoint'), expected_type=dict)
            channel_id = traverse_obj(browse_ep, 'browseId', expected_type=str)
            post_id = self._search_regex(
                r'/post/(.+)', traverse_obj(browse_ep, 'canonicalBaseUrl', expected_type=str),
                'post id', default=None)
            if not (channel_id and post_id):
                return
            # The direct /post url redirects to this in the browser
            url = f'https://www.youtube.com/channel/{channel_id}/community?lb={post_id}'

        channel = traverse_obj(
            notification,
            ('contextualMenu', 'menuRenderer', 'items', 1, 'menuServiceItemRenderer', 'text', 'runs', 1, 'text'),
            expected_type=str)

        notification_title = self._get_text(notification, 'shortMessage')
        if notification_title:
            notification_title = notification_title.replace('\xad', '')  # remove soft hyphens
        # TODO: handle recommended videos
        title_pattern = rf'{re.escape(channel or "")}[^:]+: (.+)'
        title = self._search_regex(title_pattern, notification_title, 'video title', default=None)

        # The date derived from the "sent" text is only emitted on request
        upload_date = None
        if self._configuration_arg('approximate_date', ie_key=YoutubeTabIE.ie_key()):
            upload_date = strftime_or_none(
                self._extract_time_text(notification, 'sentTimeText')[0], '%Y%m%d')

        return {
            '_type': 'url',
            'url': url,
            'ie_key': (YoutubeIE if video_id else YoutubeTabIE).ie_key(),
            'video_id': video_id,
            'title': title,
            'channel_id': channel_id,
            'channel': channel,
            'thumbnails': self._extract_thumbnails(notification, 'videoThumbnail'),
            'upload_date': upload_date,
        }

    def _notification_menu_entries(self, ytcfg):
        """Yield notification entries page by page until no continuation is left."""
        continuation_list = [None]
        response = None
        page = 1
        while True:
            ctoken = traverse_obj(
                continuation_list,
                (0, 'continuationEndpoint', 'getNotificationMenuEndpoint', 'ctoken'),
                expected_type=str)
            query = {'ctoken': ctoken} if ctoken else {}
            # Visitor data is taken from the previous response (None on the first page)
            headers = self.generate_api_headers(
                ytcfg=ytcfg, visitor_data=self._extract_visitor_data(response))
            response = self._extract_response(
                item_id=f'page {page}', query=query, ytcfg=ytcfg,
                ep='notification/get_notification_menu', check_get_keys='actions',
                headers=headers)
            yield from self._extract_notification_menu(response, continuation_list)
            if not continuation_list[0]:
                return
            page += 1

    def _real_extract(self, url):
        """Return the notification menu as a playlist named 'notifications'."""
        display_id = 'notifications'
        if self.skip_webpage:
            ytcfg = {}
        else:
            ytcfg = self._download_ytcfg('web', display_id)
        self._report_playlist_authcheck(ytcfg)
        entries = self._notification_menu_entries(ytcfg)
        return self.playlist_result(entries, display_id, display_id)
5796
5797
# Search extractor registered under the "ytsearch" key; the test below queries
# it as "ytsearch5:<query>" and expects five entries.
class YoutubeSearchIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor):
    IE_DESC = 'YouTube search'
    IE_NAME = 'youtube:search'
    _SEARCH_KEY = 'ytsearch'
    _SEARCH_PARAMS = 'EgIQAQ%3D%3D'  # Videos only
    _TESTS = [{
        'url': 'ytsearch5:youtube-dl test video',
        'playlist_count': 5,
        'info_dict': {
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
        }
    }]
b05654f0 5811
a61fd4cf 5812
# Date-sorted variant of the search extractor, registered under "ytsearchdate".
class YoutubeSearchDateIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor):
    # Name is derived from YoutubeSearchIE so the two stay in step
    IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
    _SEARCH_KEY = 'ytsearchdate'
    IE_DESC = 'YouTube search, newest videos first'
    _SEARCH_PARAMS = 'CAISAhAB'  # Videos only, sorted by date
    _TESTS = [{
        'url': 'ytsearchdate5:youtube-dl test video',
        'playlist_count': 5,
        'info_dict': {
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
        }
    }]
75dff0ee 5826
c9ae7b95 5827
# Handles youtube.com /results and /search URLs that carry a "search_query" or
# "q" parameter; an optional "sp" parameter is passed on to the search.
class YoutubeSearchURLIE(YoutubeTabBaseInfoExtractor):
    IE_DESC = 'YouTube search URLs with sorting and filter support'
    IE_NAME = YoutubeSearchIE.IE_NAME + '_url'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/(?:results|search)\?([^#]+&)?(?:search_query|q)=(?:[^&]+)(?:[&#]|$)'
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
        }
    }, {
        'url': 'https://www.youtube.com/results?search_query=python&sp=EgIQAg%253D%253D',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'python',
            'title': 'python',
        }
    }, {
        'url': 'https://www.youtube.com/results?search_query=%23cats',
        'playlist_mincount': 1,
        'info_dict': {
            'id': '#cats',
            'title': '#cats',
            # The test suite does not have support for nested playlists
            # 'entries': [{
            #     'url': r're:https://(www\.)?youtube\.com/hashtag/cats',
            #     'title': '#cats',
            # }],
        },
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Run the search named in the URL and return it as a playlist titled after the query."""
        qs = parse_qs(url)
        # "search_query" takes precedence over "q" when both are present
        query = (qs.get('search_query') or qs.get('q'))[0]
        search_params = qs.get('sp', (None,))[0]
        entries = self._search_results(query, search_params)
        return self.playlist_result(entries, query, query)
3462ffa8 5867
5868
# Handles music.youtube.com search URLs. A result section can be chosen either
# with an explicit "sp" parameter or with a "#<section name>" URL fragment.
class YoutubeMusicSearchURLIE(YoutubeTabBaseInfoExtractor):
    IE_DESC = 'YouTube music search URLs with selectable sections, e.g. #songs'
    IE_NAME = 'youtube:music:search_url'
    _VALID_URL = r'https?://music\.youtube\.com/search\?([^#]+&)?(?:search_query|q)=(?:[^&]+)(?:[&#]|$)'
    _TESTS = [{
        'url': 'https://music.youtube.com/search?q=royalty+free+music',
        'playlist_count': 16,
        'info_dict': {
            'id': 'royalty free music',
            'title': 'royalty free music',
        }
    }, {
        'url': 'https://music.youtube.com/search?q=royalty+free+music&sp=EgWKAQIIAWoKEAoQAxAEEAkQBQ%3D%3D',
        'playlist_mincount': 30,
        'info_dict': {
            'id': 'royalty free music - songs',
            'title': 'royalty free music - songs',
        },
        'params': {'extract_flat': 'in_playlist'}
    }, {
        'url': 'https://music.youtube.com/search?q=royalty+free+music#community+playlists',
        'playlist_mincount': 30,
        'info_dict': {
            'id': 'royalty free music - community playlists',
            'title': 'royalty free music - community playlists',
        },
        'params': {'extract_flat': 'in_playlist'}
    }]

    # Section name -> value of the "sp" search parameter selecting that section
    _SECTIONS = {
        'albums': 'EgWKAQIYAWoKEAoQAxAEEAkQBQ==',
        'artists': 'EgWKAQIgAWoKEAoQAxAEEAkQBQ==',
        'community playlists': 'EgeKAQQoAEABagoQChADEAQQCRAF',
        'featured playlists': 'EgeKAQQoADgBagwQAxAJEAQQDhAKEAU==',
        'songs': 'EgWKAQIIAWoKEAoQAxAEEAkQBQ==',
        'videos': 'EgWKAQIQAWoKEAoQAxAEEAkQBQ==',
    }

    def _real_extract(self, url):
        """Run the music search and title the playlist "<query>[ - <section>]"."""
        qs = parse_qs(url)
        query = (qs.get('search_query') or qs.get('q'))[0]
        params = qs.get('sp', (None,))[0]

        if params:
            # Name the section after a known "sp" value, else after the raw value
            section = params
            for name, token in self._SECTIONS.items():
                if token == params:
                    section = name
                    break
        else:
            # No "sp": read the section name from the URL fragment, if any
            fragment = (url.split('#') + [''])[1]
            section = urllib.parse.unquote_plus(fragment).lower()
            params = self._SECTIONS.get(section)
            if not params:
                section = None

        title = join_nonempty(query, section, delim=' - ')
        entries = self._search_results(query, params, default_client='web_music')
        return self.playlist_result(entries, title, title)
16aa9ea4 5920
5921
class YoutubeFeedsInfoExtractor(InfoExtractor):
    """
    Common base for the feed keyword extractors.
    Each subclass must override _FEED_NAME; it determines both the
    extractor name and the /feed/ URL that is requested.
    """
    _LOGIN_REQUIRED = True
    _FEED_NAME = 'feeds'

    def _real_initialize(self):
        # Reuse the login check of YoutubeBaseInfoExtractor without inheriting from it
        YoutubeBaseInfoExtractor._check_login_required(self)

    @classproperty
    def IE_NAME(self):
        return f'youtube:{self._FEED_NAME}'

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the /feed/<_FEED_NAME> URL."""
        feed_url = f'https://www.youtube.com/feed/{self._FEED_NAME}'
        return self.url_result(feed_url, ie=YoutubeTabIE.ie_key())
25f14e9f
S
5940
5941
class YoutubeWatchLaterIE(InfoExtractor):
    """Map the ":ytwatchlater" keyword onto the "WL" playlist URL."""

    IE_NAME = 'youtube:watchlater'
    IE_DESC = 'Youtube watch later list; ":ytwatchlater" keyword (requires cookies)'
    _VALID_URL = r':ytwatchlater'
    _TESTS = [
        {'url': ':ytwatchlater', 'only_matching': True},
    ]

    def _real_extract(self, url):
        """Hand the fixed watch-later playlist URL to YoutubeTabIE."""
        watch_later_url = 'https://www.youtube.com/playlist?list=WL'
        return self.url_result(watch_later_url, ie=YoutubeTabIE.ie_key())
3462ffa8 5954
5955
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the bare youtube.com front page and ":ytrec"."""

    IE_DESC = 'YouTube recommended videos; ":ytrec" keyword'
    # Matches the site root (optionally followed by a query or fragment) as
    # well as the ":ytrec" / ":ytrecommended" keywords.
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/?(?:[?#]|$)|:ytrec(?:ommended)?'
    _FEED_NAME = 'recommended'
    _LOGIN_REQUIRED = False
    # All test cases only check that the URL is matched by this extractor.
    _TESTS = [
        {'url': matched_url, 'only_matching': True}
        for matched_url in (':ytrec', ':ytrecommended', 'https://youtube.com')
    ]
1ed5b5c9 5971
1ed5b5c9 5972
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the ":ytsubs" family of keywords."""

    IE_DESC = 'YouTube subscriptions feed; ":ytsubs" keyword (requires cookies)'
    # Accepts ":ytsub", ":ytsubs", ":ytsubscription" and ":ytsubscriptions".
    _VALID_URL = r':ytsub(?:scription)?s?'
    _FEED_NAME = 'subscriptions'
    # All test cases only check that the URL is matched by this extractor.
    _TESTS = [
        {'url': matched_url, 'only_matching': True}
        for matched_url in (':ytsubs', ':ytsubscriptions')
    ]
1ed5b5c9 5984
1ed5b5c9 5985
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the ":ythis" / ":ythistory" keywords."""

    IE_DESC = 'Youtube watch history; ":ythis" keyword (requires cookies)'
    _VALID_URL = r':ythis(?:tory)?'
    _FEED_NAME = 'history'
    _TESTS = [
        {'url': ':ythistory', 'only_matching': True},
    ]
1ed5b5c9
JMF
5994
5995
class YoutubeStoriesIE(InfoExtractor):
    """Resolve "ytstories:<channel id>" to the matching "RLTD" playlist."""

    IE_DESC = 'YouTube channel stories; "ytstories:" prefix'
    IE_NAME = 'youtube:stories'
    # The captured id is the channel id without its leading "UC".
    _VALID_URL = r'ytstories:UC(?P<id>[A-Za-z0-9_-]{21}[AQgw])$'
    _TESTS = [
        {'url': 'ytstories:UCwFCb4jeqaKWnciAYM-ZVHg', 'only_matching': True},
    ]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the stories playlist URL."""
        channel_suffix = self._match_id(url)
        playlist_id = f'RLTD{channel_suffix}'
        stories_url = f'https://www.youtube.com/playlist?list={playlist_id}&playnext=1'
        return self.url_result(stories_url, ie=YoutubeTabIE, video_id=playlist_id)
6010
6011
15870e90
PH
class YoutubeTruncatedURLIE(InfoExtractor):
    """Catch watch URLs that lost their video id and explain why.

    Matches YouTube watch/attribution URLs that end right after a single
    non-identifying query argument, which is what is left when an unquoted
    "&" makes the shell cut the URL short. Extraction always fails with an
    expected error describing how to quote the URL.
    """

    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError with quoting advice.

        Raises:
            ExtractorError: unconditionally; marked as expected.
        """
        # The example commands name this program (yt-dlp), so that the
        # advice can be followed as written.
        raise ExtractorError(
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like yt-dlp '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply yt-dlp BaW_jenozKc .',
            expected=True)
772fd5cc
PH
6059
6060
class YoutubeClipIE(YoutubeTabBaseInfoExtractor):
    """Extract a YouTube clip as a time-limited section of its base video.

    The clip page is parsed for the id of the underlying video and for the
    clip's start/end offsets; everything else is delegated to YoutubeIE
    through a "url_transparent" result.
    """

    IE_NAME = 'youtube:clip'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/clip/(?P<id>[^/?#]+)'
    _TESTS = [{
        # FIXME: Other metadata should be extracted from the clip, not from the base video
        'url': 'https://www.youtube.com/clip/UgytZKpehg-hEMBSn3F4AaABCQ',
        'info_dict': {
            'id': 'UgytZKpehg-hEMBSn3F4AaABCQ',
            'ext': 'mp4',
            'section_start': 29.0,
            'section_end': 39.7,
            'duration': 10.7,
            'age_limit': 0,
            'availability': 'public',
            'categories': ['Gaming'],
            'channel': 'Scott The Woz',
            'channel_id': 'UC4rqhyiTs7XyuODcECvuiiQ',
            'channel_url': 'https://www.youtube.com/channel/UC4rqhyiTs7XyuODcECvuiiQ',
            'description': 'md5:7a4517a17ea9b4bd98996399d8bb36e7',
            'like_count': int,
            'playable_in_embed': True,
            'tags': 'count:17',
            'thumbnail': 'https://i.ytimg.com/vi_webp/ScPX26pdQik/maxresdefault.webp',
            'title': 'Mobile Games on Console - Scott The Woz',
            'upload_date': '20210920',
            'uploader': 'Scott The Woz',
            'uploader_id': 'scottthewoz',
            'uploader_url': 'http://www.youtube.com/user/scottthewoz',
            'view_count': int,
            'live_status': 'not_live',
            'channel_follower_count': int
        }
    }]

    def _real_extract(self, url):
        """Return a url_transparent result for the clip at *url*.

        Returns:
            dict with the base video's watch URL, the clip id as ``id`` and
            ``section_start`` / ``section_end`` in seconds.

        Raises:
            ExtractorError: if the page data contains neither a video id
                nor the clip's loop command.
        """
        clip_id = self._match_id(url)
        _, data = self._extract_webpage(url, clip_id)

        video_id = traverse_obj(data, ('currentVideoEndpoint', 'watchEndpoint', 'videoId'))
        if not video_id:
            raise ExtractorError('Unable to find video ID')

        # get_all=False: take the first loopCommand found along the path.
        clip_data = traverse_obj(data, (
            'engagementPanels', ..., 'engagementPanelSectionListRenderer', 'content', 'clipSectionRenderer',
            'contents', ..., 'clipAttributionRenderer', 'onScrubExit', 'commandExecutorCommand', 'commands', ...,
            'openPopupAction', 'popup', 'notificationActionRenderer', 'actionButton', 'buttonRenderer', 'command',
            'commandExecutorCommand', 'commands', ..., 'loopCommand'), get_all=False)
        if not clip_data:
            # Without this guard the subscripts below fail with an opaque
            # TypeError when the lookup finds nothing.
            raise ExtractorError('Unable to find clip data')

        return {
            '_type': 'url_transparent',
            'url': f'https://www.youtube.com/watch?v={video_id}',
            'ie_key': YoutubeIE.ie_key(),
            'id': clip_id,
            # The page reports the boundaries in milliseconds.
            'section_start': int(clip_data['startTimeMs']) / 1000,
            'section_end': int(clip_data['endTimeMs']) / 1000,
        }
3cd786db 6117
6118
772fd5cc
PH
class YoutubeTruncatedIDIE(InfoExtractor):
    """Reject watch URLs whose ``v`` value is only 1 to 10 characters long."""

    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [
        {'url': 'https://www.youtube.com/watch?v=N_708QY7Ob', 'only_matching': True},
    ]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError naming the partial id."""
        partial_id = self._match_id(url)
        message = f'Incomplete YouTube ID {partial_id}. URL {url} looks truncated.'
        raise ExtractorError(message, expected=True)