jfr.im git - yt-dlp.git/blob - yt_dlp/extractor/youtube.py
[ie/youtube] Suppress "Unavailable videos are hidden" warning (#10159)
[yt-dlp.git] / yt_dlp / extractor / youtube.py
1 import base64
2 import calendar
3 import collections
4 import copy
5 import datetime as dt
6 import enum
7 import hashlib
8 import itertools
9 import json
10 import math
11 import os.path
12 import random
13 import re
14 import shlex
15 import sys
16 import threading
17 import time
18 import traceback
19 import urllib.parse
20
21 from .common import InfoExtractor, SearchInfoExtractor
22 from .openload import PhantomJSwrapper
23 from ..compat import functools
24 from ..jsinterp import JSInterpreter
25 from ..networking.exceptions import HTTPError, network_exceptions
26 from ..utils import (
27 NO_DEFAULT,
28 ExtractorError,
29 LazyList,
30 UserNotLive,
31 bug_reports_message,
32 classproperty,
33 clean_html,
34 datetime_from_str,
35 dict_get,
36 filesize_from_tbr,
37 filter_dict,
38 float_or_none,
39 format_field,
40 get_first,
41 int_or_none,
42 is_html,
43 join_nonempty,
44 js_to_json,
45 mimetype2ext,
46 orderedSet,
47 parse_codecs,
48 parse_count,
49 parse_duration,
50 parse_iso8601,
51 parse_qs,
52 qualities,
53 remove_start,
54 smuggle_url,
55 str_or_none,
56 str_to_int,
57 strftime_or_none,
58 traverse_obj,
59 try_call,
60 try_get,
61 unescapeHTML,
62 unified_strdate,
63 unified_timestamp,
64 unsmuggle_url,
65 update_url_query,
66 url_or_none,
67 urljoin,
68 variadic,
69 )
70
# NOTE(review): not referenced in the visible part of this file; presumably the key under
# which the originating client name is attached to streaming data - confirm against its users.
STREAMING_DATA_CLIENT_NAME = '__yt_dlp_client'
# Per-client InnerTube settings, keyed by client name.
# Entries may omit INNERTUBE_API_KEY, INNERTUBE_HOST and REQUIRE_JS_PLAYER:
# build_innertube_clients() fills those in with defaults, sets 'hl' in every
# client context, assigns a 'priority' and adds a '<name>_embedscreen' copy
# for every client whose name has no variant suffix.
# any clients starting with _ cannot be explicitly requested by the user
INNERTUBE_CLIENTS = {
    'web': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB',
                'clientVersion': '2.20220801.00.00',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 1,
    },
    'web_embedded': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB_EMBEDDED_PLAYER',
                'clientVersion': '1.20220731.00.00',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 56,
    },
    'web_music': {
        'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
        'INNERTUBE_HOST': 'music.youtube.com',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB_REMIX',
                'clientVersion': '1.20220727.01.00',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 67,
    },
    'web_creator': {
        'INNERTUBE_API_KEY': 'AIzaSyBUPetSUmoZL-OhlxA7wSac5XinrygCqMo',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB_CREATOR',
                'clientVersion': '1.20220726.00.00',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 62,
    },
    'android': {
        'INNERTUBE_API_KEY': 'AIzaSyA8eiZmM1FaDVjRy-df2KTyQ_vz_yYM39w',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID',
                'clientVersion': '19.09.37',
                'androidSdkVersion': 30,
                'userAgent': 'com.google.android.youtube/19.09.37 (Linux; U; Android 11) gzip',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 3,
        'REQUIRE_JS_PLAYER': False,
    },
    'android_embedded': {
        'INNERTUBE_API_KEY': 'AIzaSyCjc_pVEDi4qsv5MtC2dMXzpIaDoRFLsxw',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID_EMBEDDED_PLAYER',
                'clientVersion': '19.09.37',
                'androidSdkVersion': 30,
                'userAgent': 'com.google.android.youtube/19.09.37 (Linux; U; Android 11) gzip',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 55,
        'REQUIRE_JS_PLAYER': False,
    },
    'android_music': {
        'INNERTUBE_API_KEY': 'AIzaSyAOghZGza2MQSZkY_zfZ370N-PUdXEo8AI',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID_MUSIC',
                'clientVersion': '6.42.52',
                'androidSdkVersion': 30,
                'userAgent': 'com.google.android.apps.youtube.music/6.42.52 (Linux; U; Android 11) gzip',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 21,
        'REQUIRE_JS_PLAYER': False,
    },
    'android_creator': {
        'INNERTUBE_API_KEY': 'AIzaSyD_qjV8zaaUMehtLkrKFgVeSX_Iqbtyws8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID_CREATOR',
                'clientVersion': '22.30.100',
                'androidSdkVersion': 30,
                'userAgent': 'com.google.android.apps.youtube.creator/22.30.100 (Linux; U; Android 11) gzip',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 14,
        'REQUIRE_JS_PLAYER': False,
    },
    # iOS clients have HLS live streams. Setting device model to get 60fps formats.
    # See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/680#issuecomment-1002724558
    'ios': {
        'INNERTUBE_API_KEY': 'AIzaSyB-63vPrdThhKuerbB2N_l7Kwwcxj6yUAc',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS',
                'clientVersion': '19.09.3',
                'deviceModel': 'iPhone14,3',
                'userAgent': 'com.google.ios.youtube/19.09.3 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 5,
        'REQUIRE_JS_PLAYER': False,
    },
    'ios_embedded': {
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS_MESSAGES_EXTENSION',
                'clientVersion': '19.09.3',
                'deviceModel': 'iPhone14,3',
                'userAgent': 'com.google.ios.youtube/19.09.3 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 66,
        'REQUIRE_JS_PLAYER': False,
    },
    'ios_music': {
        'INNERTUBE_API_KEY': 'AIzaSyBAETezhkwP0ZWA02RsqT1zu78Fpt0bC_s',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS_MUSIC',
                'clientVersion': '6.33.3',
                'deviceModel': 'iPhone14,3',
                'userAgent': 'com.google.ios.youtubemusic/6.33.3 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 26,
        'REQUIRE_JS_PLAYER': False,
    },
    'ios_creator': {
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS_CREATOR',
                'clientVersion': '22.33.101',
                'deviceModel': 'iPhone14,3',
                'userAgent': 'com.google.ios.ytcreator/22.33.101 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 15,
        'REQUIRE_JS_PLAYER': False,
    },
    # mweb has 'ultralow' formats
    # See: https://github.com/yt-dlp/yt-dlp/pull/557
    'mweb': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'MWEB',
                'clientVersion': '2.20220801.00.00',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 2,
    },
    # This client can access age restricted videos (unless the uploader has disabled the 'allow embedding' option)
    # See: https://github.com/zerodytrash/YouTube-Internal-Clients
    'tv_embedded': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'TVHTML5_SIMPLY_EMBEDDED_PLAYER',
                'clientVersion': '2.0',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 85,
    },
    # This client has pre-merged video+audio 720p/1080p streams
    'mediaconnect': {
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'MEDIA_CONNECT_FRONTEND',
                'clientVersion': '0.1',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 95,
    },
}
254
255
256 def _split_innertube_client(client_name):
257 variant, *base = client_name.rsplit('.', 1)
258 if base:
259 return variant, base[0], variant
260 base, *variant = client_name.split('_', 1)
261 return client_name, base, variant[0] if variant else None
262
263
def short_client_name(client_name):
    """
    Build a short upper-case label for a client name.

    The label is made from the first four characters of the first
    '_'-separated word and the initial of every following word, combined
    with join_nonempty(). 'embedscreen' is treated as two words ('e', 's').

    @param client_name  client name as accepted by _split_innertube_client
    @returns            upper-cased short label
    """
    main, *parts = _split_innertube_client(client_name)[0].replace('embedscreen', 'e_s').split('_')
    # x[:1] rather than x[0]: adjacent or trailing underscores produce empty
    # parts, which must contribute nothing instead of raising IndexError
    return join_nonempty(main[:4], ''.join(x[:1] for x in parts)).upper()
267
268
def build_innertube_clients():
    """
    Complete every entry of INNERTUBE_CLIENTS in place.

    For each client present when the function starts: fill in the default
    API key, host, REQUIRE_JS_PLAYER flag and 'hl', then assign a 'priority'
    derived from the base client. Clients without a variant additionally get
    a '<name>_embedscreen' copy registered in INNERTUBE_CLIENTS.
    """
    third_party = {
        'embedUrl': 'https://www.youtube.com/',  # Can be any valid URL
    }
    base_clients = ('ios', 'android', 'web', 'tv', 'mweb')
    # NOTE(review): presumably earlier entries of base_clients rank higher - confirm against qualities()
    rank = qualities(tuple(reversed(base_clients)))

    # Iterate over a snapshot: embedscreen variants are inserted while looping
    for name, config in list(INNERTUBE_CLIENTS.items()):
        config.setdefault('INNERTUBE_API_KEY', 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8')
        config.setdefault('INNERTUBE_HOST', 'www.youtube.com')
        config.setdefault('REQUIRE_JS_PLAYER', True)
        config['INNERTUBE_CONTEXT']['client'].setdefault('hl', 'en')

        base_name, variant_name = _split_innertube_client(name)[1:]
        config['priority'] = 10 * rank(base_name)

        if variant_name == 'embedded':
            config['INNERTUBE_CONTEXT']['thirdParty'] = third_party
            config['priority'] -= 2
        elif variant_name:
            config['priority'] -= 3
        else:
            # Base client: derive its embed-screen counterpart from a deep copy
            clone = copy.deepcopy(config)
            clone['INNERTUBE_CONTEXT']['client']['clientScreen'] = 'EMBED'
            clone['INNERTUBE_CONTEXT']['thirdParty'] = third_party
            clone['priority'] -= 3
            INNERTUBE_CLIENTS[f'{name}_embedscreen'] = clone
295
296
297 build_innertube_clients()
298
299
class BadgeType(enum.Enum):
    """Badge categories produced by YoutubeBaseInfoExtractor._extract_badges"""
    # Availability of the item
    AVAILABILITY_UNLISTED = enum.auto()
    AVAILABILITY_PRIVATE = enum.auto()
    AVAILABILITY_PUBLIC = enum.auto()
    AVAILABILITY_PREMIUM = enum.auto()
    AVAILABILITY_SUBSCRIPTION = enum.auto()  # mapped from "members only" badges
    # Other badges
    LIVE_NOW = enum.auto()
    VERIFIED = enum.auto()
308
309
310 class YoutubeBaseInfoExtractor(InfoExtractor):
311 """Provide base functions for Youtube extractors"""
312
313 _RESERVED_NAMES = (
314 r'channel|c|user|playlist|watch|w|v|embed|e|live|watch_popup|clip|'
315 r'shorts|movies|results|search|shared|hashtag|trending|explore|feed|feeds|'
316 r'browse|oembed|get_video_info|iframe_api|s/player|source|'
317 r'storefront|oops|index|account|t/terms|about|upload|signin|logout')
318
319 _PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)'
320
321 # _NETRC_MACHINE = 'youtube'
322
323 # If True it will raise an error if no login info is provided
324 _LOGIN_REQUIRED = False
325
326 _INVIDIOUS_SITES = (
327 # invidious-redirect websites
328 r'(?:www\.)?redirect\.invidious\.io',
329 r'(?:(?:www|dev)\.)?invidio\.us',
330 # Invidious instances taken from https://github.com/iv-org/documentation/blob/master/docs/instances.md
331 r'(?:www\.)?invidious\.pussthecat\.org',
332 r'(?:www\.)?invidious\.zee\.li',
333 r'(?:www\.)?invidious\.ethibox\.fr',
334 r'(?:www\.)?iv\.ggtyler\.dev',
335 r'(?:www\.)?inv\.vern\.i2p',
336 r'(?:www\.)?am74vkcrjp2d5v36lcdqgsj2m6x36tbrkhsruoegwfcizzabnfgf5zyd\.onion',
337 r'(?:www\.)?inv\.riverside\.rocks',
338 r'(?:www\.)?invidious\.silur\.me',
339 r'(?:www\.)?inv\.bp\.projectsegfau\.lt',
340 r'(?:www\.)?invidious\.g4c3eya4clenolymqbpgwz3q3tawoxw56yhzk4vugqrl6dtu3ejvhjid\.onion',
341 r'(?:www\.)?invidious\.slipfox\.xyz',
342 r'(?:www\.)?invidious\.esmail5pdn24shtvieloeedh7ehz3nrwcdivnfhfcedl7gf4kwddhkqd\.onion',
343 r'(?:www\.)?inv\.vernccvbvyi5qhfzyqengccj7lkove6bjot2xhh5kajhwvidqafczrad\.onion',
344 r'(?:www\.)?invidious\.tiekoetter\.com',
345 r'(?:www\.)?iv\.odysfvr23q5wgt7i456o5t3trw2cw5dgn56vbjfbq2m7xsc5vqbqpcyd\.onion',
346 r'(?:www\.)?invidious\.nerdvpn\.de',
347 r'(?:www\.)?invidious\.weblibre\.org',
348 r'(?:www\.)?inv\.odyssey346\.dev',
349 r'(?:www\.)?invidious\.dhusch\.de',
350 r'(?:www\.)?iv\.melmac\.space',
351 r'(?:www\.)?watch\.thekitty\.zone',
352 r'(?:www\.)?invidious\.privacydev\.net',
353 r'(?:www\.)?ng27owmagn5amdm7l5s3rsqxwscl5ynppnis5dqcasogkyxcfqn7psid\.onion',
354 r'(?:www\.)?invidious\.drivet\.xyz',
355 r'(?:www\.)?vid\.priv\.au',
356 r'(?:www\.)?euxxcnhsynwmfidvhjf6uzptsmh4dipkmgdmcmxxuo7tunp3ad2jrwyd\.onion',
357 r'(?:www\.)?inv\.vern\.cc',
358 r'(?:www\.)?invidious\.esmailelbob\.xyz',
359 r'(?:www\.)?invidious\.sethforprivacy\.com',
360 r'(?:www\.)?yt\.oelrichsgarcia\.de',
361 r'(?:www\.)?yt\.artemislena\.eu',
362 r'(?:www\.)?invidious\.flokinet\.to',
363 r'(?:www\.)?invidious\.baczek\.me',
364 r'(?:www\.)?y\.com\.sb',
365 r'(?:www\.)?invidious\.epicsite\.xyz',
366 r'(?:www\.)?invidious\.lidarshield\.cloud',
367 r'(?:www\.)?yt\.funami\.tech',
368 r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',
369 r'(?:www\.)?osbivz6guyeahrwp2lnwyjk2xos342h4ocsxyqrlaopqjuhwn2djiiyd\.onion',
370 r'(?:www\.)?u2cvlit75owumwpy4dj2hsmvkq7nvrclkpht7xgyye2pyoxhpmclkrad\.onion',
371 # youtube-dl invidious instances list
372 r'(?:(?:www|no)\.)?invidiou\.sh',
373 r'(?:(?:www|fi)\.)?invidious\.snopyta\.org',
374 r'(?:www\.)?invidious\.kabi\.tk',
375 r'(?:www\.)?invidious\.mastodon\.host',
376 r'(?:www\.)?invidious\.zapashcanon\.fr',
377 r'(?:www\.)?(?:invidious(?:-us)?|piped)\.kavin\.rocks',
378 r'(?:www\.)?invidious\.tinfoil-hat\.net',
379 r'(?:www\.)?invidious\.himiko\.cloud',
380 r'(?:www\.)?invidious\.reallyancient\.tech',
381 r'(?:www\.)?invidious\.tube',
382 r'(?:www\.)?invidiou\.site',
383 r'(?:www\.)?invidious\.site',
384 r'(?:www\.)?invidious\.xyz',
385 r'(?:www\.)?invidious\.nixnet\.xyz',
386 r'(?:www\.)?invidious\.048596\.xyz',
387 r'(?:www\.)?invidious\.drycat\.fr',
388 r'(?:www\.)?inv\.skyn3t\.in',
389 r'(?:www\.)?tube\.poal\.co',
390 r'(?:www\.)?tube\.connect\.cafe',
391 r'(?:www\.)?vid\.wxzm\.sx',
392 r'(?:www\.)?vid\.mint\.lgbt',
393 r'(?:www\.)?vid\.puffyan\.us',
394 r'(?:www\.)?yewtu\.be',
395 r'(?:www\.)?yt\.elukerio\.org',
396 r'(?:www\.)?yt\.lelux\.fi',
397 r'(?:www\.)?invidious\.ggc-project\.de',
398 r'(?:www\.)?yt\.maisputain\.ovh',
399 r'(?:www\.)?ytprivate\.com',
400 r'(?:www\.)?invidious\.13ad\.de',
401 r'(?:www\.)?invidious\.toot\.koeln',
402 r'(?:www\.)?invidious\.fdn\.fr',
403 r'(?:www\.)?watch\.nettohikari\.com',
404 r'(?:www\.)?invidious\.namazso\.eu',
405 r'(?:www\.)?invidious\.silkky\.cloud',
406 r'(?:www\.)?invidious\.exonip\.de',
407 r'(?:www\.)?invidious\.riverside\.rocks',
408 r'(?:www\.)?invidious\.blamefran\.net',
409 r'(?:www\.)?invidious\.moomoo\.de',
410 r'(?:www\.)?ytb\.trom\.tf',
411 r'(?:www\.)?yt\.cyberhost\.uk',
412 r'(?:www\.)?kgg2m7yk5aybusll\.onion',
413 r'(?:www\.)?qklhadlycap4cnod\.onion',
414 r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',
415 r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',
416 r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',
417 r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',
418 r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',
419 r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',
420 r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',
421 r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',
422 r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',
423 r'(?:www\.)?hpniueoejy4opn7bc4ftgazyqjoeqwlvh2uiku2xqku6zpoa4bf5ruid\.onion',
424 # piped instances from https://github.com/TeamPiped/Piped/wiki/Instances
425 r'(?:www\.)?piped\.kavin\.rocks',
426 r'(?:www\.)?piped\.tokhmi\.xyz',
427 r'(?:www\.)?piped\.syncpundit\.io',
428 r'(?:www\.)?piped\.mha\.fi',
429 r'(?:www\.)?watch\.whatever\.social',
430 r'(?:www\.)?piped\.garudalinux\.org',
431 r'(?:www\.)?piped\.rivo\.lol',
432 r'(?:www\.)?piped-libre\.kavin\.rocks',
433 r'(?:www\.)?yt\.jae\.fi',
434 r'(?:www\.)?piped\.mint\.lgbt',
435 r'(?:www\.)?il\.ax',
436 r'(?:www\.)?piped\.esmailelbob\.xyz',
437 r'(?:www\.)?piped\.projectsegfau\.lt',
438 r'(?:www\.)?piped\.privacydev\.net',
439 r'(?:www\.)?piped\.palveluntarjoaja\.eu',
440 r'(?:www\.)?piped\.smnz\.de',
441 r'(?:www\.)?piped\.adminforge\.de',
442 r'(?:www\.)?watch\.whatevertinfoil\.de',
443 r'(?:www\.)?piped\.qdi\.fi',
444 r'(?:(?:www|cf)\.)?piped\.video',
445 r'(?:www\.)?piped\.aeong\.one',
446 r'(?:www\.)?piped\.moomoo\.me',
447 r'(?:www\.)?piped\.chauvet\.pro',
448 r'(?:www\.)?watch\.leptons\.xyz',
449 r'(?:www\.)?pd\.vern\.cc',
450 r'(?:www\.)?piped\.hostux\.net',
451 r'(?:www\.)?piped\.lunar\.icu',
452 # Hyperpipe instances from https://hyperpipe.codeberg.page/
453 r'(?:www\.)?hyperpipe\.surge\.sh',
454 r'(?:www\.)?hyperpipe\.esmailelbob\.xyz',
455 r'(?:www\.)?listen\.whatever\.social',
456 r'(?:www\.)?music\.adminforge\.de',
457 )
458
459 # extracted from account/account_menu ep
460 # XXX: These are the supported YouTube UI and API languages,
461 # which is slightly different from languages supported for translation in YouTube studio
462 _SUPPORTED_LANG_CODES = [
463 'af', 'az', 'id', 'ms', 'bs', 'ca', 'cs', 'da', 'de', 'et', 'en-IN', 'en-GB', 'en', 'es',
464 'es-419', 'es-US', 'eu', 'fil', 'fr', 'fr-CA', 'gl', 'hr', 'zu', 'is', 'it', 'sw', 'lv',
465 'lt', 'hu', 'nl', 'no', 'uz', 'pl', 'pt-PT', 'pt', 'ro', 'sq', 'sk', 'sl', 'sr-Latn', 'fi',
466 'sv', 'vi', 'tr', 'be', 'bg', 'ky', 'kk', 'mk', 'mn', 'ru', 'sr', 'uk', 'el', 'hy', 'iw',
467 'ur', 'ar', 'fa', 'ne', 'mr', 'hi', 'as', 'bn', 'pa', 'gu', 'or', 'ta', 'te', 'kn', 'ml',
468 'si', 'th', 'lo', 'my', 'ka', 'am', 'km', 'zh-CN', 'zh-TW', 'zh-HK', 'ja', 'ko',
469 ]
470
471 _IGNORED_WARNINGS = {
472 'Unavailable videos will be hidden during playback',
473 'Unavailable videos are hidden',
474 }
475
476 _YT_HANDLE_RE = r'@[\w.-]{3,30}' # https://support.google.com/youtube/answer/11585688?hl=en
477 _YT_CHANNEL_UCID_RE = r'UC[\w-]{22}'
478
def ucid_or_none(self, ucid):
    """Return `ucid` if the whole string matches _YT_CHANNEL_UCID_RE, otherwise None"""
    full_match_re = rf'^({self._YT_CHANNEL_UCID_RE})$'
    return self._search_regex(full_match_re, ucid, 'UC-id', default=None)
481
def handle_or_none(self, handle):
    """Return `handle` if the whole string matches _YT_HANDLE_RE, otherwise None"""
    full_match_re = rf'^({self._YT_HANDLE_RE})$'
    return self._search_regex(full_match_re, handle, '@-handle', default=None)
484
def handle_from_url(self, url):
    """
    Extract the @-handle that forms the first path segment of `url`.
    The youtube.com scheme/host prefix is optional. Returns None if there is no match.
    """
    handle_url_re = rf'^(?:https?://(?:www\.)?youtube\.com)?/({self._YT_HANDLE_RE})'
    return self._search_regex(handle_url_re, url, 'channel handle', default=None)
488
def ucid_from_url(self, url):
    """
    Extract a UC channel id that directly follows the leading '/' of `url`.
    The youtube.com scheme/host prefix is optional. Returns None if there is no match.
    """
    ucid_url_re = rf'^(?:https?://(?:www\.)?youtube\.com)?/({self._YT_CHANNEL_UCID_RE})'
    return self._search_regex(ucid_url_re, url, 'channel id', default=None)
492
@functools.cached_property
def _preferred_lang(self):
    """
    Language code requested through the 'lang' extractor argument.

    @returns  the code, or None if the argument is unset
    @raises   ExtractorError if the code is not in _SUPPORTED_LANG_CODES
    """
    lang = self._configuration_arg('lang', ie_key='Youtube', casesense=True, default=[''])[0]
    if not lang:
        return None
    if lang not in self._SUPPORTED_LANG_CODES:
        supported = join_nonempty(*self._SUPPORTED_LANG_CODES, delim=", ")
        raise ExtractorError(
            f'Unsupported language code: {lang}. Supported language codes (case-sensitive): {supported}.',
            expected=True)
    if lang != 'en':
        # Extraction is tuned for English output; tell the user about the risk
        self.report_warning(
            f'Preferring "{lang}" translated fields. Note that some metadata extraction may fail or be incorrect.')
    return lang
510
def _initialize_consent(self):
    """
    Set the SOCS consent cookie to 'CAI' unless a __Secure-3PSID cookie is
    present or an existing SOCS cookie has a value not starting with 'CAA'.
    """
    jar = self._get_cookies('https://www.youtube.com/')
    if jar.get('__Secure-3PSID'):
        return
    socs_cookie = jar.get('SOCS')
    # A missing cookie or a value starting with 'CAA' means: not consented
    needs_consent = not socs_cookie or socs_cookie.value.startswith('CAA')
    if needs_consent:
        self._set_cookie('.youtube.com', 'SOCS', 'CAI', secure=True)  # accept all (required for mixes)
519
def _initialize_pref(self):
    """
    Rewrite the PREF cookie so that 'hl' is the preferred language (default 'en')
    and 'tz' is 'UTC'. Other fields of an existing PREF cookie are kept.
    """
    pref_cookie = self._get_cookies('https://www.youtube.com/').get('PREF')
    settings = {}
    if pref_cookie:
        try:
            settings = dict(urllib.parse.parse_qsl(pref_cookie.value))
        except ValueError:
            # Unparsable cookie: warn and start from an empty PREF
            self.report_warning('Failed to parse user PREF cookie' + bug_reports_message())
    settings['hl'] = self._preferred_lang or 'en'
    settings['tz'] = 'UTC'
    self._set_cookie('.youtube.com', name='PREF', value=urllib.parse.urlencode(settings))
531
def _real_initialize(self):
    """Run the initialization steps in order: PREF cookie, consent cookie, login check"""
    steps = (
        self._initialize_pref,
        self._initialize_consent,
        self._check_login_required,
    )
    for step in steps:
        step()
536
537 def _check_login_required(self):
538 if self._LOGIN_REQUIRED and not self._cookies_passed:
539 self.raise_login_required('Login details are needed to download this content', method='cookies')
540
541 _YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*='
542 _YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*='
543
def _get_default_ytcfg(self, client='web'):
    """Return an independent deep copy of the INNERTUBE_CLIENTS entry for `client`"""
    template = INNERTUBE_CLIENTS[client]
    return copy.deepcopy(template)
546
def _get_innertube_host(self, client='web'):
    """Return the INNERTUBE_HOST configured for `client` in INNERTUBE_CLIENTS"""
    client_config = INNERTUBE_CLIENTS[client]
    return client_config['INNERTUBE_HOST']
549
def _ytcfg_get_safe(self, ytcfg, getter, expected_type=None, default_client='web'):
    """
    try_get() on `ytcfg`, retried on the default config of `default_client`
    whenever the first lookup yields a falsy result.
    """
    found = try_get(ytcfg, getter, expected_type)
    if found:
        return found
    # The default config is only built when the supplied ytcfg did not help
    return try_get(self._get_default_ytcfg(default_client), getter, expected_type)
554
def _extract_client_name(self, ytcfg, default_client='web'):
    """
    Client name from `ytcfg`: INNERTUBE_CLIENT_NAME, else the clientName of the
    InnerTube context; falls back to the default config of `default_client`.
    """
    def from_top_level(cfg):
        return cfg['INNERTUBE_CLIENT_NAME']

    def from_context(cfg):
        return cfg['INNERTUBE_CONTEXT']['client']['clientName']

    return self._ytcfg_get_safe(ytcfg, (from_top_level, from_context), str, default_client)
559
def _extract_client_version(self, ytcfg, default_client='web'):
    """
    Client version from `ytcfg`: INNERTUBE_CLIENT_VERSION, else the clientVersion of
    the InnerTube context; falls back to the default config of `default_client`.
    """
    def from_top_level(cfg):
        return cfg['INNERTUBE_CLIENT_VERSION']

    def from_context(cfg):
        return cfg['INNERTUBE_CONTEXT']['client']['clientVersion']

    return self._ytcfg_get_safe(ytcfg, (from_top_level, from_context), str, default_client)
564
def _select_api_hostname(self, req_api_hostname, default_client=None):
    """
    Pick the API hostname. Order of precedence: the 'innertube_host' extractor
    argument, `req_api_hostname`, the host of `default_client` (or of 'web').
    """
    configured_host = self._configuration_arg('innertube_host', [''], ie_key=YoutubeIE.ie_key())[0]
    if configured_host:
        return configured_host
    if req_api_hostname:
        return req_api_hostname
    return self._get_innertube_host(default_client or 'web')
568
def _extract_api_key(self, ytcfg=None, default_client='web'):
    """INNERTUBE_API_KEY from `ytcfg`, falling back to the default config of `default_client`"""
    def read_key(cfg):
        return cfg['INNERTUBE_API_KEY']

    return self._ytcfg_get_safe(ytcfg, read_key, str, default_client)
571
def _extract_context(self, ytcfg=None, default_client='web'):
    """
    Return the first dict found under INNERTUBE_CONTEXT in `ytcfg` or in the
    default config of `default_client`, with language and timezone enforced
    on its 'client' entry.
    """
    sources = (ytcfg, self._get_default_ytcfg(default_client))
    context = get_first(sources, 'INNERTUBE_CONTEXT', expected_type=dict)
    # Enforce language and tz for extraction
    # NOTE(review): presumably the dict returned here is the one nested in `context`,
    # so the assignments below show up in the returned value - confirm against traverse_obj
    client_section = traverse_obj(context, 'client', expected_type=dict, default={})
    client_section['hl'] = self._preferred_lang or 'en'
    client_section['timeZone'] = 'UTC'
    client_section['utcOffsetMinutes'] = 0
    return context
579
580 _SAPISID = None
581
582 def _generate_sapisidhash_header(self, origin='https://www.youtube.com'):
583 time_now = round(time.time())
584 if self._SAPISID is None:
585 yt_cookies = self._get_cookies('https://www.youtube.com')
586 # Sometimes SAPISID cookie isn't present but __Secure-3PAPISID is.
587 # See: https://github.com/yt-dlp/yt-dlp/issues/393
588 sapisid_cookie = dict_get(
589 yt_cookies, ('__Secure-3PAPISID', 'SAPISID'))
590 if sapisid_cookie and sapisid_cookie.value:
591 self._SAPISID = sapisid_cookie.value
592 self.write_debug('Extracted SAPISID cookie')
593 # SAPISID cookie is required if not already present
594 if not yt_cookies.get('SAPISID'):
595 self.write_debug('Copying __Secure-3PAPISID cookie to SAPISID cookie')
596 self._set_cookie(
597 '.youtube.com', 'SAPISID', self._SAPISID, secure=True, expire_time=time_now + 3600)
598 else:
599 self._SAPISID = False
600 if not self._SAPISID:
601 return None
602 # SAPISIDHASH algorithm from https://stackoverflow.com/a/32065323
603 sapisidhash = hashlib.sha1(
604 f'{time_now} {self._SAPISID} {origin}'.encode()).hexdigest()
605 return f'SAPISIDHASH {time_now}_{sapisidhash}'
606
def _call_api(self, ep, query, video_id, fatal=True, headers=None,
              note='Downloading API JSON', errnote='Unable to download API page',
              context=None, api_key=None, api_hostname=None, default_client='web'):
    """
    POST `query` as JSON to the /youtubei/v1/<ep> endpoint and return the parsed response.

    @param context       InnerTube context; built from `default_client` when falsy
    @param headers       extra headers, applied on top of the generated API headers
    @param api_key       used unless the 'innertube_key' extractor argument is set
    @param api_hostname  see _select_api_hostname
    """
    payload = {'context': context or self._extract_context(default_client=default_client)}
    payload.update(query)

    request_headers = self.generate_api_headers(default_client=default_client)
    request_headers['content-type'] = 'application/json'
    if headers:
        request_headers.update(headers)

    # The user-configured key takes precedence over the one supplied by the caller
    configured_key = self._configuration_arg(
        'innertube_key', [''], ie_key=YoutubeIE.ie_key(), casesense=True)[0]
    api_key = configured_key or api_key or self._extract_api_key(default_client=default_client)

    hostname = self._select_api_hostname(api_hostname, default_client)
    return self._download_json(
        f'https://{hostname}/youtubei/v1/{ep}',
        video_id=video_id, fatal=fatal, note=note, errnote=errnote,
        data=json.dumps(payload).encode('utf8'), headers=request_headers,
        query={'key': api_key, 'prettyPrint': 'false'})
624
def extract_yt_initial_data(self, item_id, webpage, fatal=True):
    """Search `webpage` for the JSON assigned to ytInitialData (see _YT_INITIAL_DATA_RE)"""
    start_pattern = self._YT_INITIAL_DATA_RE
    return self._search_json(start_pattern, webpage, 'yt initial data', item_id, fatal=fatal)
627
628 @staticmethod
629 def _extract_session_index(*data):
630 """
631 Index of current account in account list.
632 See: https://github.com/yt-dlp/yt-dlp/pull/519
633 """
634 for ytcfg in data:
635 session_index = int_or_none(try_get(ytcfg, lambda x: x['SESSION_INDEX']))
636 if session_index is not None:
637 return session_index
638
639 # Deprecated?
640 def _extract_identity_token(self, ytcfg=None, webpage=None):
641 if ytcfg:
642 token = try_get(ytcfg, lambda x: x['ID_TOKEN'], str)
643 if token:
644 return token
645 if webpage:
646 return self._search_regex(
647 r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
648 'identity token', default=None, fatal=False)
649
650 @staticmethod
651 def _extract_account_syncid(*args):
652 """
653 Extract syncId required to download private playlists of secondary channels
654 @params response and/or ytcfg
655 """
656 for data in args:
657 # ytcfg includes channel_syncid if on secondary channel
658 delegated_sid = try_get(data, lambda x: x['DELEGATED_SESSION_ID'], str)
659 if delegated_sid:
660 return delegated_sid
661 sync_ids = (try_get(
662 data, (lambda x: x['responseContext']['mainAppWebResponseContext']['datasyncId'],
663 lambda x: x['DATASYNC_ID']), str) or '').split('||')
664 if len(sync_ids) >= 2 and sync_ids[1]:
665 # datasyncid is of the form "channel_syncid||user_syncid" for secondary channel
666 # and just "user_syncid||" for primary channel. We only want the channel_syncid
667 return sync_ids[0]
668
@staticmethod
def _extract_visitor_data(*args):
    """
    Extracts visitorData from an API response or ytcfg
    Appears to be used to track session state
    """
    # Locations where visitor data may be found in a ytcfg or an API response
    visitor_data_paths = (
        'VISITOR_DATA',
        ('INNERTUBE_CONTEXT', 'client', 'visitorData'),
        ('responseContext', 'visitorData'),
    )
    return get_first(args, [visitor_data_paths], expected_type=str)
678
@functools.cached_property
def is_authenticated(self):
    """True when a SAPISIDHASH header can be generated, i.e. a SAPISID value is available"""
    auth_header = self._generate_sapisidhash_header()
    return True if auth_header else False
682
683 def extract_ytcfg(self, video_id, webpage):
684 if not webpage:
685 return {}
686 return self._parse_json(
687 self._search_regex(
688 r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg',
689 default='{}'), video_id, fatal=False) or {}
690
def generate_api_headers(
        self, *, ytcfg=None, account_syncid=None, session_index=None,
        visitor_data=None, identity_token=None, api_hostname=None, default_client='web'):
    """
    Build the HTTP headers for an InnerTube API request.

    Values passed explicitly win over those derived from `ytcfg`; client name,
    client version and user agent fall back to the defaults of `default_client`.
    The result is passed through filter_dict() before being returned.
    """
    origin = 'https://' + self._select_api_hostname(api_hostname, default_client)
    client_name_id = self._ytcfg_get_safe(
        ytcfg, lambda x: x['INNERTUBE_CONTEXT_CLIENT_NAME'], default_client=default_client)
    user_agent = self._ytcfg_get_safe(
        ytcfg, lambda x: x['INNERTUBE_CONTEXT']['client']['userAgent'], default_client=default_client)

    headers = {
        'X-YouTube-Client-Name': str(client_name_id),
        'X-YouTube-Client-Version': self._extract_client_version(ytcfg, default_client),
        'Origin': origin,
        'X-Youtube-Identity-Token': identity_token or self._extract_identity_token(ytcfg),
        'X-Goog-PageId': account_syncid or self._extract_account_syncid(ytcfg),
        'X-Goog-Visitor-Id': visitor_data or self._extract_visitor_data(ytcfg),
        'User-Agent': user_agent,
    }

    if session_index is None:
        session_index = self._extract_session_index(ytcfg)
    if session_index is not None:
        headers['X-Goog-AuthUser'] = session_index
    elif account_syncid:
        # A sync id without a known session index uses index 0
        headers['X-Goog-AuthUser'] = 0

    authorization = self._generate_sapisidhash_header(origin)
    if authorization is not None:
        headers['Authorization'] = authorization
        headers['X-Origin'] = origin
    return filter_dict(headers)
716
717 def _download_ytcfg(self, client, video_id):
718 url = {
719 'web': 'https://www.youtube.com',
720 'web_music': 'https://music.youtube.com',
721 'web_embedded': f'https://www.youtube.com/embed/{video_id}?html5=1',
722 }.get(client)
723 if not url:
724 return {}
725 webpage = self._download_webpage(
726 url, video_id, fatal=False, note=f'Downloading {client.replace("_", " ").strip()} client config')
727 return self.extract_ytcfg(video_id, webpage) or {}
728
729 @staticmethod
730 def _build_api_continuation_query(continuation, ctp=None):
731 query = {
732 'continuation': continuation,
733 }
734 # TODO: Inconsistency with clickTrackingParams.
735 # Currently we have a fixed ctp contained within context (from ytcfg)
736 # and a ctp in root query for continuation.
737 if ctp:
738 query['clickTracking'] = {'clickTrackingParams': ctp}
739 return query
740
@classmethod
def _extract_next_continuation_data(cls, renderer):
    """
    Build a continuation query from the nextContinuationData or
    reloadContinuationData of `renderer`. Returns None if no token is found.
    """
    getters = (
        lambda x: x['continuations'][0]['nextContinuationData'],
        lambda x: x['continuation']['reloadContinuationData'],
    )
    continuation_data = try_get(renderer, getters, dict)
    token = continuation_data.get('continuation') if continuation_data else None
    if not token:
        return None
    return cls._build_api_continuation_query(token, continuation_data.get('clickTrackingParams'))
753
754 @classmethod
755 def _extract_continuation_ep_data(cls, continuation_ep: dict):
756 if isinstance(continuation_ep, dict):
757 continuation = try_get(
758 continuation_ep, lambda x: x['continuationCommand']['token'], str)
759 if not continuation:
760 return
761 ctp = continuation_ep.get('clickTrackingParams')
762 return cls._build_api_continuation_query(continuation, ctp)
763
@classmethod
def _extract_continuation(cls, renderer):
    """
    Return a continuation query for `renderer`: first from its next/reload
    continuation data, otherwise from a continuationItemRenderer found in
    its 'contents', 'items' or 'rows'.
    """
    from_next_data = cls._extract_next_continuation_data(renderer)
    if from_next_data:
        return from_next_data

    # The endpoint is either given directly or wrapped in a button command
    endpoint_path = (
        ('contents', 'items', 'rows'), ..., 'continuationItemRenderer',
        ('continuationEndpoint', ('button', 'buttonRenderer', 'command')),
    )
    return traverse_obj(
        renderer, endpoint_path, get_all=False, expected_type=cls._extract_continuation_ep_data)
774
@classmethod
def _extract_alerts(cls, data):
    """
    Yield (alert type, message) pairs for the entries of data['alerts'].

    Alerts without a type or without extractable text are skipped, as are
    entries that do not have the expected dict structure.

    @param data  API response (or similar) that may contain an 'alerts' list
    """
    for alert_dict in try_get(data, lambda x: x['alerts'], list) or []:
        if not isinstance(alert_dict, dict):
            continue
        for alert in alert_dict.values():
            # A renderer value that is not a dict has neither 'type' nor 'text';
            # skip it instead of failing with AttributeError on .get()
            if not isinstance(alert, dict):
                continue
            alert_type = alert.get('type')
            if not alert_type:
                continue
            message = cls._get_text(alert, 'text')
            if message:
                yield alert_type, message
787
788 def _report_alerts(self, alerts, expected=True, fatal=True, only_once=False):
789 errors, warnings = [], []
790 for alert_type, alert_message in alerts:
791 if alert_type.lower() == 'error' and fatal:
792 errors.append([alert_type, alert_message])
793 elif alert_message not in self._IGNORED_WARNINGS:
794 warnings.append([alert_type, alert_message])
795
796 for alert_type, alert_message in (warnings + errors[:-1]):
797 self.report_warning(f'YouTube said: {alert_type} - {alert_message}', only_once=only_once)
798 if errors:
799 raise ExtractorError(f'YouTube said: {errors[-1][1]}', expected=expected)
800
def _extract_and_report_alerts(self, data, *args, **kwargs):
    """Extract the alerts of `data` and hand them to _report_alerts with the given options"""
    found_alerts = self._extract_alerts(data)
    return self._report_alerts(found_alerts, *args, **kwargs)
803
def _extract_badges(self, badge_list: list):
    """
    Extract known BadgeType's from a list of badge renderers.

    A badge is identified by its icon type, then by its style, and as a last
    resort by matching English keywords in its label/tooltip.

    @param badge_list  list of dicts holding '...BadgeRenderer' entries
    @returns [{'type': BadgeType}]
    """
    icon_type_map = {
        'PRIVACY_UNLISTED': BadgeType.AVAILABILITY_UNLISTED,
        'PRIVACY_PRIVATE': BadgeType.AVAILABILITY_PRIVATE,
        'PRIVACY_PUBLIC': BadgeType.AVAILABILITY_PUBLIC,
        'CHECK_CIRCLE_THICK': BadgeType.VERIFIED,
        'OFFICIAL_ARTIST_BADGE': BadgeType.VERIFIED,
        'CHECK': BadgeType.VERIFIED,
    }

    badge_style_map = {
        'BADGE_STYLE_TYPE_MEMBERS_ONLY': BadgeType.AVAILABILITY_SUBSCRIPTION,
        'BADGE_STYLE_TYPE_PREMIUM': BadgeType.AVAILABILITY_PREMIUM,
        'BADGE_STYLE_TYPE_LIVE_NOW': BadgeType.LIVE_NOW,
        'BADGE_STYLE_TYPE_VERIFIED': BadgeType.VERIFIED,
        'BADGE_STYLE_TYPE_VERIFIED_ARTIST': BadgeType.VERIFIED,
    }

    label_map = {
        'unlisted': BadgeType.AVAILABILITY_UNLISTED,
        'private': BadgeType.AVAILABILITY_PRIVATE,
        'members only': BadgeType.AVAILABILITY_SUBSCRIPTION,
        'live': BadgeType.LIVE_NOW,
        'premium': BadgeType.AVAILABILITY_PREMIUM,
        'verified': BadgeType.VERIFIED,
        'official artist channel': BadgeType.VERIFIED,
    }

    badges = []
    for badge in traverse_obj(badge_list, (..., lambda key, _: re.search(r'[bB]adgeRenderer$', key))):
        # expected_type=str on both lookups: the values are used as dict keys,
        # and an unhashable value (dict/list) would make .get() raise TypeError
        badge_type = (
            icon_type_map.get(traverse_obj(badge, ('icon', 'iconType'), expected_type=str))
            or badge_style_map.get(traverse_obj(badge, 'style', expected_type=str))
        )
        if badge_type:
            badges.append({'type': badge_type})
            continue

        # fallback, won't work in some languages
        label = traverse_obj(
            badge, 'label', ('accessibilityData', 'label'), 'tooltip', 'iconTooltip', get_all=False, expected_type=str, default='')
        for match, label_badge_type in label_map.items():
            # Only the first matching keyword counts
            if match in label.lower():
                badges.append({'type': label_badge_type})
                break

    return badges
855
@staticmethod
def _has_badge(badges, badge_type):
    """Return True if any entry of `badges` has its 'type' equal to `badge_type`"""
    def is_wanted(_, badge):
        return badge['type'] == badge_type

    matching = traverse_obj(badges, is_wanted)
    return bool(matching)
859
@staticmethod
def _get_text(data, *path_list, max_runs=None):
    """
    Return the first non-empty text found in `data` under any of the given paths.

    A text object is read from its 'simpleText', or else by joining the 'text'
    of its 'runs' (a bare list is treated as the runs themselves).
    @param path_list: traverse_obj paths to try in order; `data` itself is used when none are given
    @param max_runs: if truthy, only this many leading runs are joined
    @returns the text, or None if nothing non-empty was found
    """
    for path in path_list or [None]:
        if path is None:
            candidates = [data]
        else:
            candidates = traverse_obj(data, path, default=[])
            # A non-branching path gives a single object rather than a list of results
            is_branching = any(
                key is ... or isinstance(key, (list, tuple)) for key in variadic(path))
            if not is_branching:
                candidates = [candidates]

        for candidate in candidates:
            simple_text = try_get(candidate, lambda x: x['simpleText'], str)
            if simple_text:
                return simple_text

            text_runs = try_get(candidate, lambda x: x['runs'], list) or []
            if isinstance(candidate, list) and not text_runs:
                text_runs = candidate

            limit = min(len(text_runs), max_runs or len(text_runs))
            joined = ''.join(traverse_obj(text_runs[:limit], (..., 'text'), expected_type=str))
            if joined:
                return joined
    return None
881
def _get_count(self, data, *path_list):
    """
    Parse a count from the text found in `data` at `path_list` (see _get_text).

    If parse_count cannot handle the text, the leading run of digits and commas
    (after removing all whitespace) is converted instead.
    """
    raw_text = self._get_text(data, *path_list) or ''
    parsed = parse_count(raw_text)
    if parsed is not None:
        return parsed

    compact_text = re.sub(r'\s', '', raw_text)
    leading_digits = self._search_regex(r'^([\d,]+)', compact_text, 'count', default=None)
    return str_to_int(leading_digits)
889
@staticmethod
def _extract_thumbnails(data, *path_list, final_key='thumbnails'):
    """
    Extract thumbnails from thumbnails dict
    @param path_list: path list to level that contains 'thumbnails' key
    @param final_key: key holding the list of thumbnails at that level
    @returns list of dicts with 'url', 'height' and 'width'; entries without a valid URL are dropped
    """
    collected = []
    for path in path_list or [()]:
        entries_path = (*variadic(path), final_key, ...)
        for entry in traverse_obj(data, entries_path):
            url = url_or_none(entry.get('url'))
            if not url:
                continue
            # Sometimes youtube gives a wrong thumbnail URL. See:
            # https://github.com/yt-dlp/yt-dlp/issues/233
            # https://github.com/ytdl-org/youtube-dl/issues/28023
            if 'maxresdefault' in url:
                # Keep only the part before the query string
                url = url.partition('?')[0]
            collected.append({
                'url': url,
                'height': int_or_none(entry.get('height')),
                'width': int_or_none(entry.get('width')),
            })
    return collected
913
914 @staticmethod
915 def extract_relative_time(relative_time_text):
916 """
917 Extracts a relative time from string and converts to dt object
918 e.g. 'streamed 6 days ago', '5 seconds ago (edited)', 'updated today', '8 yr ago'
919 """
920
921 # XXX: this could be moved to a general function in utils/_utils.py
922 # The relative time text strings are roughly the same as what
923 # Javascript's Intl.RelativeTimeFormat function generates.
924 # See: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Intl/RelativeTimeFormat
925 mobj = re.search(
926 r'(?P<start>today|yesterday|now)|(?P<time>\d+)\s*(?P<unit>sec(?:ond)?|s|min(?:ute)?|h(?:our|r)?|d(?:ay)?|w(?:eek|k)?|mo(?:nth)?|y(?:ear|r)?)s?\s*ago',
927 relative_time_text)
928 if mobj:
929 start = mobj.group('start')
930 if start:
931 return datetime_from_str(start)
932 try:
933 return datetime_from_str('now-{}{}'.format(mobj.group('time'), mobj.group('unit')))
934 except ValueError:
935 return None
936
937 def _parse_time_text(self, text):
938 if not text:
939 return
940 dt_ = self.extract_relative_time(text)
941 timestamp = None
942 if isinstance(dt_, dt.datetime):
943 timestamp = calendar.timegm(dt_.timetuple())
944
945 if timestamp is None:
946 timestamp = (
947 unified_timestamp(text) or unified_timestamp(
948 self._search_regex(
949 (r'([a-z]+\s*\d{1,2},?\s*20\d{2})', r'(?:.+|^)(?:live|premieres|ed|ing)(?:\s*(?:on|for))?\s*(.+\d)'),
950 text.lower(), 'time text', default=None)))
951
952 if text and timestamp is None and self._preferred_lang in (None, 'en'):
953 self.report_warning(
954 f'Cannot parse localized time text "{text}"', only_once=True)
955 return timestamp
956
def _extract_response(self, item_id, query, note='Downloading API JSON', headers=None,
                      ytcfg=None, check_get_keys=None, ep='browse', fatal=True, api_hostname=None,
                      default_client='web'):
    """
    Call the API endpoint `ep` through self._call_api and return the response,
    retrying on network errors, "unknown error" alerts and incomplete data.

    Errors that are not retried are handed to self._error_or_warning together with `fatal`.
    @param item_id          passed to _call_api as video_id; also used when parsing HTTP error bodies
    @param check_get_keys   path(s) given to traverse_obj on the response; a falsy result
                            marks the response as incomplete and triggers a retry
    @param fatal            forwarded to self._error_or_warning
    @returns                the response; None once the incomplete-data retries run out
                            without raising; otherwise whatever self._error_or_warning returns
    """
    raise_for_incomplete = bool(self._configuration_arg('raise_incomplete_data', ie_key=YoutubeIE))
    # Incomplete Data should be a warning by default when retries are exhausted, while other errors should be fatal.
    icd_retries = iter(self.RetryManager(fatal=raise_for_incomplete))
    icd_rm = next(icd_retries)
    main_retries = iter(self.RetryManager())
    main_rm = next(main_retries)
    # Manual retry loop for multiple RetryManagers
    # The proper RetryManager MUST be advanced after an error
    # and its result MUST be checked if the manager is non fatal
    # NOTE(review): presumably next() on a fatal manager raises once its retries are exhausted - confirm
    while True:
        try:
            response = self._call_api(
                ep=ep, fatal=True, headers=headers,
                video_id=item_id, query=query, note=note,
                context=self._extract_context(ytcfg, default_client),
                api_key=self._extract_api_key(ytcfg, default_client),
                api_hostname=api_hostname, default_client=default_client)
        except ExtractorError as e:
            # Errors not caused by the network are never retried
            if not isinstance(e.cause, network_exceptions):
                return self._error_or_warning(e, fatal=fatal)
            # Network errors without an HTTP response are always retried
            elif not isinstance(e.cause, HTTPError):
                main_rm.error = e
                next(main_retries)
                continue

            # Peek at the error body; if it is not HTML, try to read the JSON
            # error message from it and report it as a non-fatal alert
            first_bytes = e.cause.response.read(512)
            if not is_html(first_bytes):
                yt_error = try_get(
                    self._parse_json(
                        self._webpage_read_content(e.cause.response, None, item_id, prefix=first_bytes) or '{}', item_id, fatal=False),
                    lambda x: x['error']['message'], str)
                if yt_error:
                    self._report_alerts([('ERROR', yt_error)], fatal=False)
            # Downloading page may result in intermittent 5xx HTTP error
            # Sometimes a 404 is also received. See: https://github.com/ytdl-org/youtube-dl/issues/28289
            # We also want to catch all other network exceptions since errors in later pages can be troublesome
            # See https://github.com/yt-dlp/yt-dlp/issues/507#issuecomment-880188210
            if e.cause.status not in (403, 429):
                main_rm.error = e
                next(main_retries)
                continue
            # HTTP 403 and 429 are not retried
            return self._error_or_warning(e, fatal=fatal)

        try:
            self._extract_and_report_alerts(response, only_once=True)
        except ExtractorError as e:
            # YouTube's servers may return errors we want to retry on in a 200 OK response
            # See: https://github.com/yt-dlp/yt-dlp/issues/839
            if 'unknown error' in e.msg.lower():
                main_rm.error = e
                next(main_retries)
                continue
            return self._error_or_warning(e, fatal=fatal)
        # Youtube sometimes sends incomplete data
        # See: https://github.com/ytdl-org/youtube-dl/issues/28194
        if not traverse_obj(response, *variadic(check_get_keys)):
            icd_rm.error = ExtractorError('Incomplete data received', expected=True)
            # A falsy result means the incomplete-data manager has no attempts left
            should_retry = next(icd_retries, None)
            if not should_retry:
                return None
            continue

        return response
1023
1024 @staticmethod
1025 def is_music_url(url):
1026 return re.match(r'(https?://)?music\.youtube\.com/', url) is not None
1027
def _extract_video(self, renderer):
    """
    Build a `url`-type result for YoutubeIE from a single video renderer.

    Title, channel, channel id and time text fall back to the reel header
    renderer nested under the renderer's navigation endpoint when the
    renderer itself does not provide them.
    @param renderer: dict describing one video
    @returns info dict with '_type': 'url'
    """
    video_id = renderer.get('videoId')

    reel_header = traverse_obj(renderer, (
        'navigationEndpoint', 'reelWatchEndpoint', 'overlay', 'reelPlayerOverlayRenderer',
        'reelPlayerHeaderSupportedRenderers', 'reelPlayerHeaderRenderer'))

    title = self._get_text(renderer, 'title', 'headline')
    if not title:
        title = self._get_text(reel_header, 'reelTitleText')
    description = self._get_text(renderer, 'descriptionSnippet')

    # Duration: explicit seconds, then the length text, then the accessibility label
    duration = int_or_none(renderer.get('lengthSeconds'))
    if duration is None:
        length_text = self._get_text(
            renderer, 'lengthText', ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'text'))
        duration = parse_duration(length_text)
    if duration is None:
        # XXX: should write a parser to be more general to support more cases (e.g. shorts in shorts tab)
        duration_text = self._search_regex(
            r'(?i)(ago)(?!.*\1)\s+(?P<duration>[a-z0-9 ,]+?)(?:\s+[\d,]+\s+views)?(?:\s+-\s+play\s+short)?$',
            traverse_obj(renderer, ('title', 'accessibility', 'accessibilityData', 'label'), default='', expected_type=str),
            video_id, default=None, group='duration')
        duration = parse_duration(duration_text)

    byline_endpoint = ('shortBylineText', 'runs', ..., 'navigationEndpoint')
    channel_id = traverse_obj(
        renderer, (*byline_endpoint, 'browseEndpoint', 'browseId'),
        expected_type=str, get_all=False)
    channel_id = self.ucid_or_none(
        channel_id
        or traverse_obj(reel_header, ('channelNavigationEndpoint', 'browseEndpoint', 'browseId')))

    overlay_style = traverse_obj(
        renderer, ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'style'),
        get_all=False, expected_type=str)
    badges = self._extract_badges(traverse_obj(renderer, 'badges'))
    owner_badges = self._extract_badges(traverse_obj(renderer, 'ownerBadges'))

    navigation_path = traverse_obj(
        renderer, ('navigationEndpoint', 'commandMetadata', 'webCommandMetadata', 'url'),
        expected_type=str)
    navigation_url = urljoin('https://www.youtube.com/', navigation_path) or ''
    if overlay_style == 'SHORTS' or '/shorts/' in navigation_url:
        url = f'https://www.youtube.com/shorts/{video_id}'
    else:
        url = f'https://www.youtube.com/watch?v={video_id}'

    time_text = (
        self._get_text(renderer, 'publishedTimeText', 'videoInfo')
        or self._get_text(reel_header, 'timestampText')
        or '')
    scheduled_timestamp = str_to_int(traverse_obj(renderer, ('upcomingEventData', 'startTime'), get_all=False))

    if scheduled_timestamp is not None:
        live_status = 'is_upcoming'
    elif 'streamed' in time_text.lower():
        live_status = 'was_live'
    elif overlay_style == 'LIVE' or self._has_badge(badges, BadgeType.LIVE_NOW):
        live_status = 'is_live'
    else:
        live_status = None

    # videoInfo is a string like '50K views • 10 years ago'.
    views_text = self._get_text(renderer, 'viewCountText', 'shortViewCountText', 'videoInfo') or ''
    if 'no views' in views_text.lower():
        view_count = 0
    else:
        view_count = self._get_count({'simpleText': views_text})
    # Live and upcoming videos report their count as concurrent viewers
    view_count_key = 'concurrent_view_count' if live_status in ('is_live', 'is_upcoming') else 'view_count'

    channel = self._get_text(renderer, 'ownerText', 'shortBylineText')
    if not channel:
        channel = self._get_text(reel_header, 'channelTitleText')

    channel_handle = traverse_obj(
        renderer,
        (*byline_endpoint,
         (('commandMetadata', 'webCommandMetadata', 'url'), ('browseEndpoint', 'canonicalBaseUrl'))),
        expected_type=self.handle_from_url, get_all=False)

    channel_url = f'https://www.youtube.com/channel/{channel_id}' if channel_id else None
    uploader_url = format_field(channel_handle, None, 'https://www.youtube.com/%s', default=None)
    thumbnails = self._extract_thumbnails(renderer, 'thumbnail')

    # The time text is only parsed when the approximate_date extractor arg is set
    timestamp = None
    if self._configuration_arg('approximate_date', ie_key=YoutubeTabIE):
        timestamp = self._parse_time_text(time_text)

    if self._has_badge(badges, BadgeType.AVAILABILITY_PUBLIC):
        availability = 'public'
    else:
        availability = self._availability(
            is_private=self._has_badge(badges, BadgeType.AVAILABILITY_PRIVATE) or None,
            needs_premium=self._has_badge(badges, BadgeType.AVAILABILITY_PREMIUM) or None,
            needs_subscription=self._has_badge(badges, BadgeType.AVAILABILITY_SUBSCRIPTION) or None,
            is_unlisted=self._has_badge(badges, BadgeType.AVAILABILITY_UNLISTED) or None)

    return {
        '_type': 'url',
        'ie_key': YoutubeIE.ie_key(),
        'id': video_id,
        'url': url,
        'title': title,
        'description': description,
        'duration': duration,
        'channel_id': channel_id,
        'channel': channel,
        'channel_url': channel_url,
        'uploader': channel,
        'uploader_id': channel_handle,
        'uploader_url': uploader_url,
        'thumbnails': thumbnails,
        'timestamp': timestamp,
        'release_timestamp': scheduled_timestamp,
        'availability': availability,
        view_count_key: view_count,
        'live_status': live_status,
        'channel_is_verified': self._has_badge(owner_badges, BadgeType.VERIFIED) or None,
    }
1122
1123
1124 class YoutubeIE(YoutubeBaseInfoExtractor):
1125 IE_DESC = 'YouTube'
1126 _VALID_URL = r'''(?x)^
1127 (
1128 (?:https?://|//) # http(s):// or protocol-independent URL
1129 (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com|
1130 (?:www\.)?deturl\.com/www\.youtube\.com|
1131 (?:www\.)?pwnyoutube\.com|
1132 (?:www\.)?hooktube\.com|
1133 (?:www\.)?yourepeat\.com|
1134 tube\.majestyc\.net|
1135 {invidious}|
1136 youtube\.googleapis\.com)/ # the various hostnames, with wildcard subdomains
1137 (?:.*?\#/)? # handle anchor (#/) redirect urls
1138 (?: # the various things that can precede the ID:
1139 (?:(?:v|embed|e|shorts|live)/(?!videoseries|live_stream)) # v/ or embed/ or e/ or shorts/
1140 |(?: # or the v= param in all its forms
1141 (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
1142 (?:\?|\#!?) # the params delimiter ? or # or #!
1143 (?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
1144 v=
1145 )
1146 ))
1147 |(?:
1148 youtu\.be| # just youtu.be/xxxx
1149 vid\.plus| # or vid.plus/xxxx
1150 zwearz\.com/watch| # or zwearz.com/watch/xxxx
1151 {invidious}
1152 )/
1153 |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
1154 )
1155 )? # all until now is optional -> you can pass the naked ID
1156 (?P<id>[0-9A-Za-z_-]{{11}}) # here is it! the YouTube video ID
1157 (?(1).+)? # if we found the ID, everything can follow
1158 (?:\#|$)'''.format(
1159 invidious='|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
1160 )
1161 _EMBED_REGEX = [
1162 r'''(?x)
1163 (?:
1164 <(?:[0-9A-Za-z-]+?)?iframe[^>]+?src=|
1165 data-video-url=|
1166 <embed[^>]+?src=|
1167 embedSWF\(?:\s*|
1168 <object[^>]+data=|
1169 new\s+SWFObject\(
1170 )
1171 (["\'])
1172 (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
1173 (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
1174 \1''',
1175 # https://wordpress.org/plugins/lazy-load-for-videos/
1176 r'''(?xs)
1177 <a\s[^>]*\bhref="(?P<url>https://www\.youtube\.com/watch\?v=[0-9A-Za-z_-]{11})"
1178 \s[^>]*\bclass="[^"]*\blazy-load-youtube''',
1179 ]
1180 _RETURN_TYPE = 'video' # XXX: How to handle multifeed?
1181
1182 _PLAYER_INFO_RE = (
1183 r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player',
1184 r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',
1185 r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$',
1186 )
1187 _formats = { # NB: Used in YoutubeWebArchiveIE and GoogleDriveIE
1188 '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
1189 '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
1190 '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
1191 '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
1192 '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
1193 '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
1194 '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
1195 '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
1196 # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
1197 '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
1198 '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
1199 '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
1200 '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
1201 '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
1202 '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
1203 '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
1204 '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
1205 '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
1206
1207
1208 # 3D videos
1209 '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
1210 '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
1211 '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
1212 '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
1213 '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
1214 '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
1215 '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
1216
1217 # Apple HTTP Live Streaming
1218 '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
1219 '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
1220 '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
1221 '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
1222 '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
1223 '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
1224 '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
1225 '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
1226
1227 # DASH mp4 video
1228 '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
1229 '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
1230 '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
1231 '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
1232 '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
1233 '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
1234 '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
1235 '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
1236 '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
1237 '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
1238 '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
1239 '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},
1240
1241 # Dash mp4 audio
1242 '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
1243 '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
1244 '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
1245 '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
1246 '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
1247 '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
1248 '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},
1249
1250 # Dash webm
1251 '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1252 '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1253 '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1254 '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1255 '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1256 '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1257 '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
1258 '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1259 '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1260 '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1261 '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1262 '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1263 '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1264 '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1265 '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1266 # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
1267 '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1268 '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
1269 '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
1270 '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
1271 '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1272 '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
1273
1274 # Dash webm audio
1275 '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
1276 '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},
1277
1278 # Dash webm audio with opus inside
1279 '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
1280 '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
1281 '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},
1282
1283 # RTMP (unnamed)
1284 '_rtmp': {'protocol': 'rtmp'},
1285
1286 # av01 video only formats sometimes served with "unknown" codecs
1287 '394': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},
1288 '395': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},
1289 '396': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'av01.0.01M.08'},
1290 '397': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'av01.0.04M.08'},
1291 '398': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'av01.0.05M.08'},
1292 '399': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'av01.0.08M.08'},
1293 '400': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},
1294 '401': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},
1295 }
1296 _SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')
1297
1298 _GEO_BYPASS = False
1299
1300 IE_NAME = 'youtube'
1301 _TESTS = [
1302 {
1303 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
1304 'info_dict': {
1305 'id': 'BaW_jenozKc',
1306 'ext': 'mp4',
1307 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
1308 'channel': 'Philipp Hagemeister',
1309 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
1310 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
1311 'upload_date': '20121002',
1312 'description': 'md5:8fb536f4877b8a7455c2ec23794dbc22',
1313 'categories': ['Science & Technology'],
1314 'tags': ['youtube-dl'],
1315 'duration': 10,
1316 'view_count': int,
1317 'like_count': int,
1318 'availability': 'public',
1319 'playable_in_embed': True,
1320 'thumbnail': 'https://i.ytimg.com/vi/BaW_jenozKc/maxresdefault.jpg',
1321 'live_status': 'not_live',
1322 'age_limit': 0,
1323 'start_time': 1,
1324 'end_time': 9,
1325 'comment_count': int,
1326 'channel_follower_count': int,
1327 'uploader': 'Philipp Hagemeister',
1328 'uploader_url': 'https://www.youtube.com/@PhilippHagemeister',
1329 'uploader_id': '@PhilippHagemeister',
1330 'heatmap': 'count:100',
1331 'timestamp': 1349198244,
1332 },
1333 },
1334 {
1335 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
1336 'note': 'Embed-only video (#1746)',
1337 'info_dict': {
1338 'id': 'yZIXLfi8CZQ',
1339 'ext': 'mp4',
1340 'upload_date': '20120608',
1341 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
1342 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
1343 'age_limit': 18,
1344 },
1345 'skip': 'Private video',
1346 },
1347 {
1348 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=yZIXLfi8CZQ',
1349 'note': 'Use the first video ID in the URL',
1350 'info_dict': {
1351 'id': 'BaW_jenozKc',
1352 'ext': 'mp4',
1353 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
1354 'channel': 'Philipp Hagemeister',
1355 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
1356 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
1357 'upload_date': '20121002',
1358 'description': 'md5:8fb536f4877b8a7455c2ec23794dbc22',
1359 'categories': ['Science & Technology'],
1360 'tags': ['youtube-dl'],
1361 'duration': 10,
1362 'view_count': int,
1363 'like_count': int,
1364 'availability': 'public',
1365 'playable_in_embed': True,
1366 'thumbnail': 'https://i.ytimg.com/vi/BaW_jenozKc/maxresdefault.jpg',
1367 'live_status': 'not_live',
1368 'age_limit': 0,
1369 'comment_count': int,
1370 'channel_follower_count': int,
1371 'uploader': 'Philipp Hagemeister',
1372 'uploader_url': 'https://www.youtube.com/@PhilippHagemeister',
1373 'uploader_id': '@PhilippHagemeister',
1374 'heatmap': 'count:100',
1375 'timestamp': 1349198244,
1376 },
1377 'params': {
1378 'skip_download': True,
1379 },
1380 },
1381 {
1382 'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
1383 'note': '256k DASH audio (format 141) via DASH manifest',
1384 'info_dict': {
1385 'id': 'a9LDPn-MO4I',
1386 'ext': 'm4a',
1387 'upload_date': '20121002',
1388 'description': '',
1389 'title': 'UHDTV TEST 8K VIDEO.mp4',
1390 },
1391 'params': {
1392 'youtube_include_dash_manifest': True,
1393 'format': '141',
1394 },
1395 'skip': 'format 141 not served anymore',
1396 },
1397 # DASH manifest with encrypted signature
1398 {
1399 'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
1400 'info_dict': {
1401 'id': 'IB3lcPjvWLA',
1402 'ext': 'm4a',
1403 'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
1404 'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
1405 'duration': 244,
1406 'upload_date': '20131011',
1407 'abr': 129.495,
1408 'like_count': int,
1409 'channel_id': 'UChuZAo1RKL85gev3Eal9_zg',
1410 'playable_in_embed': True,
1411 'channel_url': 'https://www.youtube.com/channel/UChuZAo1RKL85gev3Eal9_zg',
1412 'view_count': int,
1413 'track': 'The Spark',
1414 'live_status': 'not_live',
1415 'thumbnail': 'https://i.ytimg.com/vi_webp/IB3lcPjvWLA/maxresdefault.webp',
1416 'channel': 'Afrojack',
1417 'tags': 'count:19',
1418 'availability': 'public',
1419 'categories': ['Music'],
1420 'age_limit': 0,
1421 'alt_title': 'The Spark',
1422 'channel_follower_count': int,
1423 'uploader': 'Afrojack',
1424 'uploader_url': 'https://www.youtube.com/@Afrojack',
1425 'uploader_id': '@Afrojack',
1426 },
1427 'params': {
1428 'youtube_include_dash_manifest': True,
1429 'format': '141/bestaudio[ext=m4a]',
1430 },
1431 },
1432 # Age-gate videos. See https://github.com/yt-dlp/yt-dlp/pull/575#issuecomment-888837000
1433 {
1434 'note': 'Embed allowed age-gate video',
1435 'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
1436 'info_dict': {
1437 'id': 'HtVdAasjOgU',
1438 'ext': 'mp4',
1439 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
1440 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
1441 'duration': 142,
1442 'upload_date': '20140605',
1443 'age_limit': 18,
1444 'categories': ['Gaming'],
1445 'thumbnail': 'https://i.ytimg.com/vi_webp/HtVdAasjOgU/maxresdefault.webp',
1446 'availability': 'needs_auth',
1447 'channel_url': 'https://www.youtube.com/channel/UCzybXLxv08IApdjdN0mJhEg',
1448 'like_count': int,
1449 'channel': 'The Witcher',
1450 'live_status': 'not_live',
1451 'tags': 'count:17',
1452 'channel_id': 'UCzybXLxv08IApdjdN0mJhEg',
1453 'playable_in_embed': True,
1454 'view_count': int,
1455 'channel_follower_count': int,
1456 'uploader': 'The Witcher',
1457 'uploader_url': 'https://www.youtube.com/@thewitcher',
1458 'uploader_id': '@thewitcher',
1459 'comment_count': int,
1460 'channel_is_verified': True,
1461 'heatmap': 'count:100',
1462 'timestamp': 1401991663,
1463 },
1464 },
1465 {
1466 'note': 'Age-gate video with embed allowed in public site',
1467 'url': 'https://youtube.com/watch?v=HsUATh_Nc2U',
1468 'info_dict': {
1469 'id': 'HsUATh_Nc2U',
1470 'ext': 'mp4',
1471 'title': 'Godzilla 2 (Official Video)',
1472 'description': 'md5:bf77e03fcae5529475e500129b05668a',
1473 'upload_date': '20200408',
1474 'age_limit': 18,
1475 'availability': 'needs_auth',
1476 'channel_id': 'UCYQT13AtrJC0gsM1far_zJg',
1477 'channel': 'FlyingKitty',
1478 'channel_url': 'https://www.youtube.com/channel/UCYQT13AtrJC0gsM1far_zJg',
1479 'view_count': int,
1480 'categories': ['Entertainment'],
1481 'live_status': 'not_live',
1482 'tags': ['Flyingkitty', 'godzilla 2'],
1483 'thumbnail': 'https://i.ytimg.com/vi/HsUATh_Nc2U/maxresdefault.jpg',
1484 'like_count': int,
1485 'duration': 177,
1486 'playable_in_embed': True,
1487 'channel_follower_count': int,
1488 'uploader': 'FlyingKitty',
1489 'uploader_url': 'https://www.youtube.com/@FlyingKitty900',
1490 'uploader_id': '@FlyingKitty900',
1491 'comment_count': int,
1492 'channel_is_verified': True,
1493 },
1494 },
1495 {
1496 'note': 'Age-gate video embedable only with clientScreen=EMBED',
1497 'url': 'https://youtube.com/watch?v=Tq92D6wQ1mg',
1498 'info_dict': {
1499 'id': 'Tq92D6wQ1mg',
1500 'title': '[MMD] Adios - EVERGLOW [+Motion DL]',
1501 'ext': 'mp4',
1502 'upload_date': '20191228',
1503 'description': 'md5:17eccca93a786d51bc67646756894066',
1504 'age_limit': 18,
1505 'like_count': int,
1506 'availability': 'needs_auth',
1507 'channel_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',
1508 'view_count': int,
1509 'thumbnail': 'https://i.ytimg.com/vi_webp/Tq92D6wQ1mg/sddefault.webp',
1510 'channel': 'Projekt Melody',
1511 'live_status': 'not_live',
1512 'tags': ['mmd', 'dance', 'mikumikudance', 'kpop', 'vtuber'],
1513 'playable_in_embed': True,
1514 'categories': ['Entertainment'],
1515 'duration': 106,
1516 'channel_url': 'https://www.youtube.com/channel/UC1yoRdFoFJaCY-AGfD9W0wQ',
1517 'comment_count': int,
1518 'channel_follower_count': int,
1519 'uploader': 'Projekt Melody',
1520 'uploader_url': 'https://www.youtube.com/@ProjektMelody',
1521 'uploader_id': '@ProjektMelody',
1522 'timestamp': 1577508724,
1523 },
1524 },
1525 {
1526 'note': 'Non-Agegated non-embeddable video',
1527 'url': 'https://youtube.com/watch?v=MeJVWBSsPAY',
1528 'info_dict': {
1529 'id': 'MeJVWBSsPAY',
1530 'ext': 'mp4',
1531 'title': 'OOMPH! - Such Mich Find Mich (Lyrics)',
1532 'description': 'Fan Video. Music & Lyrics by OOMPH!.',
1533 'upload_date': '20130730',
1534 'track': 'Such mich find mich',
1535 'age_limit': 0,
1536 'tags': ['oomph', 'such mich find mich', 'lyrics', 'german industrial', 'musica industrial'],
1537 'like_count': int,
1538 'playable_in_embed': False,
1539 'creator': 'OOMPH!',
1540 'thumbnail': 'https://i.ytimg.com/vi/MeJVWBSsPAY/sddefault.jpg',
1541 'view_count': int,
1542 'alt_title': 'Such mich find mich',
1543 'duration': 210,
1544 'channel': 'Herr Lurik',
1545 'channel_id': 'UCdR3RSDPqub28LjZx0v9-aA',
1546 'categories': ['Music'],
1547 'availability': 'public',
1548 'channel_url': 'https://www.youtube.com/channel/UCdR3RSDPqub28LjZx0v9-aA',
1549 'live_status': 'not_live',
1550 'artist': 'OOMPH!',
1551 'channel_follower_count': int,
1552 'uploader': 'Herr Lurik',
1553 'uploader_url': 'https://www.youtube.com/@HerrLurik',
1554 'uploader_id': '@HerrLurik',
1555 },
1556 },
1557 {
1558 'note': 'Non-bypassable age-gated video',
1559 'url': 'https://youtube.com/watch?v=Cr381pDsSsA',
1560 'only_matching': True,
1561 },
1562 # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
1563 # YouTube Red ad is not captured for creator
1564 {
1565 'url': '__2ABJjxzNo',
1566 'info_dict': {
1567 'id': '__2ABJjxzNo',
1568 'ext': 'mp4',
1569 'duration': 266,
1570 'upload_date': '20100430',
1571 'creator': 'deadmau5',
1572 'description': 'md5:6cbcd3a92ce1bc676fc4d6ab4ace2336',
1573 'title': 'Deadmau5 - Some Chords (HD)',
1574 'alt_title': 'Some Chords',
1575 'availability': 'public',
1576 'tags': 'count:14',
1577 'channel_id': 'UCYEK6xds6eo-3tr4xRdflmQ',
1578 'view_count': int,
1579 'live_status': 'not_live',
1580 'channel': 'deadmau5',
1581 'thumbnail': 'https://i.ytimg.com/vi_webp/__2ABJjxzNo/maxresdefault.webp',
1582 'like_count': int,
1583 'track': 'Some Chords',
1584 'artist': 'deadmau5',
1585 'playable_in_embed': True,
1586 'age_limit': 0,
1587 'channel_url': 'https://www.youtube.com/channel/UCYEK6xds6eo-3tr4xRdflmQ',
1588 'categories': ['Music'],
1589 'album': 'Some Chords',
1590 'channel_follower_count': int,
1591 'uploader': 'deadmau5',
1592 'uploader_url': 'https://www.youtube.com/@deadmau5',
1593 'uploader_id': '@deadmau5',
1594 },
1595 'expected_warnings': [
1596 'DASH manifest missing',
1597 ],
1598 },
1599 # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
1600 {
1601 'url': 'lqQg6PlCWgI',
1602 'info_dict': {
1603 'id': 'lqQg6PlCWgI',
1604 'ext': 'mp4',
1605 'duration': 6085,
1606 'upload_date': '20150827',
1607 'description': 'md5:04bbbf3ccceb6795947572ca36f45904',
1608 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
1609 'like_count': int,
1610 'release_timestamp': 1343767800,
1611 'playable_in_embed': True,
1612 'categories': ['Sports'],
1613 'release_date': '20120731',
1614 'channel': 'Olympics',
1615 'tags': ['Hockey', '2012-07-31', '31 July 2012', 'Riverbank Arena', 'Session', 'Olympics', 'Olympic Games', 'London 2012', '2012 Summer Olympics', 'Summer Games'],
1616 'channel_id': 'UCTl3QQTvqHFjurroKxexy2Q',
1617 'thumbnail': 'https://i.ytimg.com/vi/lqQg6PlCWgI/maxresdefault.jpg',
1618 'age_limit': 0,
1619 'availability': 'public',
1620 'live_status': 'was_live',
1621 'view_count': int,
1622 'channel_url': 'https://www.youtube.com/channel/UCTl3QQTvqHFjurroKxexy2Q',
1623 'channel_follower_count': int,
1624 'uploader': 'Olympics',
1625 'uploader_url': 'https://www.youtube.com/@Olympics',
1626 'uploader_id': '@Olympics',
1627 'channel_is_verified': True,
1628 'timestamp': 1440707674,
1629 },
1630 'params': {
1631 'skip_download': 'requires avconv',
1632 },
1633 },
1634 # Non-square pixels
1635 {
1636 'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
1637 'info_dict': {
1638 'id': '_b-2C3KPAM0',
1639 'ext': 'mp4',
1640 'stretched_ratio': 16 / 9.,
1641 'duration': 85,
1642 'upload_date': '20110310',
1643 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
1644 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
1645 'playable_in_embed': True,
1646 'channel': '孫ᄋᄅ',
1647 'age_limit': 0,
1648 'tags': 'count:11',
1649 'channel_url': 'https://www.youtube.com/channel/UCS-xxCmRaA6BFdmgDPA_BIw',
1650 'channel_id': 'UCS-xxCmRaA6BFdmgDPA_BIw',
1651 'thumbnail': 'https://i.ytimg.com/vi/_b-2C3KPAM0/maxresdefault.jpg',
1652 'view_count': int,
1653 'categories': ['People & Blogs'],
1654 'like_count': int,
1655 'live_status': 'not_live',
1656 'availability': 'unlisted',
1657 'comment_count': int,
1658 'channel_follower_count': int,
1659 'uploader': '孫ᄋᄅ',
1660 'uploader_url': 'https://www.youtube.com/@AllenMeow',
1661 'uploader_id': '@AllenMeow',
1662 'timestamp': 1299776999,
1663 },
1664 },
1665 # url_encoded_fmt_stream_map is empty string
1666 {
1667 'url': 'qEJwOuvDf7I',
1668 'info_dict': {
1669 'id': 'qEJwOuvDf7I',
1670 'ext': 'webm',
1671 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
1672 'description': '',
1673 'upload_date': '20150404',
1674 },
1675 'params': {
1676 'skip_download': 'requires avconv',
1677 },
1678 'skip': 'This live event has ended.',
1679 },
1680 # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
1681 {
1682 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
1683 'info_dict': {
1684 'id': 'FIl7x6_3R5Y',
1685 'ext': 'webm',
1686 'title': 'md5:7b81415841e02ecd4313668cde88737a',
1687 'description': 'md5:116377fd2963b81ec4ce64b542173306',
1688 'duration': 220,
1689 'upload_date': '20150625',
1690 'formats': 'mincount:31',
1691 },
1692 'skip': 'not actual anymore',
1693 },
1694 # DASH manifest with segment_list
1695 {
1696 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
1697 'md5': '8ce563a1d667b599d21064e982ab9e31',
1698 'info_dict': {
1699 'id': 'CsmdDsKjzN8',
1700 'ext': 'mp4',
1701 'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510
1702 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
1703 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
1704 },
1705 'params': {
1706 'youtube_include_dash_manifest': True,
1707 'format': '135', # bestvideo
1708 },
1709 'skip': 'This live event has ended.',
1710 },
1711 {
1712 # Multifeed videos (multiple cameras); the URL can be that of any camera
1713 # TODO: fix multifeed titles
1714 'url': 'https://www.youtube.com/watch?v=zaPI8MvL8pg',
1715 'info_dict': {
1716 'id': 'zaPI8MvL8pg',
1717 'title': 'Terraria 1.2 Live Stream | Let\'s Play - Part 04',
1718 'description': 'md5:563ccbc698b39298481ca3c571169519',
1719 },
1720 'playlist': [{
1721 'info_dict': {
1722 'id': 'j5yGuxZ8lLU',
1723 'ext': 'mp4',
1724 'title': 'Terraria 1.2 Live Stream | Let\'s Play - Part 04 (Chris)',
1725 'description': 'md5:563ccbc698b39298481ca3c571169519',
1726 'duration': 10120,
1727 'channel_follower_count': int,
1728 'channel_url': 'https://www.youtube.com/channel/UCN2XePorRokPB9TEgRZpddg',
1729 'availability': 'public',
1730 'playable_in_embed': True,
1731 'upload_date': '20131105',
1732 'categories': ['Gaming'],
1733 'live_status': 'was_live',
1734 'tags': 'count:24',
1735 'release_timestamp': 1383701910,
1736 'thumbnail': 'https://i.ytimg.com/vi/j5yGuxZ8lLU/maxresdefault.jpg',
1737 'comment_count': int,
1738 'age_limit': 0,
1739 'like_count': int,
1740 'channel_id': 'UCN2XePorRokPB9TEgRZpddg',
1741 'channel': 'WiiLikeToPlay',
1742 'view_count': int,
1743 'release_date': '20131106',
1744 'uploader': 'WiiLikeToPlay',
1745 'uploader_id': '@WLTP',
1746 'uploader_url': 'https://www.youtube.com/@WLTP',
1747 },
1748 }, {
1749 'info_dict': {
1750 'id': 'zaPI8MvL8pg',
1751 'ext': 'mp4',
1752 'title': 'Terraria 1.2 Live Stream | Let\'s Play - Part 04 (Tyson)',
1753 'availability': 'public',
1754 'channel_url': 'https://www.youtube.com/channel/UCN2XePorRokPB9TEgRZpddg',
1755 'channel': 'WiiLikeToPlay',
1756 'channel_follower_count': int,
1757 'description': 'md5:563ccbc698b39298481ca3c571169519',
1758 'duration': 10108,
1759 'age_limit': 0,
1760 'like_count': int,
1761 'tags': 'count:24',
1762 'channel_id': 'UCN2XePorRokPB9TEgRZpddg',
1763 'release_timestamp': 1383701915,
1764 'comment_count': int,
1765 'upload_date': '20131105',
1766 'thumbnail': 'https://i.ytimg.com/vi/zaPI8MvL8pg/maxresdefault.jpg',
1767 'release_date': '20131106',
1768 'playable_in_embed': True,
1769 'live_status': 'was_live',
1770 'categories': ['Gaming'],
1771 'view_count': int,
1772 'uploader': 'WiiLikeToPlay',
1773 'uploader_id': '@WLTP',
1774 'uploader_url': 'https://www.youtube.com/@WLTP',
1775 },
1776 }, {
1777 'info_dict': {
1778 'id': 'R7r3vfO7Hao',
1779 'ext': 'mp4',
1780 'title': 'Terraria 1.2 Live Stream | Let\'s Play - Part 04 (Spencer)',
1781 'thumbnail': 'https://i.ytimg.com/vi/R7r3vfO7Hao/maxresdefault.jpg',
1782 'channel_id': 'UCN2XePorRokPB9TEgRZpddg',
1783 'like_count': int,
1784 'availability': 'public',
1785 'playable_in_embed': True,
1786 'upload_date': '20131105',
1787 'description': 'md5:563ccbc698b39298481ca3c571169519',
1788 'channel_follower_count': int,
1789 'tags': 'count:24',
1790 'release_date': '20131106',
1791 'comment_count': int,
1792 'channel_url': 'https://www.youtube.com/channel/UCN2XePorRokPB9TEgRZpddg',
1793 'channel': 'WiiLikeToPlay',
1794 'categories': ['Gaming'],
1795 'release_timestamp': 1383701914,
1796 'live_status': 'was_live',
1797 'age_limit': 0,
1798 'duration': 10128,
1799 'view_count': int,
1800 'uploader': 'WiiLikeToPlay',
1801 'uploader_id': '@WLTP',
1802 'uploader_url': 'https://www.youtube.com/@WLTP',
1803 },
1804 }],
1805 'params': {'skip_download': True},
1806 'skip': 'Not multifeed anymore',
1807 },
1808 {
1809 # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
1810 'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
1811 'info_dict': {
1812 'id': 'gVfLd0zydlo',
1813 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
1814 },
1815 'playlist_count': 2,
1816 'skip': 'Not multifeed anymore',
1817 },
1818 {
1819 'url': 'https://vid.plus/FlRa-iH7PGw',
1820 'only_matching': True,
1821 },
1822 {
1823 'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
1824 'only_matching': True,
1825 },
1826 {
1827 # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
1828 # Also tests cut-off URL expansion in video description (see
1829 # https://github.com/ytdl-org/youtube-dl/issues/1892,
1830 # https://github.com/ytdl-org/youtube-dl/issues/8164)
1831 'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
1832 'info_dict': {
1833 'id': 'lsguqyKfVQg',
1834 'ext': 'mp4',
1835 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
1836 'alt_title': 'Dark Walk',
1837 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
1838 'duration': 133,
1839 'upload_date': '20151119',
1840 'creator': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',
1841 'track': 'Dark Walk',
1842 'artist': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',
1843 'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
1844 'thumbnail': 'https://i.ytimg.com/vi_webp/lsguqyKfVQg/maxresdefault.webp',
1845 'categories': ['Film & Animation'],
1846 'view_count': int,
1847 'live_status': 'not_live',
1848 'channel_url': 'https://www.youtube.com/channel/UCTSRgz5jylBvFt_S7wnsqLQ',
1849 'channel_id': 'UCTSRgz5jylBvFt_S7wnsqLQ',
1850 'tags': 'count:13',
1851 'availability': 'public',
1852 'channel': 'IronSoulElf',
1853 'playable_in_embed': True,
1854 'like_count': int,
1855 'age_limit': 0,
1856 'channel_follower_count': int,
1857 },
1858 'params': {
1859 'skip_download': True,
1860 },
1861 },
1862 {
1863 # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
1864 'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
1865 'only_matching': True,
1866 },
1867 {
1868 # Video with yt:stretch=17:0
1869 'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
1870 'info_dict': {
1871 'id': 'Q39EVAstoRM',
1872 'ext': 'mp4',
1873 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
1874 'description': 'md5:ee18a25c350637c8faff806845bddee9',
1875 'upload_date': '20151107',
1876 },
1877 'params': {
1878 'skip_download': True,
1879 },
1880 'skip': 'This video does not exist.',
1881 },
1882 {
1883 # Video with incomplete 'yt:stretch=16:'
1884 'url': 'https://www.youtube.com/watch?v=FRhJzUSJbGI',
1885 'only_matching': True,
1886 },
1887 {
1888 # Video licensed under Creative Commons
1889 'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
1890 'info_dict': {
1891 'id': 'M4gD1WSo5mA',
1892 'ext': 'mp4',
1893 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
1894 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
1895 'duration': 721,
1896 'upload_date': '20150128',
1897 'license': 'Creative Commons Attribution license (reuse allowed)',
1898 'channel_id': 'UCuLGmD72gJDBwmLw06X58SA',
1899 'channel_url': 'https://www.youtube.com/channel/UCuLGmD72gJDBwmLw06X58SA',
1900 'like_count': int,
1901 'age_limit': 0,
1902 'tags': ['Copyright (Legal Subject)', 'Law (Industry)', 'William W. Fisher (Author)'],
1903 'channel': 'The Berkman Klein Center for Internet & Society',
1904 'availability': 'public',
1905 'view_count': int,
1906 'categories': ['Education'],
1907 'thumbnail': 'https://i.ytimg.com/vi_webp/M4gD1WSo5mA/maxresdefault.webp',
1908 'live_status': 'not_live',
1909 'playable_in_embed': True,
1910 'channel_follower_count': int,
1911 'chapters': list,
1912 'uploader': 'The Berkman Klein Center for Internet & Society',
1913 'uploader_id': '@BKCHarvard',
1914 'uploader_url': 'https://www.youtube.com/@BKCHarvard',
1915 'timestamp': 1422422076,
1916 },
1917 'params': {
1918 'skip_download': True,
1919 },
1920 },
1921 {
1922 'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
1923 'info_dict': {
1924 'id': 'eQcmzGIKrzg',
1925 'ext': 'mp4',
1926 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
1927 'description': 'md5:13a2503d7b5904ef4b223aa101628f39',
1928 'duration': 4060,
1929 'upload_date': '20151120',
1930 'license': 'Creative Commons Attribution license (reuse allowed)',
1931 'playable_in_embed': True,
1932 'tags': 'count:12',
1933 'like_count': int,
1934 'channel_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
1935 'age_limit': 0,
1936 'availability': 'public',
1937 'categories': ['News & Politics'],
1938 'channel': 'Bernie Sanders',
1939 'thumbnail': 'https://i.ytimg.com/vi_webp/eQcmzGIKrzg/maxresdefault.webp',
1940 'view_count': int,
1941 'live_status': 'not_live',
1942 'channel_url': 'https://www.youtube.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
1943 'comment_count': int,
1944 'channel_follower_count': int,
1945 'chapters': list,
1946 'uploader': 'Bernie Sanders',
1947 'uploader_url': 'https://www.youtube.com/@BernieSanders',
1948 'uploader_id': '@BernieSanders',
1949 'channel_is_verified': True,
1950 'heatmap': 'count:100',
1951 'timestamp': 1447987198,
1952 },
1953 'params': {
1954 'skip_download': True,
1955 },
1956 },
1957 {
1958 'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
1959 'only_matching': True,
1960 },
1961 {
1962 # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
1963 'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
1964 'only_matching': True,
1965 },
1966 {
1967 # Rental video preview
1968 'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
1969 'info_dict': {
1970 'id': 'uGpuVWrhIzE',
1971 'ext': 'mp4',
1972 'title': 'Piku - Trailer',
1973 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
1974 'upload_date': '20150811',
1975 'license': 'Standard YouTube License',
1976 },
1977 'params': {
1978 'skip_download': True,
1979 },
1980 'skip': 'This video is not available.',
1981 },
1982 {
1983 # YouTube Red video with episode data
1984 'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
1985 'info_dict': {
1986 'id': 'iqKdEhx-dD4',
1987 'ext': 'mp4',
1988 'title': 'Isolation - Mind Field (Ep 1)',
1989 'description': 'md5:f540112edec5d09fc8cc752d3d4ba3cd',
1990 'duration': 2085,
1991 'upload_date': '20170118',
1992 'series': 'Mind Field',
1993 'season_number': 1,
1994 'episode_number': 1,
1995 'thumbnail': 'https://i.ytimg.com/vi_webp/iqKdEhx-dD4/maxresdefault.webp',
1996 'tags': 'count:12',
1997 'view_count': int,
1998 'availability': 'public',
1999 'age_limit': 0,
2000 'channel': 'Vsauce',
2001 'episode': 'Episode 1',
2002 'categories': ['Entertainment'],
2003 'season': 'Season 1',
2004 'channel_id': 'UC6nSFpj9HTCZ5t-N3Rm3-HA',
2005 'channel_url': 'https://www.youtube.com/channel/UC6nSFpj9HTCZ5t-N3Rm3-HA',
2006 'like_count': int,
2007 'playable_in_embed': True,
2008 'live_status': 'not_live',
2009 'channel_follower_count': int,
2010 'uploader': 'Vsauce',
2011 'uploader_url': 'https://www.youtube.com/@Vsauce',
2012 'uploader_id': '@Vsauce',
2013 'comment_count': int,
2014 'channel_is_verified': True,
2015 'timestamp': 1484761047,
2016 },
2017 'params': {
2018 'skip_download': True,
2019 },
2020 'expected_warnings': [
2021 'Skipping DASH manifest',
2022 ],
2023 },
2024 {
2025 # The following content has been identified by the YouTube community
2026 # as inappropriate or offensive to some audiences.
2027 'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
2028 'info_dict': {
2029 'id': '6SJNVb0GnPI',
2030 'ext': 'mp4',
2031 'title': 'Race Differences in Intelligence',
2032 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
2033 'duration': 965,
2034 'upload_date': '20140124',
2035 },
2036 'params': {
2037 'skip_download': True,
2038 },
2039 'skip': 'This video has been removed for violating YouTube\'s policy on hate speech.',
2040 },
2041 {
2042 # itag 212
2043 'url': '1t24XAntNCY',
2044 'only_matching': True,
2045 },
2046 {
2047 # geo restricted to JP
2048 'url': 'sJL6WA-aGkQ',
2049 'only_matching': True,
2050 },
2051 {
2052 'url': 'https://invidio.us/watch?v=BaW_jenozKc',
2053 'only_matching': True,
2054 },
2055 {
2056 'url': 'https://redirect.invidious.io/watch?v=BaW_jenozKc',
2057 'only_matching': True,
2058 },
2059 {
2060 # from https://nitter.pussthecat.org/YouTube/status/1360363141947944964#m
2061 'url': 'https://redirect.invidious.io/Yh0AhrY9GjA',
2062 'only_matching': True,
2063 },
2064 {
2065 # DRM protected
2066 'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
2067 'only_matching': True,
2068 },
2069 {
2070 # Video with unsupported adaptive stream type formats
2071 'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
2072 'info_dict': {
2073 'id': 'Z4Vy8R84T1U',
2074 'ext': 'mp4',
2075 'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
2076 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
2077 'duration': 433,
2078 'upload_date': '20130923',
2079 'formats': 'maxcount:10',
2080 },
2081 'params': {
2082 'skip_download': True,
2083 'youtube_include_dash_manifest': False,
2084 },
2085 'skip': 'not actual anymore',
2086 },
2087 {
2088 # YouTube Music auto-generated description
2089 # TODO: fix metadata extraction
2090 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
2091 'info_dict': {
2092 'id': 'MgNrAu2pzNs',
2093 'ext': 'mp4',
2094 'title': 'Voyeur Girl',
2095 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
2096 'upload_date': '20190312',
2097 'artists': ['Stephen'],
2098 'creators': ['Stephen'],
2099 'track': 'Voyeur Girl',
2100 'album': 'it\'s too much love to know my dear',
2101 'release_date': '20190313',
2102 'alt_title': 'Voyeur Girl',
2103 'view_count': int,
2104 'playable_in_embed': True,
2105 'like_count': int,
2106 'categories': ['Music'],
2107 'channel_url': 'https://www.youtube.com/channel/UC-pWHpBjdGG69N9mM2auIAA',
2108 'channel': 'Stephen', # TODO: should be "Stephen - Topic"
2109 'uploader': 'Stephen',
2110 'availability': 'public',
2111 'duration': 169,
2112 'thumbnail': 'https://i.ytimg.com/vi_webp/MgNrAu2pzNs/maxresdefault.webp',
2113 'age_limit': 0,
2114 'channel_id': 'UC-pWHpBjdGG69N9mM2auIAA',
2115 'tags': 'count:11',
2116 'live_status': 'not_live',
2117 'channel_follower_count': int,
2118 },
2119 'params': {
2120 'skip_download': True,
2121 },
2122 },
2123 {
2124 'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',
2125 'only_matching': True,
2126 },
2127 {
2128 # invalid -> valid video id redirection
2129 'url': 'DJztXj2GPfl',
2130 'info_dict': {
2131 'id': 'DJztXj2GPfk',
2132 'ext': 'mp4',
2133 'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',
2134 'description': 'md5:bf577a41da97918e94fa9798d9228825',
2135 'upload_date': '20090125',
2136 'artist': 'Panjabi MC',
2137 'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',
2138 'album': 'Beware of the Boys (Mundian To Bach Ke)',
2139 },
2140 'params': {
2141 'skip_download': True,
2142 },
2143 'skip': 'Video unavailable',
2144 },
2145 {
2146 # empty description results in an empty string
2147 'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',
2148 'info_dict': {
2149 'id': 'x41yOUIvK2k',
2150 'ext': 'mp4',
2151 'title': 'IMG 3456',
2152 'description': '',
2153 'upload_date': '20170613',
2154 'view_count': int,
2155 'thumbnail': 'https://i.ytimg.com/vi_webp/x41yOUIvK2k/maxresdefault.webp',
2156 'like_count': int,
2157 'channel_id': 'UCo03ZQPBW5U4UC3regpt1nw',
2158 'tags': [],
2159 'channel_url': 'https://www.youtube.com/channel/UCo03ZQPBW5U4UC3regpt1nw',
2160 'availability': 'public',
2161 'age_limit': 0,
2162 'categories': ['Pets & Animals'],
2163 'duration': 7,
2164 'playable_in_embed': True,
2165 'live_status': 'not_live',
2166 'channel': 'l\'Or Vert asbl',
2167 'channel_follower_count': int,
2168 'uploader': 'l\'Or Vert asbl',
2169 'uploader_url': 'https://www.youtube.com/@ElevageOrVert',
2170 'uploader_id': '@ElevageOrVert',
2171 'timestamp': 1497343210,
2172 },
2173 'params': {
2174 'skip_download': True,
2175 },
2176 },
2177 {
2178 # with '};' inside yt initial data (see [1])
2179 # see [2] for an example with '};' inside ytInitialPlayerResponse
2180 # 1. https://github.com/ytdl-org/youtube-dl/issues/27093
2181 # 2. https://github.com/ytdl-org/youtube-dl/issues/27216
2182 'url': 'https://www.youtube.com/watch?v=CHqg6qOn4no',
2183 'info_dict': {
2184 'id': 'CHqg6qOn4no',
2185 'ext': 'mp4',
2186 'title': 'Part 77 Sort a list of simple types in c#',
2187 'description': 'md5:b8746fa52e10cdbf47997903f13b20dc',
2188 'upload_date': '20130831',
2189 'channel_id': 'UCCTVrRB5KpIiK6V2GGVsR1Q',
2190 'like_count': int,
2191 'channel_url': 'https://www.youtube.com/channel/UCCTVrRB5KpIiK6V2GGVsR1Q',
2192 'live_status': 'not_live',
2193 'categories': ['Education'],
2194 'availability': 'public',
2195 'thumbnail': 'https://i.ytimg.com/vi/CHqg6qOn4no/sddefault.jpg',
2196 'tags': 'count:12',
2197 'playable_in_embed': True,
2198 'age_limit': 0,
2199 'view_count': int,
2200 'duration': 522,
2201 'channel': 'kudvenkat',
2202 'comment_count': int,
2203 'channel_follower_count': int,
2204 'chapters': list,
2205 'uploader': 'kudvenkat',
2206 'uploader_url': 'https://www.youtube.com/@Csharp-video-tutorialsBlogspot',
2207 'uploader_id': '@Csharp-video-tutorialsBlogspot',
2208 'channel_is_verified': True,
2209 'heatmap': 'count:100',
2210 'timestamp': 1377976349,
2211 },
2212 'params': {
2213 'skip_download': True,
2214 },
2215 },
2216 {
2217 # another example of '};' in ytInitialData
2218 'url': 'https://www.youtube.com/watch?v=gVfgbahppCY',
2219 'only_matching': True,
2220 },
2221 {
2222 'url': 'https://www.youtube.com/watch_popup?v=63RmMXCd_bQ',
2223 'only_matching': True,
2224 },
2225 {
2226 # https://github.com/ytdl-org/youtube-dl/pull/28094
2227 'url': 'OtqTfy26tG0',
2228 'info_dict': {
2229 'id': 'OtqTfy26tG0',
2230 'ext': 'mp4',
2231 'title': 'Burn Out',
2232 'description': 'md5:8d07b84dcbcbfb34bc12a56d968b6131',
2233 'upload_date': '20141120',
2234 'artist': 'The Cinematic Orchestra',
2235 'track': 'Burn Out',
2236 'album': 'Every Day',
2237 'like_count': int,
2238 'live_status': 'not_live',
2239 'alt_title': 'Burn Out',
2240 'duration': 614,
2241 'age_limit': 0,
2242 'view_count': int,
2243 'channel_url': 'https://www.youtube.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',
2244 'creator': 'The Cinematic Orchestra',
2245 'channel': 'The Cinematic Orchestra',
2246 'tags': ['The Cinematic Orchestra', 'Every Day', 'Burn Out'],
2247 'channel_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',
2248 'availability': 'public',
2249 'thumbnail': 'https://i.ytimg.com/vi/OtqTfy26tG0/maxresdefault.jpg',
2250 'categories': ['Music'],
2251 'playable_in_embed': True,
2252 'channel_follower_count': int,
2253 'uploader': 'The Cinematic Orchestra',
2254 'comment_count': int,
2255 },
2256 'params': {
2257 'skip_download': True,
2258 },
2259 },
2260 {
2261 # controversial video, only works with bpctr when authenticated with cookies
2262 'url': 'https://www.youtube.com/watch?v=nGC3D_FkCmg',
2263 'only_matching': True,
2264 },
2265 {
2266 # controversial video, requires bpctr/contentCheckOk
2267 'url': 'https://www.youtube.com/watch?v=SZJvDhaSDnc',
2268 'info_dict': {
2269 'id': 'SZJvDhaSDnc',
2270 'ext': 'mp4',
2271 'title': 'San Diego teen commits suicide after bullying over embarrassing video',
2272 'channel_id': 'UC-SJ6nODDmufqBzPBwCvYvQ',
2273 'upload_date': '20140716',
2274 'description': 'md5:acde3a73d3f133fc97e837a9f76b53b7',
2275 'duration': 170,
2276 'categories': ['News & Politics'],
2277 'view_count': int,
2278 'channel': 'CBS Mornings',
2279 'tags': ['suicide', 'bullying', 'video', 'cbs', 'news'],
2280 'thumbnail': 'https://i.ytimg.com/vi/SZJvDhaSDnc/hqdefault.jpg',
2281 'age_limit': 18,
2282 'availability': 'needs_auth',
2283 'channel_url': 'https://www.youtube.com/channel/UC-SJ6nODDmufqBzPBwCvYvQ',
2284 'like_count': int,
2285 'live_status': 'not_live',
2286 'playable_in_embed': True,
2287 'channel_follower_count': int,
2288 'uploader': 'CBS Mornings',
2289 'uploader_url': 'https://www.youtube.com/@CBSMornings',
2290 'uploader_id': '@CBSMornings',
2291 'comment_count': int,
2292 'channel_is_verified': True,
2293 'timestamp': 1405513526,
2294 },
2295 },
2296 {
2297 # restricted location, https://github.com/ytdl-org/youtube-dl/issues/28685
2298 'url': 'cBvYw8_A0vQ',
2299 'info_dict': {
2300 'id': 'cBvYw8_A0vQ',
2301 'ext': 'mp4',
2302 'title': '4K Ueno Okachimachi Street Scenes 上野御徒町歩き',
2303 'description': 'md5:ea770e474b7cd6722b4c95b833c03630',
2304 'upload_date': '20201120',
2305 'duration': 1456,
2306 'categories': ['Travel & Events'],
2307 'channel_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',
2308 'view_count': int,
2309 'channel': 'Walk around Japan',
2310 'tags': ['Ueno Tokyo', 'Okachimachi Tokyo', 'Ameyoko Street', 'Tokyo attraction', 'Travel in Tokyo'],
2311 'thumbnail': 'https://i.ytimg.com/vi/cBvYw8_A0vQ/hqdefault.jpg',
2312 'age_limit': 0,
2313 'availability': 'public',
2314 'channel_url': 'https://www.youtube.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',
2315 'live_status': 'not_live',
2316 'playable_in_embed': True,
2317 'channel_follower_count': int,
2318 'uploader': 'Walk around Japan',
2319 'uploader_url': 'https://www.youtube.com/@walkaroundjapan7124',
2320 'uploader_id': '@walkaroundjapan7124',
2321 'timestamp': 1605884416,
2322 },
2323 'params': {
2324 'skip_download': True,
2325 },
2326 }, {
2327 # Has multiple audio streams
2328 'url': 'WaOKSUlf4TM',
2329 'only_matching': True,
2330 }, {
2331 # Requires Premium: has format 141 when requested using YTM url
2332 'url': 'https://music.youtube.com/watch?v=XclachpHxis',
2333 'only_matching': True,
2334 }, {
2335 # multiple subtitles with same lang_code
2336 'url': 'https://www.youtube.com/watch?v=wsQiKKfKxug',
2337 'only_matching': True,
2338 }, {
2339 # Force use of the android client fallback
2340 'url': 'https://www.youtube.com/watch?v=YOelRv7fMxY',
2341 'info_dict': {
2342 'id': 'YOelRv7fMxY',
2343 'title': 'DIGGING A SECRET TUNNEL Part 1',
2344 'ext': '3gp',
2345 'upload_date': '20210624',
2346 'channel_id': 'UCp68_FLety0O-n9QU6phsgw',
2347 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCp68_FLety0O-n9QU6phsgw',
2348 'description': 'md5:5d5991195d599b56cd0c4148907eec50',
2349 'duration': 596,
2350 'categories': ['Entertainment'],
2351 'view_count': int,
2352 'channel': 'colinfurze',
2353 'tags': ['Colin', 'furze', 'Terry', 'tunnel', 'underground', 'bunker'],
2354 'thumbnail': 'https://i.ytimg.com/vi/YOelRv7fMxY/maxresdefault.jpg',
2355 'age_limit': 0,
2356 'availability': 'public',
2357 'like_count': int,
2358 'live_status': 'not_live',
2359 'playable_in_embed': True,
2360 'channel_follower_count': int,
2361 'chapters': list,
2362 'uploader': 'colinfurze',
2363 'uploader_url': 'https://www.youtube.com/@colinfurze',
2364 'uploader_id': '@colinfurze',
2365 'comment_count': int,
2366 'channel_is_verified': True,
2367 'heatmap': 'count:100',
2368 },
2369 'params': {
2370 'format': '17', # 3gp format available on android
2371 'extractor_args': {'youtube': {'player_client': ['android']}},
2372 },
2373 'skip': 'android client broken',
2374 },
2375 {
2376 # Skip download of additional client configs (remix client config in this case)
2377 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
2378 'only_matching': True,
2379 'params': {
2380 'extractor_args': {'youtube': {'player_skip': ['configs']}},
2381 },
2382 }, {
2383 # shorts
2384 'url': 'https://www.youtube.com/shorts/BGQWPY4IigY',
2385 'only_matching': True,
2386 }, {
2387 'note': 'Storyboards',
2388 'url': 'https://www.youtube.com/watch?v=5KLPxDtMqe8',
2389 'info_dict': {
2390 'id': '5KLPxDtMqe8',
2391 'ext': 'mhtml',
2392 'format_id': 'sb0',
2393 'title': 'Your Brain is Plastic',
2394 'description': 'md5:89cd86034bdb5466cd87c6ba206cd2bc',
2395 'upload_date': '20140324',
2396 'like_count': int,
2397 'channel_id': 'UCZYTClx2T1of7BRZ86-8fow',
2398 'channel_url': 'https://www.youtube.com/channel/UCZYTClx2T1of7BRZ86-8fow',
2399 'view_count': int,
2400 'thumbnail': 'https://i.ytimg.com/vi/5KLPxDtMqe8/maxresdefault.jpg',
2401 'playable_in_embed': True,
2402 'tags': 'count:12',
2403 'availability': 'public',
2404 'channel': 'SciShow',
2405 'live_status': 'not_live',
2406 'duration': 248,
2407 'categories': ['Education'],
2408 'age_limit': 0,
2409 'channel_follower_count': int,
2410 'chapters': list,
2411 'uploader': 'SciShow',
2412 'uploader_url': 'https://www.youtube.com/@SciShow',
2413 'uploader_id': '@SciShow',
2414 'comment_count': int,
2415 'channel_is_verified': True,
2416 'heatmap': 'count:100',
2417 'timestamp': 1395685455,
2418 }, 'params': {'format': 'mhtml', 'skip_download': True},
2419 }, {
2420 # Ensure video upload_date is in UTC timezone (video was uploaded 1641170939)
2421 'url': 'https://www.youtube.com/watch?v=2NUZ8W2llS4',
2422 'info_dict': {
2423 'id': '2NUZ8W2llS4',
2424 'ext': 'mp4',
2425 'title': 'The NP that test your phone performance 🙂',
2426 'description': 'md5:144494b24d4f9dfacb97c1bbef5de84d',
2427 'channel_id': 'UCRqNBSOHgilHfAczlUmlWHA',
2428 'channel_url': 'https://www.youtube.com/channel/UCRqNBSOHgilHfAczlUmlWHA',
2429 'duration': 21,
2430 'view_count': int,
2431 'age_limit': 0,
2432 'categories': ['Gaming'],
2433 'tags': 'count:23',
2434 'playable_in_embed': True,
2435 'live_status': 'not_live',
2436 'upload_date': '20220103',
2437 'like_count': int,
2438 'availability': 'public',
2439 'channel': 'Leon Nguyen',
2440 'thumbnail': 'https://i.ytimg.com/vi_webp/2NUZ8W2llS4/maxresdefault.webp',
2441 'comment_count': int,
2442 'channel_follower_count': int,
2443 'uploader': 'Leon Nguyen',
2444 'uploader_url': 'https://www.youtube.com/@LeonNguyen',
2445 'uploader_id': '@LeonNguyen',
2446 'heatmap': 'count:100',
2447 'timestamp': 1641170939,
2448 },
2449 }, {
2450 # date text is premiered video, ensure upload date in UTC (published 1641172509)
2451 'url': 'https://www.youtube.com/watch?v=mzZzzBU6lrM',
2452 'info_dict': {
2453 'id': 'mzZzzBU6lrM',
2454 'ext': 'mp4',
2455 'title': 'I Met GeorgeNotFound In Real Life...',
2456 'description': 'md5:978296ec9783a031738b684d4ebf302d',
2457 'channel_id': 'UC_8NknAFiyhOUaZqHR3lq3Q',
2458 'channel_url': 'https://www.youtube.com/channel/UC_8NknAFiyhOUaZqHR3lq3Q',
2459 'duration': 955,
2460 'view_count': int,
2461 'age_limit': 0,
2462 'categories': ['Entertainment'],
2463 'tags': 'count:26',
2464 'playable_in_embed': True,
2465 'live_status': 'not_live',
2466 'release_timestamp': 1641172509,
2467 'release_date': '20220103',
2468 'upload_date': '20220103',
2469 'like_count': int,
2470 'availability': 'public',
2471 'channel': 'Quackity',
2472 'thumbnail': 'https://i.ytimg.com/vi/mzZzzBU6lrM/maxresdefault.jpg',
2473 'channel_follower_count': int,
2474 'uploader': 'Quackity',
2475 'uploader_id': '@Quackity',
2476 'uploader_url': 'https://www.youtube.com/@Quackity',
2477 'comment_count': int,
2478 'channel_is_verified': True,
2479 'heatmap': 'count:100',
2480 'timestamp': 1641172509,
2481 },
2482 },
2483 { # continuous livestream.
2484 # Upload date was 2022-07-12T05:12:29-07:00, while stream start is 2022-07-12T15:59:30+00:00
2485 'url': 'https://www.youtube.com/watch?v=jfKfPfyJRdk',
2486 'info_dict': {
2487 'id': 'jfKfPfyJRdk',
2488 'ext': 'mp4',
2489 'channel_id': 'UCSJ4gkVC6NrvII8umztf0Ow',
2490 'like_count': int,
2491 'uploader': 'Lofi Girl',
2492 'categories': ['Music'],
2493 'concurrent_view_count': int,
2494 'playable_in_embed': True,
2495 'timestamp': 1657627949,
2496 'release_date': '20220712',
2497 'channel_url': 'https://www.youtube.com/channel/UCSJ4gkVC6NrvII8umztf0Ow',
2498 'description': 'md5:13a6f76df898f5674f9127139f3df6f7',
2499 'age_limit': 0,
2500 'thumbnail': 'https://i.ytimg.com/vi/jfKfPfyJRdk/maxresdefault.jpg',
2501 'release_timestamp': 1657641570,
2502 'uploader_url': 'https://www.youtube.com/@LofiGirl',
2503 'channel_follower_count': int,
2504 'channel_is_verified': True,
2505 'title': r're:^lofi hip hop radio 📚 - beats to relax/study to',
2506 'view_count': int,
2507 'live_status': 'is_live',
2508 'tags': 'count:32',
2509 'channel': 'Lofi Girl',
2510 'availability': 'public',
2511 'upload_date': '20220712',
2512 'uploader_id': '@LofiGirl',
2513 },
2514 'params': {'skip_download': True},
2515 }, {
2516 'url': 'https://www.youtube.com/watch?v=tjjjtzRLHvA',
2517 'info_dict': {
2518 'id': 'tjjjtzRLHvA',
2519 'ext': 'mp4',
2520 'title': 'ハッシュタグ無し };if window.ytcsi',
2521 'upload_date': '20220323',
2522 'like_count': int,
2523 'availability': 'unlisted',
2524 'channel': 'Lesmiscore',
2525 'thumbnail': r're:^https?://.*\.jpg',
2526 'age_limit': 0,
2527 'categories': ['Music'],
2528 'view_count': int,
2529 'description': '',
2530 'channel_url': 'https://www.youtube.com/channel/UCdqltm_7iv1Vs6kp6Syke5A',
2531 'channel_id': 'UCdqltm_7iv1Vs6kp6Syke5A',
2532 'live_status': 'not_live',
2533 'playable_in_embed': True,
2534 'channel_follower_count': int,
2535 'duration': 6,
2536 'tags': [],
2537 'uploader_id': '@lesmiscore',
2538 'uploader': 'Lesmiscore',
2539 'uploader_url': 'https://www.youtube.com/@lesmiscore',
2540 'timestamp': 1648005313,
2541 },
2542 }, {
2543 # Prefer primary title+description language metadata by default
2544 # Do not prefer translated description if primary is empty
2545 'url': 'https://www.youtube.com/watch?v=el3E4MbxRqQ',
2546 'info_dict': {
2547 'id': 'el3E4MbxRqQ',
2548 'ext': 'mp4',
2549 'title': 'dlp test video 2 - primary sv no desc',
2550 'description': '',
2551 'channel': 'cole-dlp-test-acc',
2552 'tags': [],
2553 'view_count': int,
2554 'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
2555 'like_count': int,
2556 'playable_in_embed': True,
2557 'availability': 'unlisted',
2558 'thumbnail': r're:^https?://.*\.jpg',
2559 'age_limit': 0,
2560 'duration': 5,
2561 'live_status': 'not_live',
2562 'upload_date': '20220908',
2563 'categories': ['People & Blogs'],
2564 'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',
2565 'uploader_url': 'https://www.youtube.com/@coletdjnz',
2566 'uploader_id': '@coletdjnz',
2567 'uploader': 'cole-dlp-test-acc',
2568 'timestamp': 1662677394,
2569 },
2570 'params': {'skip_download': True},
2571 }, {
2572 # Extractor argument: prefer translated title+description
2573 'url': 'https://www.youtube.com/watch?v=gHKT4uU8Zng',
2574 'info_dict': {
2575 'id': 'gHKT4uU8Zng',
2576 'ext': 'mp4',
2577 'channel': 'cole-dlp-test-acc',
2578 'tags': [],
2579 'duration': 5,
2580 'live_status': 'not_live',
2581 'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',
2582 'upload_date': '20220729',
2583 'view_count': int,
2584 'categories': ['People & Blogs'],
2585 'thumbnail': r're:^https?://.*\.jpg',
2586 'title': 'dlp test video title translated (fr)',
2587 'availability': 'public',
2588 'age_limit': 0,
2589 'description': 'dlp test video description translated (fr)',
2590 'playable_in_embed': True,
2591 'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
2592 'uploader_url': 'https://www.youtube.com/@coletdjnz',
2593 'uploader_id': '@coletdjnz',
2594 'uploader': 'cole-dlp-test-acc',
2595 'timestamp': 1659073275,
2596 'like_count': int,
2597 },
2598 'params': {'skip_download': True, 'extractor_args': {'youtube': {'lang': ['fr']}}},
2599 'expected_warnings': [r'Preferring "fr" translated fields'],
2600 }, {
2601 'note': '6 channel audio',
2602 'url': 'https://www.youtube.com/watch?v=zgdo7-RRjgo',
2603 'only_matching': True,
2604 }, {
2605 'note': 'Multiple HLS formats with same itag',
2606 'url': 'https://www.youtube.com/watch?v=kX3nB4PpJko',
2607 'info_dict': {
2608 'id': 'kX3nB4PpJko',
2609 'ext': 'mp4',
2610 'categories': ['Entertainment'],
2611 'description': 'md5:e8031ff6e426cdb6a77670c9b81f6fa6',
2612 'live_status': 'not_live',
2613 'duration': 937,
2614 'channel_follower_count': int,
2615 'thumbnail': 'https://i.ytimg.com/vi_webp/kX3nB4PpJko/maxresdefault.webp',
2616 'title': 'Last To Take Hand Off Jet, Keeps It!',
2617 'channel': 'MrBeast',
2618 'playable_in_embed': True,
2619 'view_count': int,
2620 'upload_date': '20221112',
2621 'channel_url': 'https://www.youtube.com/channel/UCX6OQ3DkcsbYNE6H8uQQuVA',
2622 'age_limit': 0,
2623 'availability': 'public',
2624 'channel_id': 'UCX6OQ3DkcsbYNE6H8uQQuVA',
2625 'like_count': int,
2626 'tags': [],
2627 'uploader': 'MrBeast',
2628 'uploader_url': 'https://www.youtube.com/@MrBeast',
2629 'uploader_id': '@MrBeast',
2630 'comment_count': int,
2631 'channel_is_verified': True,
2632 'heatmap': 'count:100',
2633 },
2634 'params': {'extractor_args': {'youtube': {'player_client': ['ios']}}, 'format': '233-1'},
2635 }, {
2636 'note': 'Audio formats with Dynamic Range Compression',
2637 'url': 'https://www.youtube.com/watch?v=Tq92D6wQ1mg',
2638 'info_dict': {
2639 'id': 'Tq92D6wQ1mg',
2640 'ext': 'webm',
2641 'title': '[MMD] Adios - EVERGLOW [+Motion DL]',
2642 'channel_url': 'https://www.youtube.com/channel/UC1yoRdFoFJaCY-AGfD9W0wQ',
2643 'channel_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',
2644 'channel_follower_count': int,
2645 'description': 'md5:17eccca93a786d51bc67646756894066',
2646 'upload_date': '20191228',
2647 'tags': ['mmd', 'dance', 'mikumikudance', 'kpop', 'vtuber'],
2648 'playable_in_embed': True,
2649 'like_count': int,
2650 'categories': ['Entertainment'],
2651 'thumbnail': 'https://i.ytimg.com/vi/Tq92D6wQ1mg/sddefault.jpg',
2652 'age_limit': 18,
2653 'channel': 'Projekt Melody',
2654 'view_count': int,
2655 'availability': 'needs_auth',
2656 'comment_count': int,
2657 'live_status': 'not_live',
2658 'duration': 106,
2659 'uploader': 'Projekt Melody',
2660 'uploader_id': '@ProjektMelody',
2661 'uploader_url': 'https://www.youtube.com/@ProjektMelody',
2662 'timestamp': 1577508724,
2663 },
2664 'params': {'extractor_args': {'youtube': {'player_client': ['tv_embedded']}}, 'format': '251-drc'},
2665 },
2666 {
2667 'url': 'https://www.youtube.com/live/qVv6vCqciTM',
2668 'info_dict': {
2669 'id': 'qVv6vCqciTM',
2670 'ext': 'mp4',
2671 'age_limit': 0,
2672 'comment_count': int,
2673 'chapters': 'count:13',
2674 'upload_date': '20221223',
2675 'thumbnail': 'https://i.ytimg.com/vi/qVv6vCqciTM/maxresdefault.jpg',
2676 'channel_url': 'https://www.youtube.com/channel/UCIdEIHpS0TdkqRkHL5OkLtA',
2677 'like_count': int,
2678 'release_date': '20221223',
2679 'tags': ['Vtuber', '月ノ美兎', '名取さな', 'にじさんじ', 'クリスマス', '3D配信'],
2680 'title': '【 #インターネット女クリスマス 】3Dで歌ってはしゃぐインターネットの女たち【月ノ美兎/名取さな】',
2681 'view_count': int,
2682 'playable_in_embed': True,
2683 'duration': 4438,
2684 'availability': 'public',
2685 'channel_follower_count': int,
2686 'channel_id': 'UCIdEIHpS0TdkqRkHL5OkLtA',
2687 'categories': ['Entertainment'],
2688 'live_status': 'was_live',
2689 'release_timestamp': 1671793345,
2690 'channel': 'さなちゃんねる',
2691 'description': 'md5:6aebf95cc4a1d731aebc01ad6cc9806d',
2692 'uploader': 'さなちゃんねる',
2693 'uploader_url': 'https://www.youtube.com/@sana_natori',
2694 'uploader_id': '@sana_natori',
2695 'channel_is_verified': True,
2696 'heatmap': 'count:100',
2697 'timestamp': 1671798112,
2698 },
2699 },
2700 {
2701 # Fallbacks when webpage and web client is unavailable
2702 'url': 'https://www.youtube.com/watch?v=wSSmNUl9Snw',
2703 'info_dict': {
2704 'id': 'wSSmNUl9Snw',
2705 'ext': 'mp4',
2706 # 'categories': ['Science & Technology'],
2707 'view_count': int,
2708 'chapters': 'count:2',
2709 'channel': 'Scott Manley',
2710 'like_count': int,
2711 'age_limit': 0,
2712 # 'availability': 'public',
2713 'channel_follower_count': int,
2714 'live_status': 'not_live',
2715 'upload_date': '20170831',
2716 'duration': 682,
2717 'tags': 'count:8',
2718 'uploader_url': 'https://www.youtube.com/@scottmanley',
2719 'description': 'md5:f4bed7b200404b72a394c2f97b782c02',
2720 'uploader': 'Scott Manley',
2721 'uploader_id': '@scottmanley',
2722 'title': 'The Computer Hack That Saved Apollo 14',
2723 'channel_id': 'UCxzC4EngIsMrPmbm6Nxvb-A',
2724 'thumbnail': r're:^https?://.*\.webp',
2725 'channel_url': 'https://www.youtube.com/channel/UCxzC4EngIsMrPmbm6Nxvb-A',
2726 'playable_in_embed': True,
2727 'comment_count': int,
2728 'channel_is_verified': True,
2729 'heatmap': 'count:100',
2730 },
2731 'params': {
2732 'extractor_args': {'youtube': {'player_client': ['ios'], 'player_skip': ['webpage']}},
2733 },
2734 },
2735 ]
2736
2737 _WEBPAGE_TESTS = [
2738 # YouTube <object> embed
2739 {
2740 'url': 'http://www.improbable.com/2017/04/03/untrained-modern-youths-and-ancient-masters-in-selfie-portraits/',
2741 'md5': '873c81d308b979f0e23ee7e620b312a3',
2742 'info_dict': {
2743 'id': 'msN87y-iEx0',
2744 'ext': 'mp4',
2745 'title': 'Feynman: Mirrors FUN TO IMAGINE 6',
2746 'upload_date': '20080526',
2747 'description': 'md5:873c81d308b979f0e23ee7e620b312a3',
2748 'age_limit': 0,
2749 'tags': ['feynman', 'mirror', 'science', 'physics', 'imagination', 'fun', 'cool', 'puzzle'],
2750 'channel_id': 'UCCeo--lls1vna5YJABWAcVA',
2751 'playable_in_embed': True,
2752 'thumbnail': 'https://i.ytimg.com/vi/msN87y-iEx0/hqdefault.jpg',
2753 'like_count': int,
2754 'comment_count': int,
2755 'channel': 'Christopher Sykes',
2756 'live_status': 'not_live',
2757 'channel_url': 'https://www.youtube.com/channel/UCCeo--lls1vna5YJABWAcVA',
2758 'availability': 'public',
2759 'duration': 195,
2760 'view_count': int,
2761 'categories': ['Science & Technology'],
2762 'channel_follower_count': int,
2763 'uploader': 'Christopher Sykes',
2764 'uploader_url': 'https://www.youtube.com/@ChristopherSykesDocumentaries',
2765 'uploader_id': '@ChristopherSykesDocumentaries',
2766 'heatmap': 'count:100',
2767 'timestamp': 1211825920,
2768 },
2769 'params': {
2770 'skip_download': True,
2771 },
2772 },
2773 ]
2774
@classmethod
def suitable(cls, url):
    """Return False for URLs carrying a non-empty `list` query parameter, else defer to the parent class."""
    # NOTE(review): the import is deliberately kept function-local; presumably this
    # method has to stay self-contained if its source is copied elsewhere - confirm before hoisting
    from ..utils import parse_qs

    list_values = parse_qs(url).get('list', [None])
    if list_values[0]:
        return False
    return super().suitable(url)
2783
def __init__(self, *args, **kwargs):
    """Initialise the parent class and start with empty per-instance player caches."""
    super().__init__(*args, **kwargs)
    # _code_cache: player id -> downloaded player JS (filled by _load_player)
    # _player_cache: cache id tuple -> result or exception (filled by _cached)
    self._code_cache, self._player_cache = {}, {}
2788
def _prepare_live_from_start_formats(self, formats, video_id, live_start_time, url, webpage_url, smuggled_data, is_live):
    """
    Attach fragment sources to the formats flagged with 'is_from_start'

    Only format dicts with a truthy 'is_from_start' are processed, and they are
    modified in place: 'is_live' is set on each; while live, 'fragments' becomes a
    callable taking the download context and 'protocol' is set to
    'http_dash_segments_generator'; otherwise 'fragments' becomes a LazyList of the
    generated fragments and the 'is_from_start' key is removed.
    Nothing is returned.
    """
    # Serialises manifest refetches requested through mpd_feed
    lock = threading.Lock()
    # Time of the last manifest fetch; compared against the caller-supplied delay
    start_time = time.time()
    # Rebinds the local name only; the list passed by the caller is left untouched
    formats = [f for f in formats if f.get('is_from_start')]

    def refetch_manifest(format_id, delay):
        # Re-download the player responses and rebuild `formats`, but only once more
        # than `delay` seconds have passed since the last fetch.
        # `format_id` is accepted but not used here.
        nonlocal formats, start_time, is_live
        if time.time() <= start_time + delay:
            return

        _, _, prs, player_url = self._download_player_responses(url, smuggled_data, video_id, webpage_url)
        video_details = traverse_obj(prs, (..., 'videoDetails'), expected_type=dict)
        microformats = traverse_obj(
            prs, (..., 'microformat', 'playerMicroformatRenderer'),
            expected_type=dict)
        _, live_status, _, formats, _ = self._list_formats(video_id, microformats, video_details, prs, player_url)
        is_live = live_status == 'is_live'
        start_time = time.time()

    def mpd_feed(format_id, delay):
        """
        @returns (manifest_url, manifest_stream_number, is_live) or None
        """
        for retry in self.RetryManager(fatal=False):
            with lock:
                refetch_manifest(format_id, delay)

            # Look the format up in the (possibly refreshed) list
            f = next((f for f in formats if f['format_id'] == format_id), None)
            if not f:
                if not is_live:
                    retry.error = f'{video_id}: Video is no longer live'
                else:
                    retry.error = f'Cannot find refreshed manifest for format {format_id}{bug_reports_message()}'
                continue
            return f['manifest_url'], f['manifest_stream_number'], is_live
        return None

    for f in formats:
        f['is_live'] = is_live
        # The last bound argument is False while live, else a copy of the format
        # (received by _live_dash_fragments as `manifestless_orig_fmt`)
        gen = functools.partial(self._live_dash_fragments, video_id, f['format_id'],
                                live_start_time, mpd_feed, not is_live and f.copy())
        if is_live:
            f['fragments'] = gen
            f['protocol'] = 'http_dash_segments_generator'
        else:
            f['fragments'] = LazyList(gen({}))
            del f['is_from_start']
2836
def _live_dash_fragments(self, video_id, format_id, live_start_time, mpd_feed, manifestless_orig_fmt, ctx):
    """
    Generate fragment dicts ('url', 'fragment_count') for a live DASH format

    @param live_start_time          Start time of the stream (epoch seconds) or a falsy value
    @param mpd_feed                 Callable (format_id, delay) returning
                                    (manifest_url, manifest_stream_number, is_live) or None
    @param manifestless_orig_fmt    Falsy, or a format dict providing 'fragments' and
                                    'fragment_base_url'; when given, no manifest is
                                    downloaded and the loop stops after one pass
    @param ctx                      Dict; 'start' is read and 'last_error' is popped from it
    """
    # FETCH_SPAN: minimum seconds between two polling iterations
    # MAX_DURATION: 432000 seconds = 120 hours (see the warning below)
    FETCH_SPAN, MAX_DURATION = 5, 432000

    mpd_url, stream_number, is_live = None, None, True

    begin_index = 0
    download_start_time = ctx.get('start') or time.time()

    lack_early_segments = download_start_time - (live_start_time or download_start_time) > MAX_DURATION
    if lack_early_segments:
        self.report_warning(bug_reports_message(
            'Starting download from the last 120 hours of the live stream since '
            'YouTube does not have data before that. If you think this is wrong,'), only_once=True)

    known_idx, no_fragment_score, last_segment_url = begin_index, 0, None
    fragments, fragment_base_url = None, None
    # Bound up front: _extract_sequence_from_mpd() returns this enclosing variable on
    # its failure paths, which can be reached before the main loop has assigned it.
    # Every such return is paired with a falsy first element, so None is never used
    # in the comparisons/arithmetic below
    last_seq = None

    def _extract_sequence_from_mpd(refresh_sequence, immediate):
        # Returns (should_continue, last sequence number)
        nonlocal mpd_url, stream_number, is_live, no_fragment_score, fragments, fragment_base_url
        # Obtain from MPD's maximum seq value
        old_mpd_url = mpd_url
        last_error = ctx.pop('last_error', None)
        expire_fast = immediate or last_error and isinstance(last_error, HTTPError) and last_error.status == 403
        # If the feed gives nothing, keep the previous manifest and consider the stream ended
        mpd_url, stream_number, is_live = (mpd_feed(format_id, 5 if expire_fast else 18000)
                                           or (mpd_url, stream_number, False))
        if not refresh_sequence:
            if expire_fast and not is_live:
                return False, last_seq
            elif old_mpd_url == mpd_url:
                return True, last_seq
        if manifestless_orig_fmt:
            fmt_info = manifestless_orig_fmt
        else:
            try:
                fmts, _ = self._extract_mpd_formats_and_subtitles(
                    mpd_url, None, note=False, errnote=False, fatal=False)
            except ExtractorError:
                fmts = None
            if not fmts:
                no_fragment_score += 2
                return False, last_seq
            fmt_info = next(x for x in fmts if x['manifest_stream_number'] == stream_number)
        fragments = fmt_info['fragments']
        fragment_base_url = fmt_info['fragment_base_url']
        assert fragment_base_url

        _last_seq = int(re.search(r'(?:/|^)sq/(\d+)', fragments[-1]['path']).group(1))
        return True, _last_seq

    self.write_debug(f'[{video_id}] Generating fragments for format {format_id}')
    while is_live:
        fetch_time = time.time()
        # Give up once too many consecutive attempts produced nothing new
        if no_fragment_score > 30:
            return
        if last_segment_url:
            # Obtain from "X-Head-Seqnum" header value from each segment
            try:
                urlh = self._request_webpage(
                    last_segment_url, None, note=False, errnote=False, fatal=False)
            except ExtractorError:
                urlh = None
            last_seq = try_get(urlh, lambda x: int_or_none(x.headers['X-Head-Seqnum']))
            if last_seq is None:
                no_fragment_score += 2
                last_segment_url = None
                continue
        else:
            should_continue, last_seq = _extract_sequence_from_mpd(True, no_fragment_score > 15)
            no_fragment_score += 2
            if not should_continue:
                continue

        if known_idx > last_seq:
            last_segment_url = None
            continue

        last_seq += 1

        if begin_index < 0 and known_idx < 0:
            # skip from the start when it's negative value
            known_idx = last_seq + begin_index
        if lack_early_segments:
            known_idx = max(known_idx, last_seq - int(MAX_DURATION // fragments[-1]['duration']))
        try:
            for idx in range(known_idx, last_seq):
                # do not update sequence here or you'll get skipped some part of it
                should_continue, _ = _extract_sequence_from_mpd(False, False)
                if not should_continue:
                    known_idx = idx - 1
                    # Used purely as control flow; caught right below
                    raise ExtractorError('breaking out of outer loop')
                last_segment_url = urljoin(fragment_base_url, f'sq/{idx}')
                yield {
                    'url': last_segment_url,
                    'fragment_count': last_seq,
                }
            if known_idx == last_seq:
                no_fragment_score += 5
            else:
                no_fragment_score = 0
            known_idx = last_seq
        except ExtractorError:
            continue

        if manifestless_orig_fmt:
            # Stop at the first iteration if running for post-live manifestless;
            # fragment count no longer increase since it starts
            break

        time.sleep(max(0, FETCH_SPAN + fetch_time - time.time()))
2947
def _extract_player_url(self, *ytcfgs, webpage=None):
    """
    Return the absolute player JS URL found in the given ytcfgs, or None

    'PLAYER_JS_URL' is looked up first, then the 'jsUrl' of any
    'WEB_PLAYER_CONTEXT_CONFIGS' entry. `webpage` is accepted but not used.
    """
    js_url = traverse_obj(
        ytcfgs, (..., 'PLAYER_JS_URL'), (..., 'WEB_PLAYER_CONTEXT_CONFIGS', ..., 'jsUrl'),
        get_all=False, expected_type=str)
    return urljoin('https://www.youtube.com', js_url) if js_url else None
2955
def _download_player_url(self, video_id, fatal=False):
    """
    Build the player JS URL from the player version found in the iframe API script

    Returns None if the script cannot be downloaded or contains no player version
    (and `fatal` is false).
    """
    iframe_api_js = self._download_webpage(
        'https://www.youtube.com/iframe_api',
        note='Downloading iframe API JS', video_id=video_id, fatal=fatal)
    if not iframe_api_js:
        return None

    version = self._search_regex(
        r'player\\?/([0-9a-fA-F]{8})\\?/', iframe_api_js, 'player version', fatal=fatal)
    if not version:
        return None
    return f'https://www.youtube.com/s/player/{version}/player_ias.vflset/en_US/base.js'
2965
2966 def _signature_cache_id(self, example_sig):
2967 """ Return a string representation of a signature """
2968 return '.'.join(str(len(part)) for part in example_sig.split('.'))
2969
@classmethod
def _extract_player_info(cls, player_url):
    """
    Return the player id captured by the first matching pattern of _PLAYER_INFO_RE

    Raises ExtractorError if no pattern matches the URL.
    """
    # Generator, so the patterns after the first hit are never tried
    hits = (re.search(player_re, player_url) for player_re in cls._PLAYER_INFO_RE)
    id_m = next((hit for hit in hits if hit), None)
    if id_m is None:
        raise ExtractorError(f'Cannot identify player {player_url!r}')
    return id_m.group('id')
2979
def _load_player(self, video_id, player_url, fatal=True):
    """
    Return the player JS code, downloading it only when not yet in _code_cache

    Returns None if the download yields nothing (only possible when `fatal` is false).
    """
    player_id = self._extract_player_info(player_url)
    if player_id in self._code_cache:
        return self._code_cache[player_id]

    downloaded = self._download_webpage(
        player_url, video_id, fatal=fatal,
        note='Downloading player ' + player_id,
        errnote=f'Download of {player_url} failed')
    if downloaded:
        # Failed/empty downloads are not cached, so they are attempted again next time
        self._code_cache[player_id] = downloaded
    return self._code_cache.get(player_id)
2990
def _extract_signature_function(self, video_id, player_url, example_sig):
    """
    Return a function that rearranges a signature of the same shape as `example_sig`

    The rearrangement is stored as a list of source indices in the
    'youtube-sigfuncs' cache section, and computed from the player JS on a cache miss.
    """
    player_id = self._extract_player_info(player_url)

    # Read from filesystem cache
    func_id = f'js_{player_id}_{self._signature_cache_id(example_sig)}'
    assert os.path.basename(func_id) == func_id

    self.write_debug(f'Extracting signature function {func_id}')
    index_map = self.cache.load('youtube-sigfuncs', func_id)

    if not index_map:
        player_js = self._load_player(video_id, player_url)
        if player_js:
            sig_func = self._parse_sig_js(player_js)
            # Feed a string whose n-th character has code point n, so that the
            # output code points reveal which input position ends up where
            probe = ''.join(chr(n) for n in range(len(example_sig)))
            index_map = [ord(ch) for ch in sig_func(probe)]
            self.cache.store('youtube-sigfuncs', func_id, index_map)

    def apply_index_map(s):
        return ''.join(s[i] for i in index_map)

    return apply_index_map
3010
3011 def _print_sig_code(self, func, example_sig):
3012 if not self.get_param('youtube_print_sig_code'):
3013 return
3014
3015 def gen_sig_code(idxs):
3016 def _genslice(start, end, step):
3017 starts = '' if start == 0 else str(start)
3018 ends = (':%d' % (end + step)) if end + step >= 0 else ':'
3019 steps = '' if step == 1 else (':%d' % step)
3020 return f's[{starts}{ends}{steps}]'
3021
3022 step = None
3023 # Quelch pyflakes warnings - start will be set when step is set
3024 start = '(Never used)'
3025 for i, prev in zip(idxs[1:], idxs[:-1]):
3026 if step is not None:
3027 if i - prev == step:
3028 continue
3029 yield _genslice(start, prev, step)
3030 step = None
3031 continue
3032 if i - prev in [-1, 1]:
3033 step = i - prev
3034 start = prev
3035 continue
3036 else:
3037 yield 's[%d]' % prev
3038 if step is None:
3039 yield 's[%d]' % i
3040 else:
3041 yield _genslice(start, i, step)
3042
3043 test_string = ''.join(map(chr, range(len(example_sig))))
3044 cache_res = func(test_string)
3045 cache_spec = [ord(c) for c in cache_res]
3046 expr_code = ' + '.join(gen_sig_code(cache_spec))
3047 signature_id_tuple = '({})'.format(', '.join(str(len(p)) for p in example_sig.split('.')))
3048 code = (f'if tuple(len(p) for p in s.split(\'.\')) == {signature_id_tuple}:\n'
3049 f' return {expr_code}\n')
3050 self.to_screen('Extracted signature function:\n' + code)
3051
def _parse_sig_js(self, jscode):
    """
    Locate the signature function in the player JS and return a Python callable for it

    The returned callable takes the signature string as its only argument.
    """
    # Tried in order; each pattern captures the function name in the `sig` group
    funcname_patterns = (
        r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bm=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(h\.s\)\)',
        r'\bc&&\(c=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(c\)\)',
        r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)(?:;[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\))?',
        r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
        # Obsolete patterns
        r'("|\')signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
        r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
    )
    funcname = self._search_regex(
        funcname_patterns, jscode, 'Initial JS player signature function name', group='sig')

    initial_function = JSInterpreter(jscode).extract_function(funcname)

    def run_signature_function(s):
        # The interpreted function takes its arguments as a list
        return initial_function([s])

    return run_signature_function
3072
def _cached(self, func, *cache_id):
    """
    Wrap `func` so that its outcome is memoised in _player_cache under `cache_id`

    Exceptions are memoised too: an ExtractorError is stored as is, any other
    Exception is wrapped in an ExtractorError carrying the formatted traceback.
    A stored exception is raised again on every later call.
    """
    def inner(*args, **kwargs):
        if cache_id not in self._player_cache:
            try:
                outcome = func(*args, **kwargs)
            except ExtractorError as err:
                outcome = err
            except Exception as err:
                outcome = ExtractorError(traceback.format_exc(), cause=err)
            self._player_cache[cache_id] = outcome

        cached = self._player_cache[cache_id]
        if isinstance(cached, Exception):
            raise cached
        return cached
    return inner
3088
def _decrypt_signature(self, s, video_id, player_url):
    """Turn the encrypted s field into a working signature"""
    # The signature function is cached per player URL and signature shape
    sig_shape = self._signature_cache_id(s)
    cached_extract = self._cached(
        self._extract_signature_function, 'sig', player_url, sig_shape)
    decrypt = cached_extract(video_id, player_url, s)
    self._print_sig_code(decrypt, s)
    return decrypt(s)
3096
def _decrypt_nsig(self, s, video_id, player_url):
    """
    Turn the encrypted n field into its decrypted value

    The function is run with the native JS interpreter; if that fails with a
    JSInterpreter.Exception, PhantomJS is tried instead.
    Raises ExtractorError if `player_url` is None or the function code cannot be extracted.
    """
    if player_url is None:
        raise ExtractorError('Cannot decrypt nsig without player_url')
    player_url = urljoin('https://www.youtube.com', player_url)

    try:
        jsi, player_id, func_code = self._extract_n_function_code(video_id, player_url)
    except ExtractorError as e:
        raise ExtractorError('Unable to extract nsig function code', cause=e)
    if self.get_param('youtube_print_sig_code'):
        self.to_screen(f'Extracted nsig function from {player_id}:\n{func_code[1]}\n')

    try:
        build_nsig_func = self._cached(self._extract_n_function_from_code, 'nsig func', player_url)
        nsig_func = build_nsig_func(jsi, func_code)
        ret = nsig_func(s)
    except JSInterpreter.Exception as native_error:
        try:
            jsi = PhantomJSwrapper(self, timeout=5000)
        except ExtractorError:
            # No usable PhantomJS: surface the original interpreter error
            raise native_error
        self.report_warning(
            f'Native nsig extraction failed: Trying with PhantomJS\n'
            f' n = {s} ; player = {player_url}', video_id)
        self.write_debug(native_error, only_once=True)

        # func_code is (argument names, function body)
        arg_names, func_body = func_code
        script = f'console.log(function({", ".join(arg_names)}) {{ {func_body} }}({s!r}));'
        ret = jsi.execute(
            script, video_id=video_id, note='Executing signature code').strip()

    self.write_debug(f'Decrypted nsig {s} => {ret}')
    return ret
3130
def _extract_n_function_name(self, jscode):
    """
    Return the name of the n (nsig) function referenced in the player JS

    When the reference is an indexed one (`name[idx]`), the array assigned to
    `name` is parsed and the element at `idx` is returned instead.
    """
    name, index = self._search_regex(
        r'\.get\("n"\)\)&&\(b=(?P<nfunc>[a-zA-Z0-9$]+)(?:\[(?P<idx>\d+)\])?\([a-zA-Z0-9]\)',
        jscode, 'Initial JS player n function name', group=('nfunc', 'idx'))
    if index:
        array_literal = self._search_regex(
            rf'var {re.escape(name)}\s*=\s*(\[.+?\])\s*[,;]', jscode,
            f'Initial JS player n function list ({name}.{index})')
        return json.loads(js_to_json(array_literal))[int(index)]
    return name
3141
def _extract_n_function_code(self, video_id, player_url):
    """
    Return (jsi, player_id, func_code) for the n (nsig) function of the given player

    func_code is (argument names, function body). It is loaded from the
    'youtube-nsig' cache section when available; otherwise it is extracted from
    the player JS and stored there.
    """
    player_id = self._extract_player_info(player_url)
    func_code = self.cache.load('youtube-nsig', player_id, min_ver='2022.09.1')
    # NOTE(review): on a cache hit the interpreter is built from the cached func_code
    # instead of the player JS - confirm this is what callers expect
    jscode = func_code or self._load_player(video_id, player_url)
    jsi = JSInterpreter(jscode)

    if func_code:
        return jsi, player_id, func_code

    func_name = self._extract_n_function_name(jscode)

    # For redundancy
    # The name is escaped since JS identifiers may contain `$`, which would
    # otherwise be read as a regex anchor and make this pattern unmatchable
    func_code = self._search_regex(
        rf'''(?xs){re.escape(func_name)}\s*=\s*function\s*\((?P<var>[\w$]+)\)\s*
            # NB: The end of the regex is intentionally kept strict
            {{(?P<code>.+?}}\s*return\ [\w$]+.join\(""\))}};''',
        jscode, 'nsig function', group=('var', 'code'), default=None)
    if func_code:
        func_code = ([func_code[0]], func_code[1])
    else:
        self.write_debug('Extracting nsig function with jsinterp')
        func_code = jsi.extract_function_code(func_name)

    self.cache.store('youtube-nsig', player_id, func_code)
    return jsi, player_id, func_code
3167
def _extract_n_function_from_code(self, jsi, func_code):
    """
    Return a callable running the n (nsig) function described by `func_code`

    The callable raises JSInterpreter.Exception for any failure, including a
    result that starts with 'enhanced_except_'.
    """
    compiled = jsi.extract_function_from_code(*func_code)

    def run_nsig(s):
        try:
            decoded = compiled([s])
        except JSInterpreter.Exception:
            raise
        except Exception as err:
            # Normalise every other failure to the interpreter's own exception type
            raise JSInterpreter.Exception(traceback.format_exc(), cause=err)
        else:
            if decoded.startswith('enhanced_except_'):
                raise JSInterpreter.Exception('Signature function returned an exception')
            return decoded

    return run_nsig
3184
def _extract_signature_timestamp(self, video_id, player_url, ytcfg=None, fatal=False):
    """
    Extract signatureTimestamp (sts)
    Required to tell API what sig/player version is in use.

    The 'STS' value of `ytcfg` is preferred; the player JS is only searched when
    that gives nothing.
    """
    sts = int_or_none(ytcfg.get('STS')) if isinstance(ytcfg, dict) else None
    if sts:
        return sts

    # Attempt to extract from player
    if player_url is None:
        error_msg = 'Cannot extract signature timestamp without player_url.'
        if fatal:
            raise ExtractorError(error_msg)
        self.report_warning(error_msg)
        return

    player_js = self._load_player(video_id, player_url, fatal=fatal)
    if player_js:
        sts = int_or_none(self._search_regex(
            r'(?:signatureTimestamp|sts)\s*:\s*(?P<sts>[0-9]{5})', player_js,
            'JS player signature timestamp', group='sts', fatal=fatal))
    return sts
3208
def _mark_watched(self, video_id, player_responses):
    """
    Request the playback tracking URLs so that the video gets marked as watched

    The 'videostatsPlaybackUrl' is requested first, then the
    'videostatsWatchtimeUrl' (reported as "fully watched"). A warning is issued
    and the method returns as soon as one of the URLs is missing.
    """
    for is_full, key in enumerate(('videostatsPlaybackUrl', 'videostatsWatchtimeUrl')):
        label = 'fully ' if is_full else ''
        url = get_first(player_responses, ('playbackTracking', key, 'baseUrl'),
                        expected_type=url_or_none)
        if not url:
            self.report_warning(f'Unable to mark {label}watched')
            return
        parsed_url = urllib.parse.urlparse(url)
        qs = urllib.parse.parse_qs(parsed_url.query)

        # cpn generation algorithm is reverse engineered from base.js.
        # In fact it works even with dummy cpn.
        CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
        # random.randint() includes both bounds: 0..255 gives each of the
        # 64 characters the same probability
        cpn = ''.join(CPN_ALPHABET[random.randint(0, 255) & 63] for _ in range(16))

        # more consistent results setting it to right before the end
        video_length = [str(float((qs.get('len') or ['1.5'])[0]) - 1)]

        qs.update({
            'ver': ['2'],
            'cpn': [cpn],
            'cmt': video_length,
            'el': 'detailpage',  # otherwise defaults to "shorts"
        })

        if is_full:
            # these seem to mark watchtime "history" in the real world
            # they're required, so send in a single value
            qs.update({
                'st': 0,
                'et': video_length,
            })

        url = urllib.parse.urlunparse(
            parsed_url._replace(query=urllib.parse.urlencode(qs, True)))

        self._download_webpage(
            url, video_id, f'Marking {label}watched',
            'Unable to mark watched', fatal=False)
3249
@classmethod
def _extract_from_webpage(cls, url, webpage):
    """
    Yield url results for the YouTube videos embedded in `webpage`

    If the page links to a YouTube watch URL as its alternate, only that
    result is yielded and extraction is stopped.
    """
    # Invidious Instances
    # https://github.com/yt-dlp/yt-dlp/issues/195
    # https://github.com/iv-org/invidious/pull/1730
    alternate_link = re.search(
        r'<link rel="alternate" href="(?P<url>https://www\.youtube\.com/watch\?v=[0-9A-Za-z_-]{11})"',
        webpage)
    if alternate_link:
        yield cls.url_result(alternate_link.group('url'), cls)
        raise cls.StopExtraction

    yield from super()._extract_from_webpage(url, webpage)

    # lazyYT YouTube embed
    for lazy_embed in re.finditer(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage):
        raw_id = lazy_embed.group(1)
        yield cls.url_result(unescapeHTML(raw_id), cls, raw_id)

    # Wordpress "YouTube Video Importer" plugin
    for plugin_embed in re.finditer(r'''(?x)<div[^>]+
            class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
            data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage):
        # Groups 1 and 2 are the quote characters; the id is the last group
        embedded_id = plugin_embed.group(3)
        yield cls.url_result(embedded_id, cls, embedded_id)
3273
@classmethod
def extract_id(cls, url):
    """Return the video id for `url`; raise ExtractorError if none can be determined"""
    video_id = cls.get_temp_id(url)
    if video_id:
        return video_id
    raise ExtractorError(f'Invalid URL: {url}')
3280
def _extract_chapters_from_json(self, data, duration):
    """Extract chapters from the chaptered player bar renderer found in `data`"""
    def chapter_start(chapter):
        # timeRangeStartMillis is in milliseconds; scaled down by 1000
        start_ms = traverse_obj(chapter, ('chapterRenderer', 'timeRangeStartMillis'))
        return float_or_none(start_ms, scale=1000)

    def chapter_title(chapter):
        return traverse_obj(
            chapter, ('chapterRenderer', 'title', 'simpleText'), expected_type=str)

    chapters_path = (
        'playerOverlays', 'playerOverlayRenderer', 'decoratedPlayerBarRenderer',
        'decoratedPlayerBarRenderer', 'playerBar', 'chapteredPlayerBarRenderer', 'chapters',
    )
    chapter_list = traverse_obj(data, chapters_path, expected_type=list)

    return self._extract_chapters_helper(
        chapter_list,
        start_function=chapter_start,
        title_function=chapter_title,
        duration=duration)
3295
def _extract_chapters_from_engagement_panel(self, data, duration):
    """Build chapters from macro-marker lists in the engagement panels of *data*.

    Every ``macroMarkersListRenderer`` content list is tried in order; the
    first one for which ``_extract_chapters_helper`` returns a truthy result
    wins. Returns an empty list when none does.
    """
    def marker_start(marker):
        return parse_duration(self._get_text(marker, 'timeDescription'))

    def marker_title(marker):
        return self._get_text(marker, 'title')

    marker_lists = traverse_obj(
        data,
        ('engagementPanels', ..., 'engagementPanelSectionListRenderer', 'content', 'macroMarkersListRenderer', 'contents'),
        expected_type=list)

    for markers in marker_lists:
        chapters = self._extract_chapters_helper(
            traverse_obj(markers, (..., 'macroMarkersListItemRenderer')),
            marker_start, marker_title, duration)
        if chapters:
            return chapters
    return []
3308
def _extract_heatmap(self, data):
    """Extract heatmap markers from the entity mutations in *data*.

    Returns a list of dicts with ``start_time``, ``end_time`` (both in
    seconds) and ``value``, or None when nothing was found.
    """
    def is_heatmap_mutation(_, mutation):
        markers_list = mutation['payload']['macroMarkersListEntity']['markersList']
        return markers_list['markerType'] == 'MARKER_TYPE_HEATMAP'

    def marker_end_seconds(marker):
        return (int(marker['startMillis']) + int(marker['durationMillis'])) / 1000

    marker_fields = {
        'start_time': ('startMillis', {functools.partial(float_or_none, scale=1000)}),
        'end_time': {marker_end_seconds},
        'value': ('intensityScoreNormalized', {float_or_none}),
    }
    heatmap = traverse_obj(data, (
        'frameworkUpdates', 'entityBatchUpdate', 'mutations',
        is_heatmap_mutation,
        'payload', 'macroMarkersListEntity', 'markersList', 'markers', ...,
        marker_fields))
    return heatmap or None
3318
def _extract_comment(self, entities, parent=None):
    """Build a comment dict from the entity payloads of one comment (new format).

    *entities* is searched for a ``commentEntityPayload`` (comment data) and
    an ``engagementToolbarStateEntityPayload`` (heart state).
    Returns None when no comment id can be found.
    """
    comment_payload = get_first(entities, ('payload', 'commentEntityPayload', {dict}))
    comment_id = traverse_obj(comment_payload, ('properties', 'commentId', {str}))
    if not comment_id:
        return None

    toolbar_state = get_first(entities, ('payload', 'engagementToolbarStateEntityPayload', {dict}))
    time_text = traverse_obj(comment_payload, ('properties', 'publishedTime', {str})) or ''

    comment = {
        'id': comment_id,
        'parent': parent or 'root',
    }
    comment.update(traverse_obj(comment_payload, {
        'text': ('properties', 'content', 'content', {str}),
        'like_count': ('toolbar', 'likeCountA11y', {parse_count}),
        'author_id': ('author', 'channelId', {self.ucid_or_none}),
        'author': ('author', 'displayName', {str}),
        'author_thumbnail': ('author', 'avatarThumbnailUrl', {url_or_none}),
        'author_is_uploader': ('author', 'isCreator', {bool}),
        'author_is_verified': ('author', 'isVerified', {bool}),
        'author_url': ('author', 'channelCommand', 'innertubeCommand', (
            ('browseEndpoint', 'canonicalBaseUrl'), ('commandMetadata', 'webCommandMetadata', 'url'),
        ), {functools.partial(urljoin, 'https://www.youtube.com')}),
    }, get_all=False))

    # Without a toolbar payload the heart state is unknown rather than False
    if toolbar_state is None:
        comment['is_favorited'] = None
    else:
        comment['is_favorited'] = toolbar_state.get('heartState') == 'TOOLBAR_HEART_STATE_HEARTED'

    # FIXME: non-standard, but we need a way of showing that it is an estimate.
    comment['_time_text'] = time_text
    comment['timestamp'] = self._parse_time_text(time_text)
    return comment
3347
def _extract_comment_old(self, comment_renderer, parent=None):
    """Build a comment dict from a ``commentRenderer`` (old comment format).

    Returns None when the renderer carries no ``commentId``. The optional
    keys ``author_is_uploader``, ``is_favorited``, ``author_is_verified``
    and ``is_pinned`` are only set when the renderer provides them.
    """
    comment_id = comment_renderer.get('commentId')
    if not comment_id:
        return None

    # Timestamp is an estimate calculated from the current time and time_text
    time_text = self._get_text(comment_renderer, 'publishedTimeText') or ''
    author_path = traverse_obj(comment_renderer, ('authorEndpoint', (
        ('browseEndpoint', 'canonicalBaseUrl'), ('commandMetadata', 'webCommandMetadata', 'url'))),
        expected_type=str, get_all=False)

    info = {
        'id': comment_id,
        'text': self._get_text(comment_renderer, 'contentText'),
        'like_count': self._get_count(comment_renderer, 'voteCount'),
        'author_id': traverse_obj(comment_renderer, ('authorEndpoint', 'browseEndpoint', 'browseId', {self.ucid_or_none})),
        'author': self._get_text(comment_renderer, 'authorText'),
        'author_thumbnail': traverse_obj(comment_renderer, ('authorThumbnail', 'thumbnails', -1, 'url', {url_or_none})),
        'parent': parent or 'root',
        # FIXME: non-standard, but we need a way of showing that it is an estimate.
        '_time_text': time_text,
        'timestamp': self._parse_time_text(time_text),
        'author_url': urljoin('https://www.youtube.com', author_path),
    }

    if (is_channel_owner := traverse_obj(comment_renderer, 'authorIsChannelOwner')) is not None:
        info['author_is_uploader'] = is_channel_owner

    action_buttons = traverse_obj(
        comment_renderer, ('actionButtons', 'commentActionButtonsRenderer'), expected_type=dict)
    if action_buttons is not None:
        info['is_favorited'] = 'creatorHeart' in action_buttons

    author_badges = self._extract_badges([traverse_obj(comment_renderer, 'authorCommentBadge')])
    if self._has_badge(author_badges, BadgeType.VERIFIED):
        info['author_is_verified'] = True

    if traverse_obj(comment_renderer, 'pinnedCommentBadge'):
        info['is_pinned'] = True

    return info
3396
def _comment_entries(self, root_continuation_data, ytcfg, video_id, parent=None, tracker=None):
    """Generate comment dicts for a comment section or for one reply thread.

    @param root_continuation_data  Renderer from which the first continuation
                                   is extracted (and, at the end, a possible
                                   "messageRenderer" text).
    @param ytcfg                   ytcfg used to build the API headers.
    @param video_id                Id of the video the comments belong to.
    @param parent                  Id of the parent comment when extracting a
                                   reply thread; None for the top level.
    @param tracker                 Counter dict shared across the recursive
                                   calls; created here when not given.

    Yields comment dicts as produced by _extract_comment/_extract_comment_old.
    Raises self.CommentsDisabled when a top-level call got no comments and
    YouTube supplied a message, and ExtractorError on incomplete reply data
    unless errors are being ignored.
    """

    def get_single_config_arg(name):
        return self._configuration_arg(name, [''])[0]

    def extract_header(contents):
        """Read the comment count and return the continuation for the requested sort order"""
        _continuation = None
        for content in contents:
            comments_header_renderer = traverse_obj(content, 'commentsHeaderRenderer')
            expected_comment_count = self._get_count(
                comments_header_renderer, 'countText', 'commentsCount')

            if expected_comment_count is not None:
                tracker['est_total'] = expected_comment_count
                self.to_screen(f'Downloading ~{expected_comment_count} comments')
            comment_sort_index = int(get_single_config_arg('comment_sort') != 'top')  # 1 = new, 0 = top

            sort_menu_item = try_get(
                comments_header_renderer,
                lambda x: x['sortMenu']['sortFilterSubMenuRenderer']['subMenuItems'][comment_sort_index], dict) or {}
            sort_continuation_ep = sort_menu_item.get('serviceEndpoint') or {}

            _continuation = self._extract_continuation_ep_data(sort_continuation_ep) or self._extract_continuation(sort_menu_item)
            if not _continuation:
                continue

            sort_text = str_or_none(sort_menu_item.get('title'))
            if not sort_text:
                sort_text = 'top comments' if comment_sort_index == 0 else 'newest first'
            self.to_screen(f'Sorting comments by {sort_text.lower()}')
            break
        return _continuation

    def extract_thread(contents, entity_payloads):
        """Yield the comments (and their replies) of one page.

        A bare `yield` (None) tells the caller to stop comment extraction.
        """
        if not parent:
            tracker['current_page_thread'] = 0
        for content in contents:
            if not parent and tracker['total_parent_comments'] >= max_parents:
                yield
            comment_thread_renderer = try_get(content, lambda x: x['commentThreadRenderer'])

            # old comment format
            if not entity_payloads:
                comment_renderer = get_first(
                    (comment_thread_renderer, content), [['commentRenderer', ('comment', 'commentRenderer')]],
                    expected_type=dict, default={})

                comment = self._extract_comment_old(comment_renderer, parent)

            # new comment format
            else:
                view_model = (
                    traverse_obj(comment_thread_renderer, ('commentViewModel', 'commentViewModel', {dict}))
                    or traverse_obj(content, ('commentViewModel', {dict})))
                comment_keys = traverse_obj(view_model, (('commentKey', 'toolbarStateKey'), {str}))
                if not comment_keys:
                    continue
                entities = traverse_obj(entity_payloads, lambda _, v: v['entityKey'] in comment_keys)
                comment = self._extract_comment(entities, parent)
                if comment:
                    comment['is_pinned'] = traverse_obj(view_model, ('pinnedText', {str})) is not None

            if not comment:
                continue
            comment_id = comment['id']

            if comment.get('is_pinned'):
                tracker['pinned_comment_ids'].add(comment_id)
            # Sometimes YouTube may break and give us infinite looping comments.
            # See: https://github.com/yt-dlp/yt-dlp/issues/6290
            if comment_id in tracker['seen_comment_ids']:
                if comment_id in tracker['pinned_comment_ids'] and not comment.get('is_pinned'):
                    # Pinned comments may appear a second time in newest first sort
                    # See: https://github.com/yt-dlp/yt-dlp/issues/6712
                    continue
                # The scope suffix carries its own leading space so that the
                # message has no double space when there is no parent
                loop_scope = ' for this thread' if parent else ''
                self.report_warning(
                    f'Detected YouTube comments looping. Stopping comment extraction{loop_scope} '
                    'as we probably cannot get any more.')
                yield
            else:
                tracker['seen_comment_ids'].add(comment['id'])

            tracker['running_total'] += 1
            tracker['total_reply_comments' if parent else 'total_parent_comments'] += 1
            yield comment

            # Attempt to get the replies
            comment_replies_renderer = try_get(
                comment_thread_renderer, lambda x: x['replies']['commentRepliesRenderer'], dict)

            if comment_replies_renderer:
                tracker['current_page_thread'] += 1
                comment_entries_iter = self._comment_entries(
                    comment_replies_renderer, ytcfg, video_id,
                    parent=comment.get('id'), tracker=tracker)
                # Respect both the per-thread limit and what is left of the global reply limit
                yield from itertools.islice(comment_entries_iter, min(
                    max_replies_per_thread, max(0, max_replies - tracker['total_reply_comments'])))

    # Keeps track of counts across recursive calls
    if not tracker:
        tracker = {
            'running_total': 0,
            'est_total': None,
            'current_page_thread': 0,
            'total_parent_comments': 0,
            'total_reply_comments': 0,
            'seen_comment_ids': set(),
            'pinned_comment_ids': set(),
        }

    # TODO: Deprecated
    # YouTube comments have a max depth of 2
    max_depth = int_or_none(get_single_config_arg('max_comment_depth'))
    if max_depth:
        self._downloader.deprecated_feature('[youtube] max_comment_depth extractor argument is deprecated. '
                                            'Set max replies in the max-comments extractor argument instead')
    if max_depth == 1 and parent:
        return

    # The first value (overall max comments) is not needed in this function;
    # missing or unparsable values fall back to sys.maxsize
    _, max_parents, max_replies, max_replies_per_thread, *_ = (
        int_or_none(p, default=sys.maxsize) for p in self._configuration_arg('max_comments') + [''] * 4)

    continuation = self._extract_continuation(root_continuation_data)

    response = None
    is_forced_continuation = False
    is_first_continuation = parent is None
    if is_first_continuation and not continuation:
        # Sometimes you can get comments by generating the continuation yourself,
        # even if YouTube initially reports them being disabled - e.g. stories comments.
        # Note: if the comment section is actually disabled, YouTube may return a response with
        # required check_get_keys missing. So we will disable that check initially in this case.
        continuation = self._build_api_continuation_query(self._generate_comment_continuation(video_id))
        is_forced_continuation = True

    continuation_items_path = (
        'onResponseReceivedEndpoints', ..., ('reloadContinuationItemsCommand', 'appendContinuationItemsAction'), 'continuationItems')
    for page_num in itertools.count(0):
        if not continuation:
            break
        headers = self.generate_api_headers(ytcfg=ytcfg, visitor_data=self._extract_visitor_data(response))
        comment_prog_str = f"({tracker['running_total']}/~{tracker['est_total']})"
        if page_num == 0:
            if is_first_continuation:
                note_prefix = 'Downloading comment section API JSON'
            else:
                note_prefix = ' Downloading comment API JSON reply thread %d %s' % (
                    tracker['current_page_thread'], comment_prog_str)
        else:
            note_prefix = '{}Downloading comment{} API JSON page {} {}'.format(
                ' ' if parent else '', ' replies' if parent else '',
                page_num, comment_prog_str)

        # Do a deep check for incomplete data as sometimes YouTube may return no comments for a continuation
        # Ignore check if YouTube says the comment count is 0.
        check_get_keys = None
        if not is_forced_continuation and not (tracker['est_total'] == 0 and tracker['running_total'] == 0):
            check_get_keys = [[*continuation_items_path, ..., (
                'commentsHeaderRenderer' if is_first_continuation else ('commentThreadRenderer', 'commentViewModel', 'commentRenderer'))]]
        try:
            response = self._extract_response(
                item_id=None, query=continuation,
                ep='next', ytcfg=ytcfg, headers=headers, note=note_prefix,
                check_get_keys=check_get_keys)
        except ExtractorError as e:
            # Ignore incomplete data error for replies if retries didn't work.
            # This is to allow any other parent comments and comment threads to be downloaded.
            # See: https://github.com/yt-dlp/yt-dlp/issues/4669
            if 'incomplete data' in str(e).lower() and parent:
                if self.get_param('ignoreerrors') in (True, 'only_download'):
                    self.report_warning(
                        'Received incomplete data for a comment reply thread and retrying did not help. '
                        'Ignoring to let other comments be downloaded. Pass --no-ignore-errors to not ignore.')
                    return
                else:
                    raise ExtractorError(
                        'Incomplete data received for comment reply thread. '
                        'Pass --ignore-errors to ignore and allow rest of comments to download.',
                        expected=True)
            raise
        is_forced_continuation = False
        continuation = None
        mutations = traverse_obj(response, ('frameworkUpdates', 'entityBatchUpdate', 'mutations', ..., {dict}))
        for continuation_items in traverse_obj(response, continuation_items_path, expected_type=list, default=[]):
            if is_first_continuation:
                # The first response only carries the header; it tells us
                # where the (sorted) comments actually start
                continuation = extract_header(continuation_items)
                is_first_continuation = False
                if continuation:
                    break
                continue

            for entry in extract_thread(continuation_items, mutations):
                # A falsy entry is the stop signal from extract_thread
                if not entry:
                    return
                yield entry
            continuation = self._extract_continuation({'contents': continuation_items})
            if continuation:
                break

    message = self._get_text(root_continuation_data, ('contents', ..., 'messageRenderer', 'text'), max_runs=1)
    if message and not parent and tracker['running_total'] == 0:
        self.report_warning(f'Youtube said: {message}', video_id=video_id, only_once=True)
        raise self.CommentsDisabled
3599
3600 @staticmethod
3601 def _generate_comment_continuation(video_id):
3602 """
3603 Generates initial comment section continuation token from given video id
3604 """
3605 token = f'\x12\r\x12\x0b{video_id}\x18\x062\'"\x11"\x0b{video_id}0\x00x\x020\x00B\x10comments-section'
3606 return base64.b64encode(token.encode()).decode()
3607
def _get_comments(self, ytcfg, video_id, contents, webpage):
    """Entry for comment extraction

    Locates the item section identified as 'comment-item-section' in
    *contents* and returns an iterator over its comments, capped at the first
    value of the `max_comments` extractor argument (no cap when that value is
    not an integer). *webpage* is not used here.
    """
    def iter_comments():
        comment_section = None
        for section in traverse_obj(contents, (..., 'itemSectionRenderer'), default={}):
            if section.get('sectionIdentifier') == 'comment-item-section':
                comment_section = section
                break
        yield from self._comment_entries(comment_section, ytcfg, video_id)

    comment_limit = int_or_none(self._configuration_arg('max_comments', [''])[0])
    return itertools.islice(iter_comments(), comment_limit)
3618
3619 @staticmethod
3620 def _get_checkok_params():
3621 return {'contentCheckOk': True, 'racyCheckOk': True}
3622
3623 @classmethod
3624 def _generate_player_context(cls, sts=None):
3625 context = {
3626 'html5Preference': 'HTML5_PREF_WANTS',
3627 }
3628 if sts is not None:
3629 context['signatureTimestamp'] = sts
3630 return {
3631 'playbackContext': {
3632 'contentPlaybackContext': context,
3633 },
3634 **cls._get_checkok_params(),
3635 }
3636
@staticmethod
def _is_agegated(player_response):
    """Tell whether *player_response* reports the video as age-gated.

    True when 'desktopLegacyAgeGateReason' is set in the playability status,
    or when the status/reason values contain one of the known age-gate
    substrings.
    """
    AGE_GATE_REASONS = (
        'confirm your age', 'age-restricted', 'inappropriate',  # reason
        'age_verification_required', 'age_check_required',  # status
    )

    if traverse_obj(player_response, ('playabilityStatus', 'desktopLegacyAgeGateReason')):
        return True

    reasons = traverse_obj(player_response, ('playabilityStatus', ('status', 'reason')))
    for expected in AGE_GATE_REASONS:
        for reason in reasons:
            if expected in reason:
                return True
    return False
3648
@staticmethod
def _is_unplayable(player_response):
    """Tell whether the playability status of *player_response* is 'UNPLAYABLE'"""
    playability_status = traverse_obj(player_response, ('playabilityStatus', 'status'))
    return playability_status == 'UNPLAYABLE'
3652
def _extract_player_response(self, client, video_id, master_ytcfg, player_ytcfg, player_url, initial_pr, smuggled_data):
    """Request the player API response of *video_id* for the given *client*.

    The signature timestamp is only extracted when *player_url* is given.
    The `player_params` extractor argument, when set, is sent as 'params'.
    Returns the API response, or None when it is falsy.
    *smuggled_data* is not used here.
    """
    session_index = self._extract_session_index(player_ytcfg, master_ytcfg)
    syncid = self._extract_account_syncid(player_ytcfg, master_ytcfg, initial_pr)
    sts = None
    if player_url:
        sts = self._extract_signature_timestamp(video_id, player_url, master_ytcfg, fatal=False)
    headers = self.generate_api_headers(
        ytcfg=player_ytcfg, account_syncid=syncid, session_index=session_index, default_client=client)

    yt_query = {'videoId': video_id}
    if pp_arg := self._configuration_arg('player_params', [None], casesense=True)[0]:
        yt_query['params'] = pp_arg
    yt_query.update(self._generate_player_context(sts))

    client_label = client.replace('_', ' ').strip()
    player_response = self._extract_response(
        item_id=video_id, ep='player', query=yt_query,
        ytcfg=player_ytcfg, headers=headers, fatal=True,
        default_client=client,
        note=f'Downloading {client_label} player API JSON',
    )
    return player_response or None
3676
def _get_requested_clients(self, url, smuggled_data):
    """Resolve the `player_client` extractor argument into an ordered client list.

    'default' expands to the default clients and 'all' to every client whose
    name does not start with an underscore (highest priority first).
    Unsupported names are skipped with a warning. Explicitly requested
    android clients are moved to the end. For music URLs, the existing
    '<client>_music' variants of the selected clients are appended.

    Returns the clients with duplicates removed, order preserved.
    """
    requested_clients = []
    android_clients = []
    default = ['ios', 'web']
    allowed_clients = sorted(
        (client for client in INNERTUBE_CLIENTS if client[:1] != '_'),
        key=lambda client: INNERTUBE_CLIENTS[client]['priority'], reverse=True)
    for client in self._configuration_arg('player_client'):
        if client == 'default':
            requested_clients.extend(default)
        elif client == 'all':
            requested_clients.extend(allowed_clients)
        elif client not in allowed_clients:
            self.report_warning(f'Skipping unsupported client {client}')
        elif client.startswith('android'):
            android_clients.append(client)
        else:
            requested_clients.append(client)
    # Force deprioritization of broken Android clients for format de-duplication
    requested_clients.extend(android_clients)
    if not requested_clients:
        # Copy so that the defaults are never modified through the alias
        requested_clients = list(default)

    if smuggled_data.get('is_music_url') or self.is_music_url(url):
        # Build the list up front: extending a list from a generator that is
        # iterating over that same list would also visit the appended names
        music_clients = [
            f'{client}_music' for client in requested_clients
            if f'{client}_music' in INNERTUBE_CLIENTS]
        requested_clients.extend(music_clients)

    return orderedSet(requested_clients)
3705
def _invalid_player_response(self, pr, video_id):
    """Return the video id of *pr* if it differs from *video_id*, else None"""
    # YouTube may return a different video player response than expected.
    # See: https://github.com/TeamNewPipe/NewPipe/issues/8713
    returned_id = traverse_obj(pr, ('videoDetails', 'videoId'))
    return None if returned_id == video_id else returned_id
3711
def _extract_player_responses(self, clients, video_id, webpage, master_ytcfg, smuggled_data):
    """Collect the player responses of *video_id* for every client in *clients*.

    Clients are processed in the given order; fallback clients may be queued
    while processing when a response is unplayable or age-gated.
    Responses that belong to another video are skipped with a warning.

    Returns a tuple (player_responses, player_url).
    Raises ExtractorError when no usable player response was obtained.
    """
    initial_pr = None
    if webpage:
        initial_pr = self._search_json(
            self._YT_INITIAL_PLAYER_RESPONSE_RE, webpage, 'initial player response', video_id, fatal=False)

    prs = []
    if initial_pr and not self._invalid_player_response(initial_pr, video_id):
        # Android player_response does not have microFormats which are needed for
        # extraction of some data. So we return the initial_pr with formats
        # stripped out even if not requested by the user
        # See: https://github.com/yt-dlp/yt-dlp/issues/501
        prs.append({**initial_pr, 'streamingData': None})

    all_clients = set(clients)
    # Used as a stack: the next client is popped from the end
    clients = clients[::-1]

    def append_client(*client_names):
        """ Append the first client name that exists but not already used """
        for candidate in client_names:
            actual_client = _split_innertube_client(candidate)[0]
            if actual_client in INNERTUBE_CLIENTS and actual_client not in all_clients:
                clients.append(candidate)
                all_clients.add(actual_client)
                return

    tried_iframe_fallback = False
    player_url = None
    skipped_clients = {}
    while clients:
        client, base_client, variant = _split_innertube_client(clients.pop())
        is_web = client == 'web'
        player_ytcfg = master_ytcfg if is_web else {}
        if 'configs' not in self._configuration_arg('player_skip') and not is_web:
            player_ytcfg = self._download_ytcfg(client, video_id) or player_ytcfg

        player_url = player_url or self._extract_player_url(master_ytcfg, player_ytcfg, webpage=webpage)
        require_js_player = self._get_default_ytcfg(client).get('REQUIRE_JS_PLAYER')
        if 'js' in self._configuration_arg('player_skip'):
            require_js_player = False
            player_url = None

        if require_js_player and not (player_url or tried_iframe_fallback):
            player_url = self._download_player_url(video_id)
            tried_iframe_fallback = True

        try:
            if is_web and initial_pr:
                pr = initial_pr
            else:
                pr = self._extract_player_response(
                    client, video_id, player_ytcfg or master_ytcfg, player_ytcfg,
                    player_url if require_js_player else None, initial_pr, smuggled_data)
        except ExtractorError as e:
            self.report_warning(e)
            continue

        if pr_id := self._invalid_player_response(pr, video_id):
            skipped_clients[client] = pr_id
        elif pr:
            # Save client name for introspection later
            client_label = short_client_name(client)
            streaming_data = traverse_obj(pr, ('streamingData', {dict})) or {}
            streaming_data[STREAMING_DATA_CLIENT_NAME] = client_label
            for fmt in traverse_obj(streaming_data, (('formats', 'adaptiveFormats'), ..., {dict})):
                fmt[STREAMING_DATA_CLIENT_NAME] = client_label
            prs.append(pr)

        # creator clients can bypass AGE_VERIFICATION_REQUIRED if logged in
        if variant == 'embedded' and self._is_unplayable(pr) and self.is_authenticated:
            append_client(f'{base_client}_creator')
        elif self._is_agegated(pr):
            if variant == 'tv_embedded':
                append_client(f'{base_client}_embedded')
            elif not variant:
                append_client(f'tv_embedded.{base_client}', f'{base_client}_embedded')

    if skipped_clients:
        skipped_names = '/'.join(skipped_clients)
        received_ids = '/'.join(set(skipped_clients.values()))
        self.report_warning(
            f'Skipping player responses from {skipped_names} clients '
            f'(got player responses for video "{received_ids}" instead of "{video_id}")')
        if not prs:
            raise ExtractorError(
                'All player responses are invalid. Your IP is likely being blocked by Youtube', expected=True)
    elif not prs:
        raise ExtractorError('Failed to extract any player response')
    return prs, player_url
3795
3796 def _needs_live_processing(self, live_status, duration):
3797 if (live_status == 'is_live' and self.get_param('live_from_start')
3798 or live_status == 'post_live' and (duration or 0) > 2 * 3600):
3799 return live_status
3800
3801 def _extract_formats_and_subtitles(self, streaming_data, video_id, player_url, live_status, duration):
3802 CHUNK_SIZE = 10 << 20
3803 PREFERRED_LANG_VALUE = 10
3804 original_language = None
3805 itags, stream_ids = collections.defaultdict(set), []
3806 itag_qualities, res_qualities = {}, {0: None}
3807 q = qualities([
3808 # Normally tiny is the smallest video-only formats. But
3809 # audio-only formats with unknown quality may get tagged as tiny
3810 'tiny',
3811 'audio_quality_ultralow', 'audio_quality_low', 'audio_quality_medium', 'audio_quality_high', # Audio only formats
3812 'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres',
3813 ])
3814 streaming_formats = traverse_obj(streaming_data, (..., ('formats', 'adaptiveFormats'), ...))
3815 format_types = self._configuration_arg('formats')
3816 all_formats = 'duplicate' in format_types
3817 if self._configuration_arg('include_duplicate_formats'):
3818 all_formats = True
3819 self._downloader.deprecated_feature('[youtube] include_duplicate_formats extractor argument is deprecated. '
3820 'Use formats=duplicate extractor argument instead')
3821
3822 def build_fragments(f):
3823 return LazyList({
3824 'url': update_url_query(f['url'], {
3825 'range': f'{range_start}-{min(range_start + CHUNK_SIZE - 1, f["filesize"])}',
3826 }),
3827 } for range_start in range(0, f['filesize'], CHUNK_SIZE))
3828
3829 for fmt in streaming_formats:
3830 if fmt.get('targetDurationSec'):
3831 continue
3832
3833 itag = str_or_none(fmt.get('itag'))
3834 audio_track = fmt.get('audioTrack') or {}
3835 stream_id = (itag, audio_track.get('id'), fmt.get('isDrc'))
3836 if not all_formats:
3837 if stream_id in stream_ids:
3838 continue
3839
3840 quality = fmt.get('quality')
3841 height = int_or_none(fmt.get('height'))
3842 if quality == 'tiny' or not quality:
3843 quality = fmt.get('audioQuality', '').lower() or quality
3844 # The 3gp format (17) in android client has a quality of "small",
3845 # but is actually worse than other formats
3846 if itag == '17':
3847 quality = 'tiny'
3848 if quality:
3849 if itag:
3850 itag_qualities[itag] = quality
3851 if height:
3852 res_qualities[height] = quality
3853
3854 is_default = audio_track.get('audioIsDefault')
3855 is_descriptive = 'descriptive' in (audio_track.get('displayName') or '').lower()
3856 language_code = audio_track.get('id', '').split('.')[0]
3857 if language_code and is_default:
3858 original_language = language_code
3859
3860 # FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment
3861 # (adding `&sq=0` to the URL) and parsing emsg box to determine the
3862 # number of fragment that would subsequently requested with (`&sq=N`)
3863 if fmt.get('type') == 'FORMAT_STREAM_TYPE_OTF':
3864 continue
3865
3866 fmt_url = fmt.get('url')
3867 if not fmt_url:
3868 sc = urllib.parse.parse_qs(fmt.get('signatureCipher'))
3869 fmt_url = url_or_none(try_get(sc, lambda x: x['url'][0]))
3870 encrypted_sig = try_get(sc, lambda x: x['s'][0])
3871 if not all((sc, fmt_url, player_url, encrypted_sig)):
3872 continue
3873 try:
3874 fmt_url += '&{}={}'.format(
3875 traverse_obj(sc, ('sp', -1)) or 'signature',
3876 self._decrypt_signature(encrypted_sig, video_id, player_url),
3877 )
3878 except ExtractorError as e:
3879 self.report_warning('Signature extraction failed: Some formats may be missing',
3880 video_id=video_id, only_once=True)
3881 self.write_debug(e, only_once=True)
3882 continue
3883
3884 query = parse_qs(fmt_url)
3885 if query.get('n'):
3886 try:
3887 decrypt_nsig = self._cached(self._decrypt_nsig, 'nsig', query['n'][0])
3888 fmt_url = update_url_query(fmt_url, {
3889 'n': decrypt_nsig(query['n'][0], video_id, player_url),
3890 })
3891 except ExtractorError as e:
3892 phantomjs_hint = ''
3893 if isinstance(e, JSInterpreter.Exception):
3894 phantomjs_hint = (f' Install {self._downloader._format_err("PhantomJS", self._downloader.Styles.EMPHASIS)} '
3895 f'to workaround the issue. {PhantomJSwrapper.INSTALL_HINT}\n')
3896 if player_url:
3897 self.report_warning(
3898 f'nsig extraction failed: Some formats may be missing\n{phantomjs_hint}'
3899 f' n = {query["n"][0]} ; player = {player_url}', video_id=video_id, only_once=True)
3900 self.write_debug(e, only_once=True)
3901 else:
3902 self.report_warning(
3903 'Cannot decrypt nsig without player_url: Some formats may be missing',
3904 video_id=video_id, only_once=True)
3905 continue
3906
3907 tbr = float_or_none(fmt.get('averageBitrate') or fmt.get('bitrate'), 1000)
3908 format_duration = traverse_obj(fmt, ('approxDurationMs', {lambda x: float_or_none(x, 1000)}))
3909 # Some formats may have much smaller duration than others (possibly damaged during encoding)
3910 # E.g. 2-nOtRESiUc Ref: https://github.com/yt-dlp/yt-dlp/issues/2823
3911 # Make sure to avoid false positives with small duration differences.
3912 # E.g. __2ABJjxzNo, ySuUZEjARPY
3913 is_damaged = try_call(lambda: format_duration < duration // 2)
3914 if is_damaged:
3915 self.report_warning(
3916 f'{video_id}: Some formats are possibly damaged. They will be deprioritized', only_once=True)
3917
3918 client_name = fmt.get(STREAMING_DATA_CLIENT_NAME)
3919 # Android client formats are broken due to integrity check enforcement
3920 # Ref: https://github.com/yt-dlp/yt-dlp/issues/9554
3921 is_broken = client_name and client_name.startswith(short_client_name('android'))
3922 if is_broken:
3923 self.report_warning(
3924 f'{video_id}: Android client formats are broken and may yield HTTP Error 403. '
3925 'They will be deprioritized', only_once=True)
3926
3927 name = fmt.get('qualityLabel') or quality.replace('audio_quality_', '') or ''
3928 fps = int_or_none(fmt.get('fps')) or 0
3929 dct = {
3930 'asr': int_or_none(fmt.get('audioSampleRate')),
3931 'filesize': int_or_none(fmt.get('contentLength')),
3932 'format_id': f'{itag}{"-drc" if fmt.get("isDrc") else ""}',
3933 'format_note': join_nonempty(
3934 join_nonempty(audio_track.get('displayName'), is_default and ' (default)', delim=''),
3935 name, fmt.get('isDrc') and 'DRC',
3936 try_get(fmt, lambda x: x['projectionType'].replace('RECTANGULAR', '').lower()),
3937 try_get(fmt, lambda x: x['spatialAudioType'].replace('SPATIAL_AUDIO_TYPE_', '').lower()),
3938 is_damaged and 'DAMAGED', is_broken and 'BROKEN',
3939 (self.get_param('verbose') or all_formats) and client_name,
3940 delim=', '),
3941 # Format 22 is likely to be damaged. See https://github.com/yt-dlp/yt-dlp/issues/3372
3942 'source_preference': (-5 if itag == '22' else -1) + (100 if 'Premium' in name else 0),
3943 'fps': fps if fps > 1 else None, # For some formats, fps is wrongly returned as 1
3944 'audio_channels': fmt.get('audioChannels'),
3945 'height': height,
3946 'quality': q(quality) - bool(fmt.get('isDrc')) / 2,
3947 'has_drm': bool(fmt.get('drmFamilies')),
3948 'tbr': tbr,
3949 'filesize_approx': filesize_from_tbr(tbr, format_duration),
3950 'url': fmt_url,
3951 'width': int_or_none(fmt.get('width')),
3952 'language': join_nonempty(language_code, 'desc' if is_descriptive else '') or None,
3953 'language_preference': PREFERRED_LANG_VALUE if is_default else -10 if is_descriptive else -1,
3954 # Strictly de-prioritize broken, damaged and 3gp formats
3955 'preference': -20 if is_broken else -10 if is_damaged else -2 if itag == '17' else None,
3956 }
3957 mime_mobj = re.match(
3958 r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', fmt.get('mimeType') or '')
3959 if mime_mobj:
3960 dct['ext'] = mimetype2ext(mime_mobj.group(1))
3961 dct.update(parse_codecs(mime_mobj.group(2)))
3962 if itag:
3963 itags[itag].add(('https', dct.get('language')))
3964 stream_ids.append(stream_id)
3965 single_stream = 'none' in (dct.get('acodec'), dct.get('vcodec'))
3966 if single_stream and dct.get('ext'):
3967 dct['container'] = dct['ext'] + '_dash'
3968
3969 if (all_formats or 'dashy' in format_types) and dct['filesize']:
3970 yield {
3971 **dct,
3972 'format_id': f'{dct["format_id"]}-dashy' if all_formats else dct['format_id'],
3973 'protocol': 'http_dash_segments',
3974 'fragments': build_fragments(dct),
3975 }
3976 if all_formats or 'dashy' not in format_types:
3977 dct['downloader_options'] = {'http_chunk_size': CHUNK_SIZE}
3978 yield dct
3979
3980 needs_live_processing = self._needs_live_processing(live_status, duration)
3981 skip_bad_formats = 'incomplete' not in format_types
3982 if self._configuration_arg('include_incomplete_formats'):
3983 skip_bad_formats = False
3984 self._downloader.deprecated_feature('[youtube] include_incomplete_formats extractor argument is deprecated. '
3985 'Use formats=incomplete extractor argument instead')
3986
3987 skip_manifests = set(self._configuration_arg('skip'))
3988 if (not self.get_param('youtube_include_hls_manifest', True)
3989 or needs_live_processing == 'is_live' # These will be filtered out by YoutubeDL anyway
3990 or needs_live_processing and skip_bad_formats):
3991 skip_manifests.add('hls')
3992
3993 if not self.get_param('youtube_include_dash_manifest', True):
3994 skip_manifests.add('dash')
3995 if self._configuration_arg('include_live_dash'):
3996 self._downloader.deprecated_feature('[youtube] include_live_dash extractor argument is deprecated. '
3997 'Use formats=incomplete extractor argument instead')
3998 elif skip_bad_formats and live_status == 'is_live' and needs_live_processing != 'is_live':
3999 skip_manifests.add('dash')
4000
4001 def process_manifest_format(f, proto, client_name, itag):
4002 key = (proto, f.get('language'))
4003 if not all_formats and key in itags[itag]:
4004 return False
4005 itags[itag].add(key)
4006
4007 if itag and all_formats:
4008 f['format_id'] = f'{itag}-{proto}'
4009 elif any(p != proto for p, _ in itags[itag]):
4010 f['format_id'] = f'{itag}-{proto}'
4011 elif itag:
4012 f['format_id'] = itag
4013
4014 if original_language and f.get('language') == original_language:
4015 f['format_note'] = join_nonempty(f.get('format_note'), '(default)', delim=' ')
4016 f['language_preference'] = PREFERRED_LANG_VALUE
4017
4018 if f.get('source_preference') is None:
4019 f['source_preference'] = -1
4020
4021 if itag in ('616', '235'):
4022 f['format_note'] = join_nonempty(f.get('format_note'), 'Premium', delim=' ')
4023 f['source_preference'] += 100
4024
4025 f['quality'] = q(itag_qualities.get(try_get(f, lambda f: f['format_id'].split('-')[0]), -1))
4026 if f['quality'] == -1 and f.get('height'):
4027 f['quality'] = q(res_qualities[min(res_qualities, key=lambda x: abs(x - f['height']))])
4028 if self.get_param('verbose') or all_formats:
4029 f['format_note'] = join_nonempty(f.get('format_note'), client_name, delim=', ')
4030 if f.get('fps') and f['fps'] <= 1:
4031 del f['fps']
4032
4033 if proto == 'hls' and f.get('has_drm'):
4034 f['has_drm'] = 'maybe'
4035 f['source_preference'] -= 5
4036 return True
4037
4038 subtitles = {}
4039 for sd in streaming_data:
4040 client_name = sd.get(STREAMING_DATA_CLIENT_NAME)
4041
4042 hls_manifest_url = 'hls' not in skip_manifests and sd.get('hlsManifestUrl')
4043 if hls_manifest_url:
4044 fmts, subs = self._extract_m3u8_formats_and_subtitles(
4045 hls_manifest_url, video_id, 'mp4', fatal=False, live=live_status == 'is_live')
4046 subtitles = self._merge_subtitles(subs, subtitles)
4047 for f in fmts:
4048 if process_manifest_format(f, 'hls', client_name, self._search_regex(
4049 r'/itag/(\d+)', f['url'], 'itag', default=None)):
4050 yield f
4051
4052 dash_manifest_url = 'dash' not in skip_manifests and sd.get('dashManifestUrl')
4053 if dash_manifest_url:
4054 formats, subs = self._extract_mpd_formats_and_subtitles(dash_manifest_url, video_id, fatal=False)
4055 subtitles = self._merge_subtitles(subs, subtitles) # Prioritize HLS subs over DASH
4056 for f in formats:
4057 if process_manifest_format(f, 'dash', client_name, f['format_id']):
4058 f['filesize'] = int_or_none(self._search_regex(
4059 r'/clen/(\d+)', f.get('fragment_base_url') or f['url'], 'file size', default=None))
4060 if needs_live_processing:
4061 f['is_from_start'] = True
4062
4063 yield f
4064 yield subtitles
4065
def _extract_storyboard(self, player_responses, duration):
    """Yield an mhtml storyboard format for each level of the storyboard spec.

    The spec string is '|'-separated: its first field is joined onto
    https://i.ytimg.com/ to form the URL template, and every other field
    describes one storyboard level as eight '#'-separated values. Levels that
    do not have that shape are reported with a warning and skipped. Nothing is
    yielded when no valid base URL can be built.
    """
    spec_fields = get_first(
        player_responses, ('storyboards', 'playerStoryboardSpecRenderer', 'spec'), default='').split('|')
    # Reverse so that the URL template ends up last and can be popped off
    levels = spec_fields[::-1]
    base_url = url_or_none(urljoin('https://i.ytimg.com/', levels.pop() or None))
    if not base_url:
        return

    top_level = len(levels) - 1
    for index, level in enumerate(levels):
        fields = level.split('#')
        counts = [int_or_none(value) for value in fields[:5]]
        if len(fields) != 8 or not all(counts):
            self.report_warning(f'Malformed storyboard {index}: {"#".join(fields)}{bug_reports_message()}')
            continue
        width, height, frame_count, cols, rows = counts
        name, sigh = fields[6:]

        url = base_url.replace('$L', str(top_level - index)).replace('$N', name) + f'&sigh={sigh}'
        # Each fragment is one sheet holding up to cols * rows frames
        fragment_count = frame_count / (cols * rows)
        fragment_duration = duration / fragment_count

        fragments = []
        for page in range(math.ceil(fragment_count)):
            fragments.append({
                'url': url.replace('$M', str(page)),
                # Never let a fragment extend past the end of the video
                'duration': min(fragment_duration, duration - (page * fragment_duration)),
            })

        yield {
            'format_id': f'sb{index}',
            'format_note': 'storyboard',
            'ext': 'mhtml',
            'protocol': 'mhtml',
            'acodec': 'none',
            'vcodec': 'none',
            'url': url,
            'width': width,
            'height': height,
            'fps': frame_count / duration,
            'rows': rows,
            'columns': cols,
            'fragments': fragments,
        }
4103
4104 def _download_player_responses(self, url, smuggled_data, video_id, webpage_url):
4105 webpage = None
4106 if 'webpage' not in self._configuration_arg('player_skip'):
4107 query = {'bpctr': '9999999999', 'has_verified': '1'}
4108 pp = self._configuration_arg('player_params', [None], casesense=True)[0]
4109 if pp:
4110 query['pp'] = pp
4111 webpage = self._download_webpage(
4112 webpage_url, video_id, fatal=False, query=query)
4113
4114 master_ytcfg = self.extract_ytcfg(video_id, webpage) or self._get_default_ytcfg()
4115
4116 player_responses, player_url = self._extract_player_responses(
4117 self._get_requested_clients(url, smuggled_data),
4118 video_id, webpage, master_ytcfg, smuggled_data)
4119
4120 return webpage, master_ytcfg, player_responses, player_url
4121
def _list_formats(self, video_id, microformats, video_details, player_responses, player_url, duration=None):
    """Determine the live status of the video and extract its formats.

    Returns a tuple
    ``(live_broadcast_details, live_status, streaming_data, formats, subtitles)``.
    """
    live_broadcast_details = traverse_obj(microformats, (..., 'liveBroadcastDetails'))

    is_live = get_first(video_details, 'isLive')
    if is_live is None:
        is_live = get_first(live_broadcast_details, 'isLiveNow')
    live_content = get_first(video_details, 'isLiveContent')
    is_upcoming = get_first(video_details, 'isUpcoming')
    post_live = get_first(video_details, 'isPostLiveDvr')

    # The first matching state wins
    if post_live:
        live_status = 'post_live'
    elif is_live:
        live_status = 'is_live'
    elif is_upcoming:
        live_status = 'is_upcoming'
    elif live_content:
        live_status = 'was_live'
    elif False in (is_live, live_content):
        # At least one flag was explicitly reported as false
        live_status = 'not_live'
    else:
        live_status = None

    streaming_data = traverse_obj(player_responses, (..., 'streamingData'))
    # The subtitles are the last item yielded, after all the formats
    *formats, subtitles = self._extract_formats_and_subtitles(
        streaming_data, video_id, player_url, live_status, duration)

    has_drm_free_format = any(not fmt.get('has_drm') for fmt in formats)
    if not has_drm_free_format:
        # If there are no formats that definitely don't have DRM, all have DRM
        for fmt in formats:
            fmt['has_drm'] = True

    return live_broadcast_details, live_status, streaming_data, formats, subtitles
4144
def _real_extract(self, url):
    """Extract the info dict for a single YouTube video URL.

    Returns a url result when the player response only points at a trailer,
    a playlist result for multifeed videos (unless --no-playlist is set or
    the URL was smuggled with ``force_singlefeed``), and otherwise the full
    info dict with formats, thumbnails, subtitles and metadata.
    """
    url, smuggled_data = unsmuggle_url(url, {})
    video_id = self._match_id(url)

    base_url = self.http_scheme() + '//www.youtube.com/'
    webpage_url = base_url + 'watch?v=' + video_id

    webpage, master_ytcfg, player_responses, player_url = self._download_player_responses(url, smuggled_data, video_id, webpage_url)

    playability_statuses = traverse_obj(
        player_responses, (..., 'playabilityStatus'), expected_type=dict)

    trailer_video_id = get_first(
        playability_statuses,
        ('errorScreen', 'playerLegacyDesktopYpcTrailerRenderer', 'trailerVideoId'),
        expected_type=str)
    if trailer_video_id:
        return self.url_result(
            trailer_video_id, self.ie_key(), trailer_video_id)

    # Without a webpage there are no <meta> tags to search
    search_meta = ((lambda x: self._html_search_meta(x, webpage, default=None))
                   if webpage else (lambda x: None))

    video_details = traverse_obj(player_responses, (..., 'videoDetails'), expected_type=dict)
    microformats = traverse_obj(
        player_responses, (..., 'microformat', 'playerMicroformatRenderer'),
        expected_type=dict)

    translated_title = self._get_text(microformats, (..., 'title'))
    video_title = (self._preferred_lang and translated_title
                   or get_first(video_details, 'title')  # primary
                   or translated_title
                   or search_meta(['og:title', 'twitter:title', 'title']))
    translated_description = self._get_text(microformats, (..., 'description'))
    original_description = get_first(video_details, 'shortDescription')
    video_description = (
        self._preferred_lang and translated_description
        # If original description is blank, it will be an empty string.
        # Do not prefer translated description in this case.
        or original_description if original_description is not None else translated_description)

    multifeed_metadata_list = get_first(
        player_responses,
        ('multicamera', 'playerLegacyMulticameraRenderer', 'metadataList'),
        expected_type=str)
    if multifeed_metadata_list and not smuggled_data.get('force_singlefeed'):
        if self.get_param('noplaylist'):
            self.to_screen(f'Downloading just video {video_id} because of --no-playlist')
        else:
            entries = []
            feed_ids = []
            for feed in multifeed_metadata_list.split(','):
                # Unquote should take place before split on comma (,) since textual
                # fields may contain comma as well (see
                # https://github.com/ytdl-org/youtube-dl/issues/8536)
                feed_data = urllib.parse.parse_qs(
                    urllib.parse.unquote_plus(feed))

                def feed_entry(name):
                    return try_get(
                        feed_data, lambda x: x[name][0], str)

                feed_id = feed_entry('id')
                if not feed_id:
                    continue
                feed_title = feed_entry('title')
                title = video_title
                if feed_title:
                    title += f' ({feed_title})'
                entries.append({
                    '_type': 'url_transparent',
                    'ie_key': 'Youtube',
                    'url': smuggle_url(
                        '{}watch?v={}'.format(base_url, feed_data['id'][0]),
                        {'force_singlefeed': True}),
                    'title': title,
                })
                feed_ids.append(feed_id)
            self.to_screen(
                'Downloading multifeed video ({}) - add --no-playlist to just download video {}'.format(
                    ', '.join(feed_ids), video_id))
            return self.playlist_result(
                entries, video_id, video_title, video_description)

    duration = (int_or_none(get_first(video_details, 'lengthSeconds'))
                or int_or_none(get_first(microformats, 'lengthSeconds'))
                or parse_duration(search_meta('duration')) or None)

    live_broadcast_details, live_status, streaming_data, formats, automatic_captions = \
        self._list_formats(video_id, microformats, video_details, player_responses, player_url, duration)
    if live_status == 'post_live':
        self.write_debug(f'{video_id}: Video is in Post-Live Manifestless mode')

    if not formats:
        if not self.get_param('allow_unplayable_formats') and traverse_obj(streaming_data, (..., 'licenseInfos')):
            self.report_drm(video_id)
        pemr = get_first(
            playability_statuses,
            ('errorScreen', 'playerErrorMessageRenderer'), expected_type=dict) or {}
        reason = self._get_text(pemr, 'reason') or get_first(playability_statuses, 'reason')
        subreason = clean_html(self._get_text(pemr, 'subreason') or '')
        if subreason:
            if subreason == 'The uploader has not made this video available in your country.':
                countries = get_first(microformats, 'availableCountries')
                if not countries:
                    regions_allowed = search_meta('regionsAllowed')
                    countries = regions_allowed.split(',') if regions_allowed else None
                self.raise_geo_restricted(subreason, countries, metadata_available=True)
            # A subreason may be present without a main reason; joining only the
            # non-empty parts avoids concatenating onto None
            reason = join_nonempty(reason, subreason, delim='. ')
        if reason:
            self.raise_no_formats(reason, expected=True)

    keywords = get_first(video_details, 'keywords', expected_type=list) or []
    if not keywords and webpage:
        keywords = [
            unescapeHTML(m.group('content'))
            for m in re.finditer(self._meta_regex('og:video:tag'), webpage)]
    for keyword in keywords:
        if keyword.startswith('yt:stretch='):
            mobj = re.search(r'(\d+)\s*:\s*(\d+)', keyword)
            if mobj:
                # NB: float is intentional for forcing float division
                w, h = (float(v) for v in mobj.groups())
                if w > 0 and h > 0:
                    ratio = w / h
                    for f in formats:
                        if f.get('vcodec') != 'none':
                            f['stretched_ratio'] = ratio
                    break
    thumbnails = self._extract_thumbnails((video_details, microformats), (..., ..., 'thumbnail'))
    thumbnail_url = search_meta(['og:image', 'twitter:image'])
    if thumbnail_url:
        thumbnails.append({
            'url': thumbnail_url,
        })
    # Snapshot of the thumbnails reported by YouTube, taken before the guessed URLs are added
    original_thumbnails = thumbnails.copy()

    # The best resolution thumbnails sometimes does not appear in the webpage
    # See: https://github.com/yt-dlp/yt-dlp/issues/340
    # List of possible thumbnails - Ref: <https://stackoverflow.com/a/20542029>
    thumbnail_names = [
        # While the *1,*2,*3 thumbnails are just below their corresponding "*default" variants
        # in resolution, these are not the custom thumbnail. So de-prioritize them
        'maxresdefault', 'hq720', 'sddefault', 'hqdefault', '0', 'mqdefault', 'default',
        'sd1', 'sd2', 'sd3', 'hq1', 'hq2', 'hq3', 'mq1', 'mq2', 'mq3', '1', '2', '3',
    ]
    n_thumbnail_names = len(thumbnail_names)
    thumbnails.extend({
        'url': 'https://i.ytimg.com/vi{webp}/{video_id}/{name}{live}.{ext}'.format(
            video_id=video_id, name=name, ext=ext,
            webp='_webp' if ext == 'webp' else '', live='_live' if live_status == 'is_live' else ''),
    } for name in thumbnail_names for ext in ('webp', 'jpg'))
    for thumb in thumbnails:
        # Position in thumbnail_names decides the preference; unknown names rank last
        i = next((i for i, t in enumerate(thumbnail_names) if f'/{video_id}/{t}' in thumb['url']), n_thumbnail_names)
        thumb['preference'] = (0 if '.webp' in thumb['url'] else -1) - (2 * i)
    self._remove_duplicate_formats(thumbnails)
    self._downloader._sort_thumbnails(original_thumbnails)

    category = get_first(microformats, 'category') or search_meta('genre')
    channel_id = self.ucid_or_none(str_or_none(
        get_first(video_details, 'channelId')
        or get_first(microformats, 'externalChannelId')
        or search_meta('channelId')))
    owner_profile_url = get_first(microformats, 'ownerProfileUrl')

    live_start_time = parse_iso8601(get_first(live_broadcast_details, 'startTimestamp'))
    live_end_time = parse_iso8601(get_first(live_broadcast_details, 'endTimestamp'))
    if not duration and live_end_time and live_start_time:
        duration = live_end_time - live_start_time

    needs_live_processing = self._needs_live_processing(live_status, duration)

    def is_bad_format(fmt):
        if needs_live_processing and not fmt.get('is_from_start'):
            return True
        elif (live_status == 'is_live' and needs_live_processing != 'is_live'
                and fmt.get('protocol') == 'http_dash_segments'):
            return True

    for fmt in filter(is_bad_format, formats):
        fmt['preference'] = (fmt.get('preference') or -1) - 10
        fmt['format_note'] = join_nonempty(fmt.get('format_note'), '(Last 2 hours)', delim=' ')

    if needs_live_processing:
        self._prepare_live_from_start_formats(
            formats, video_id, live_start_time, url, webpage_url, smuggled_data, live_status == 'is_live')

    formats.extend(self._extract_storyboard(player_responses, duration))

    channel_handle = self.handle_from_url(owner_profile_url)

    info = {
        'id': video_id,
        'title': video_title,
        'formats': formats,
        'thumbnails': thumbnails,
        # The best thumbnail that we are sure exists. Prevents unnecessary
        # URL checking if user don't care about getting the best possible thumbnail
        'thumbnail': traverse_obj(original_thumbnails, (-1, 'url')),
        'description': video_description,
        'channel_id': channel_id,
        'channel_url': format_field(channel_id, None, 'https://www.youtube.com/channel/%s', default=None),
        'duration': duration,
        'view_count': int_or_none(
            get_first((video_details, microformats), (..., 'viewCount'))
            or search_meta('interactionCount')),
        'average_rating': float_or_none(get_first(video_details, 'averageRating')),
        'age_limit': 18 if (
            get_first(microformats, 'isFamilySafe') is False
            or search_meta('isFamilyFriendly') == 'false'
            or search_meta('og:restrictions:age') == '18+') else 0,
        'webpage_url': webpage_url,
        'categories': [category] if category else None,
        'tags': keywords,
        'playable_in_embed': get_first(playability_statuses, 'playableInEmbed'),
        'live_status': live_status,
        'release_timestamp': live_start_time,
        '_format_sort_fields': (  # source_preference is lower for potentially damaged formats
            'quality', 'res', 'fps', 'hdr:12', 'source', 'vcodec:vp9.2', 'channels', 'acodec', 'lang', 'proto'),
    }

    subtitles = {}
    pctr = traverse_obj(player_responses, (..., 'captions', 'playerCaptionsTracklistRenderer'), expected_type=dict)
    if pctr:
        def get_lang_code(track):
            return (remove_start(track.get('vssId') or '', '.').replace('.', '-')
                    or track.get('languageCode'))

        # Converted into dicts to remove duplicates
        captions = {
            get_lang_code(sub): sub
            for sub in traverse_obj(pctr, (..., 'captionTracks', ...))}
        translation_languages = {
            lang.get('languageCode'): self._get_text(lang.get('languageName'), max_runs=1)
            for lang in traverse_obj(pctr, (..., 'translationLanguages', ...))}

        def process_language(container, base_url, lang_code, sub_name, query):
            # Adds one entry per subtitle format for lang_code to container
            lang_subs = container.setdefault(lang_code, [])
            for fmt in self._SUBTITLE_FORMATS:
                query.update({
                    'fmt': fmt,
                })
                lang_subs.append({
                    'ext': fmt,
                    'url': urljoin('https://www.youtube.com', update_url_query(base_url, query)),
                    'name': sub_name,
                })

        # NB: Constructing the full subtitle dictionary is slow
        get_translated_subs = 'translated_subs' not in self._configuration_arg('skip') and (
            self.get_param('writeautomaticsub', False) or self.get_param('listsubtitles'))
        for lang_code, caption_track in captions.items():
            base_url = caption_track.get('baseUrl')
            if not base_url:
                continue
            # Only parse the URL once it is known to exist
            orig_lang = parse_qs(base_url).get('lang', [None])[-1]
            lang_name = self._get_text(caption_track, 'name', max_runs=1)
            if caption_track.get('kind') != 'asr':
                if not lang_code:
                    continue
                process_language(
                    subtitles, base_url, lang_code, lang_name, {})
                if not caption_track.get('isTranslatable'):
                    continue
            for trans_code, trans_name in translation_languages.items():
                if not trans_code:
                    continue
                orig_trans_code = trans_code
                if caption_track.get('kind') != 'asr' and trans_code != 'und':
                    if not get_translated_subs:
                        continue
                    trans_code += f'-{lang_code}'
                    trans_name += format_field(lang_name, None, ' from %s')
                if lang_code == f'a-{orig_trans_code}':
                    # Set audio language based on original subtitles
                    for f in formats:
                        if f.get('acodec') != 'none' and not f.get('language'):
                            f['language'] = orig_trans_code
                    # Add an "-orig" label to the original language so that it can be distinguished.
                    # The subs are returned without "-orig" as well for compatibility
                    process_language(
                        automatic_captions, base_url, f'{trans_code}-orig', f'{trans_name} (Original)', {})
                # Setting tlang=lang returns damaged subtitles.
                process_language(automatic_captions, base_url, trans_code, trans_name,
                                 {} if orig_lang == orig_trans_code else {'tlang': trans_code})

        info['automatic_captions'] = automatic_captions
        info['subtitles'] = subtitles

    # Start/end times can be given in either the URL fragment or the query
    parsed_url = urllib.parse.urlparse(url)
    for component in [parsed_url.fragment, parsed_url.query]:
        query = urllib.parse.parse_qs(component)
        for k, v in query.items():
            for d_k, s_ks in [('start', ('start', 't')), ('end', ('end',))]:
                d_k += '_time'
                if d_k not in info and k in s_ks:
                    info[d_k] = parse_duration(v[0])

    # Youtube Music Auto-generated description
    if (video_description or '').strip().endswith('\nAuto-generated by YouTube.'):
        # XXX: Causes catastrophic backtracking if description has "·"
        # E.g. https://www.youtube.com/watch?v=DoPaAxMQoiI
        # Simulating atomic groups: (?P<a>[^xy]+)x => (?=(?P<a>[^xy]+))(?P=a)x
        # reduces it, but does not fully fix it. https://regex101.com/r/8Ssf2h/2
        # NB: Literal spaces must be escaped since unescaped whitespace is ignored in verbose mode
        mobj = re.search(
            r'''(?xs)
                (?=(?P<track>[^\n·]+))(?P=track)·
                (?=(?P<artist>[^\n]+))(?P=artist)\n+
                (?=(?P<album>[^\n]+))(?P=album)\n
                (?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?
                (?:.+?Released\ on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?
                (.+?\nArtist\s*:\s*
                    (?=(?P<clean_artist>[^\n]+))(?P=clean_artist)\n
                )?.+\nAuto-generated\ by\ YouTube\.\s*$
            ''', video_description)
        if mobj:
            release_year = mobj.group('release_year')
            release_date = mobj.group('release_date')
            if release_date:
                release_date = release_date.replace('-', '')
                if not release_year:
                    release_year = release_date[:4]
            info.update({
                # Strip the matched album text (not the group name)
                'album': mobj.group('album').strip(),
                'artists': ([a] if (a := mobj.group('clean_artist'))
                            else [a.strip() for a in mobj.group('artist').split('·')]),
                'track': mobj.group('track').strip(),
                'release_date': release_date,
                'release_year': int_or_none(release_year),
            })

    initial_data = None
    if webpage:
        initial_data = self.extract_yt_initial_data(video_id, webpage, fatal=False)
        if not traverse_obj(initial_data, 'contents'):
            self.report_warning('Incomplete data received in embedded initial data; re-fetching using API.')
            initial_data = None
    if not initial_data:
        query = {'videoId': video_id}
        query.update(self._get_checkok_params())
        initial_data = self._extract_response(
            item_id=video_id, ep='next', fatal=False,
            ytcfg=master_ytcfg, query=query, check_get_keys='contents',
            headers=self.generate_api_headers(ytcfg=master_ytcfg),
            note='Downloading initial data API JSON')

    info['comment_count'] = traverse_obj(initial_data, (
        'contents', 'twoColumnWatchNextResults', 'results', 'results', 'contents', ..., 'itemSectionRenderer',
        'contents', ..., 'commentsEntryPointHeaderRenderer', 'commentCount',
    ), (
        'engagementPanels', lambda _, v: v['engagementPanelSectionListRenderer']['panelIdentifier'] == 'comment-item-section',
        'engagementPanelSectionListRenderer', 'header', 'engagementPanelTitleHeaderRenderer', 'contextualInfo',
    ), expected_type=self._get_count, get_all=False)

    try:  # This will error if there is no livechat
        initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
    except (KeyError, IndexError, TypeError):
        pass
    else:
        info.setdefault('subtitles', {})['live_chat'] = [{
            # url is needed to set cookies
            'url': f'https://www.youtube.com/watch?v={video_id}&bpctr=9999999999&has_verified=1',
            'video_id': video_id,
            'ext': 'json',
            'protocol': ('youtube_live_chat' if live_status in ('is_live', 'is_upcoming')
                         else 'youtube_live_chat_replay'),
        }]

    if initial_data:
        info['chapters'] = (
            self._extract_chapters_from_json(initial_data, duration)
            or self._extract_chapters_from_engagement_panel(initial_data, duration)
            or self._extract_chapters_from_description(video_description, duration)
            or None)

        info['heatmap'] = self._extract_heatmap(initial_data)

    contents = traverse_obj(
        initial_data, ('contents', 'twoColumnWatchNextResults', 'results', 'results', 'contents'),
        expected_type=list, default=[])

    vpir = get_first(contents, 'videoPrimaryInfoRenderer')
    if vpir:
        stl = vpir.get('superTitleLink')
        if stl:
            stl = self._get_text(stl)
            if try_get(
                    vpir,
                    lambda x: x['superTitleIcon']['iconType']) == 'LOCATION_PIN':
                info['location'] = stl
            else:
                mobj = re.search(r'(.+?)\s*S(\d+)\s*•?\s*E(\d+)', stl)
                if mobj:
                    info.update({
                        'series': mobj.group(1),
                        'season_number': int(mobj.group(2)),
                        'episode_number': int(mobj.group(3)),
                    })
        for tlb in (try_get(
                vpir,
                lambda x: x['videoActions']['menuRenderer']['topLevelButtons'],
                list) or []):
            tbrs = variadic(
                traverse_obj(
                    tlb, ('toggleButtonRenderer', ...),
                    ('segmentedLikeDislikeButtonRenderer', ..., 'toggleButtonRenderer')))
            for tbr in tbrs:
                for getter, regex in [(
                        lambda x: x['defaultText']['accessibility']['accessibilityData'],
                        r'(?P<count>[\d,]+)\s*(?P<type>(?:dis)?like)'), ([
                            lambda x: x['accessibility'],
                            lambda x: x['accessibilityData']['accessibilityData'],
                        ], r'(?P<type>(?:dis)?like) this video along with (?P<count>[\d,]+) other people')]:
                    label = (try_get(tbr, getter, dict) or {}).get('label')
                    if label:
                        mobj = re.match(regex, label)
                        if mobj:
                            info[mobj.group('type') + '_count'] = str_to_int(mobj.group('count'))
                            break

        info['like_count'] = traverse_obj(vpir, (
            'videoActions', 'menuRenderer', 'topLevelButtons', ...,
            'segmentedLikeDislikeButtonViewModel', 'likeButtonViewModel', 'likeButtonViewModel',
            'toggleButtonViewModel', 'toggleButtonViewModel', 'defaultButtonViewModel',
            'buttonViewModel', 'accessibilityText', {parse_count}), get_all=False)

        vcr = traverse_obj(vpir, ('viewCount', 'videoViewCountRenderer'))
        if vcr:
            vc = self._get_count(vcr, 'viewCount')
            # Upcoming premieres with waiting count are treated as live here
            if vcr.get('isLive'):
                info['concurrent_view_count'] = vc
            elif info.get('view_count') is None:
                info['view_count'] = vc

    vsir = get_first(contents, 'videoSecondaryInfoRenderer')
    if vsir:
        vor = traverse_obj(vsir, ('owner', 'videoOwnerRenderer'))
        info.update({
            'channel': self._get_text(vor, 'title'),
            'channel_follower_count': self._get_count(vor, 'subscriberCountText')})

        if not channel_handle:
            channel_handle = self.handle_from_url(
                traverse_obj(vor, (
                    ('navigationEndpoint', ('title', 'runs', ..., 'navigationEndpoint')),
                    (('commandMetadata', 'webCommandMetadata', 'url'), ('browseEndpoint', 'canonicalBaseUrl')),
                    {str}), get_all=False))

        rows = try_get(
            vsir,
            lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows'],
            list) or []
        # Album/Artist/Song rows are only used when no row has a divider line
        multiple_songs = False
        for row in rows:
            if try_get(row, lambda x: x['metadataRowRenderer']['hasDividerLine']) is True:
                multiple_songs = True
                break
        for row in rows:
            mrr = row.get('metadataRowRenderer') or {}
            mrr_title = mrr.get('title')
            if not mrr_title:
                continue
            mrr_title = self._get_text(mrr, 'title')
            mrr_contents_text = self._get_text(mrr, ('contents', 0))
            if mrr_title == 'License':
                info['license'] = mrr_contents_text
            elif not multiple_songs:
                if mrr_title == 'Album':
                    info['album'] = mrr_contents_text
                elif mrr_title == 'Artist':
                    info['artists'] = [mrr_contents_text] if mrr_contents_text else None
                elif mrr_title == 'Song':
                    info['track'] = mrr_contents_text
        owner_badges = self._extract_badges(traverse_obj(vsir, ('owner', 'videoOwnerRenderer', 'badges')))
        if self._has_badge(owner_badges, BadgeType.VERIFIED):
            info['channel_is_verified'] = True

    info.update({
        'uploader': info.get('channel'),
        'uploader_id': channel_handle,
        'uploader_url': format_field(channel_handle, None, 'https://www.youtube.com/%s', default=None),
    })

    # We only want timestamp IF it has time precision AND a timezone
    # Currently the uploadDate in microformats appears to be in US/Pacific timezone.
    timestamp = (
        parse_iso8601(get_first(microformats, 'uploadDate'), timezone=NO_DEFAULT)
        or parse_iso8601(search_meta('uploadDate'), timezone=NO_DEFAULT)
    )
    upload_date = (
        dt.datetime.fromtimestamp(timestamp, dt.timezone.utc).strftime('%Y%m%d') if timestamp else
        (
            unified_strdate(get_first(microformats, 'uploadDate'))
            or unified_strdate(search_meta('uploadDate'))
        ))

    # In the case we cannot get the timestamp:
    # The upload date for scheduled, live and past live streams / premieres in microformats
    # may be different from the stream date. Although not in UTC, we will prefer it in this case.
    # See: https://github.com/yt-dlp/yt-dlp/pull/2223#issuecomment-1008485139
    if not upload_date or (not timestamp and live_status in ('not_live', None)):
        # this should be in UTC, as configured in the cookie/client context
        upload_date = strftime_or_none(
            self._parse_time_text(self._get_text(vpir, 'dateText'))) or upload_date

    info['upload_date'] = upload_date
    info['timestamp'] = timestamp

    if upload_date and live_status not in ('is_live', 'post_live', 'is_upcoming'):
        # Newly uploaded videos' HLS formats are potentially problematic and need to be checked
        upload_datetime = datetime_from_str(upload_date).replace(tzinfo=dt.timezone.utc)
        if upload_datetime >= datetime_from_str('today-2days'):
            for fmt in info['formats']:
                if fmt.get('protocol') == 'm3u8_native':
                    fmt['__needs_testing'] = True

    # Mirror the music metadata into the corresponding generic fields
    for s_k, d_k in [('artists', 'creators'), ('track', 'alt_title')]:
        v = info.get(s_k)
        if v:
            info[d_k] = v

    badges = self._extract_badges(traverse_obj(vpir, 'badges'))

    is_private = (self._has_badge(badges, BadgeType.AVAILABILITY_PRIVATE)
                  or get_first(video_details, 'isPrivate', expected_type=bool))

    info['availability'] = (
        'public' if self._has_badge(badges, BadgeType.AVAILABILITY_PUBLIC)
        else self._availability(
            is_private=is_private,
            needs_premium=(
                self._has_badge(badges, BadgeType.AVAILABILITY_PREMIUM)
                or False if initial_data and is_private is not None else None),
            needs_subscription=(
                self._has_badge(badges, BadgeType.AVAILABILITY_SUBSCRIPTION)
                or False if initial_data and is_private is not None else None),
            needs_auth=info['age_limit'] >= 18,
            is_unlisted=None if is_private is None else (
                self._has_badge(badges, BadgeType.AVAILABILITY_UNLISTED)
                or get_first(microformats, 'isUnlisted', expected_type=bool))))

    info['__post_extractor'] = self.extract_comments(master_ytcfg, video_id, contents, webpage)

    self.mark_watched(video_id, player_responses)

    return info
4692
4693
4694 class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):
@staticmethod
def passthrough_smuggled_data(func):
    """Decorate ``func(self, url, smuggled_data)`` so it can be called as ``func(self, url)``.

    The wrapper unsmuggles the URL, flags music URLs in the smuggled data and
    re-smuggles whatever data is left into the returned url result and its
    entries.
    """
    youtube_hosts = ('www.youtube.com', 'music.youtube.com')

    def _smuggle(info, smuggled_data):
        # Only url results carry a URL that data can be smuggled into
        if info.get('_type') in ('url', 'url_transparent'):
            if smuggled_data.get('is_music_url'):
                url_parts = urllib.parse.urlparse(info['url'])
                if url_parts.netloc in youtube_hosts:
                    # The music host now encodes the flag, so drop it from the data
                    del smuggled_data['is_music_url']
                    music_parts = url_parts._replace(netloc='music.youtube.com')
                    info['url'] = urllib.parse.urlunparse(music_parts)
            if smuggled_data:
                info['url'] = smuggle_url(info['url'], smuggled_data)
        return info

    @functools.wraps(func)
    def wrapper(self, url):
        url, smuggled_data = unsmuggle_url(url, {})
        if self.is_music_url(url):
            smuggled_data['is_music_url'] = True
        result = func(self, url, smuggled_data)
        if not smuggled_data:
            return result

        _smuggle(result, smuggled_data)
        if result.get('entries'):
            # Each entry gets its own copy since _smuggle may remove keys
            result['entries'] = (
                _smuggle(entry, smuggled_data.copy()) for entry in result['entries'])
        return result

    return wrapper
4721
4722 @staticmethod
4723 def _extract_basic_item_renderer(item):
4724 # Modified from _extract_grid_item_renderer
4725 known_basic_renderers = (
4726 'playlistRenderer', 'videoRenderer', 'channelRenderer', 'showRenderer', 'reelItemRenderer',
4727 )
4728 for key, renderer in item.items():
4729 if not isinstance(renderer, dict):
4730 continue
4731 elif key in known_basic_renderers:
4732 return renderer
4733 elif key.startswith('grid') and key.endswith('Renderer'):
4734 return renderer
4735
def _extract_channel_renderer(self, renderer):
    """Build a url result pointing at the channel described by a channel renderer."""
    channel_id = self.ucid_or_none(renderer['channelId'])
    title = self._get_text(renderer, 'title')
    channel_url = format_field(channel_id, None, 'https://www.youtube.com/channel/%s', default=None)

    endpoint_url = traverse_obj(renderer, (
        'navigationEndpoint', (('commandMetadata', 'webCommandMetadata', 'url'),
                               ('browseEndpoint', 'canonicalBaseUrl')),
        {str}), get_all=False)
    channel_handle = self.handle_from_url(endpoint_url)
    if not channel_handle:
        # As of 2023-06-01, YouTube sets subscriberCountText to the handle in search
        channel_handle = self.handle_or_none(self._get_text(renderer, 'subscriberCountText'))

    uploader_url = format_field(channel_handle, None, 'https://www.youtube.com/%s', default=None)

    # See above. YouTube sets videoCountText to the subscriber text in search channel renderers.
    # However, in feed/channels this is set correctly to the subscriber count
    follower_count = traverse_obj(
        renderer, 'subscriberCountText', 'videoCountText', expected_type=self._get_count)
    thumbnails = self._extract_thumbnails(renderer, 'thumbnail')

    # videoCountText may be the subscriber count, so only trust it as a video
    # count when subscriberCountText itself yields a count
    if self._get_count(renderer, 'subscriberCountText') is not None:
        playlist_count = self._get_count(renderer, 'videoCountText')
    else:
        playlist_count = None

    description = self._get_text(renderer, 'descriptionSnippet')
    owner_badges = self._extract_badges(traverse_obj(renderer, 'ownerBadges'))
    is_verified = True if self._has_badge(owner_badges, BadgeType.VERIFIED) else None

    return {
        '_type': 'url',
        'url': channel_url,
        'id': channel_id,
        'ie_key': YoutubeTabIE.ie_key(),
        'channel': title,
        'uploader': title,
        'channel_id': channel_id,
        'channel_url': channel_url,
        'title': title,
        'uploader_id': channel_handle,
        'uploader_url': uploader_url,
        'channel_follower_count': follower_count,
        'thumbnails': thumbnails,
        'playlist_count': playlist_count,
        'description': description,
        'channel_is_verified': is_verified,
    }
4773
4774 def _grid_entries(self, grid_renderer):
4775 for item in grid_renderer['items']:
4776 if not isinstance(item, dict):
4777 continue
4778 renderer = self._extract_basic_item_renderer(item)
4779 if not isinstance(renderer, dict):
4780 continue
4781 title = self._get_text(renderer, 'title')
4782
4783 # playlist
4784 playlist_id = renderer.get('playlistId')
4785 if playlist_id:
4786 yield self.url_result(
4787 f'https://www.youtube.com/playlist?list={playlist_id}',
4788 ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
4789 video_title=title)
4790 continue
4791 # video
4792 video_id = renderer.get('videoId')
4793 if video_id:
4794 yield self._extract_video(renderer)
4795 continue
4796 # channel
4797 channel_id = renderer.get('channelId')
4798 if channel_id:
4799 yield self._extract_channel_renderer(renderer)
4800 continue
4801 # generic endpoint URL support
4802 ep_url = urljoin('https://www.youtube.com/', try_get(
4803 renderer, lambda x: x['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'],
4804 str))
4805 if ep_url:
4806 for ie in (YoutubeTabIE, YoutubePlaylistIE, YoutubeIE):
4807 if ie.suitable(ep_url):
4808 yield self.url_result(
4809 ep_url, ie=ie.ie_key(), video_id=ie._match_id(ep_url), video_title=title)
4810 break
4811
def _music_reponsive_list_entry(self, renderer):
    """
    Build a url result for a music list item renderer.

    Tried in order: the item's own video, the playlist behind its watch
    endpoint (with the video when one is given), then its browse endpoint.
    Returns None when none of these is present.
    """
    item_video_id = traverse_obj(renderer, ('playlistItemData', 'videoId'))
    if item_video_id:
        first_column_text = traverse_obj(renderer, (
            'flexColumns', 0, 'musicResponsiveListItemFlexColumnRenderer',
            'text', 'runs', 0, 'text'))
        return self.url_result(
            f'https://music.youtube.com/watch?v={item_video_id}',
            ie=YoutubeIE.ie_key(), video_id=item_video_id, title=first_column_text)

    watch_path = ('navigationEndpoint', 'watchEndpoint')
    list_id = traverse_obj(renderer, (*watch_path, 'playlistId'))
    if list_id:
        watch_video_id = traverse_obj(renderer, (*watch_path, 'videoId'))
        if watch_video_id:
            list_url = f'https://music.youtube.com/watch?v={watch_video_id}&list={list_id}'
        else:
            list_url = f'https://music.youtube.com/playlist?list={list_id}'
        return self.url_result(list_url, ie=YoutubeTabIE.ie_key(), video_id=list_id)

    browse_id = traverse_obj(renderer, ('navigationEndpoint', 'browseEndpoint', 'browseId'))
    if not browse_id:
        return None
    return self.url_result(
        f'https://music.youtube.com/browse/{browse_id}',
        ie=YoutubeTabIE.ie_key(), video_id=browse_id)
4832
4833 def _shelf_entries_from_content(self, shelf_renderer):
4834 content = shelf_renderer.get('content')
4835 if not isinstance(content, dict):
4836 return
4837 renderer = content.get('gridRenderer') or content.get('expandedShelfContentsRenderer')
4838 if renderer:
4839 # TODO: add support for nested playlists so each shelf is processed
4840 # as separate playlist
4841 # TODO: this includes only first N items
4842 yield from self._grid_entries(renderer)
4843 renderer = content.get('horizontalListRenderer')
4844 if renderer:
4845 # TODO: handle case
4846 pass
4847
def _shelf_entries(self, shelf_renderer, skip_channels=False):
    """
    Yield a url result for the shelf's own page (when it has an endpoint URL),
    followed by the entries embedded in the shelf content.
    @param skip_channels: stop without yielding anything when the shelf URL
                          contains '/channels?'
    """
    endpoint_path = try_get(
        shelf_renderer, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        str)
    shelf_url = urljoin('https://www.youtube.com', endpoint_path)
    if shelf_url:
        # Skip links to other channels. Note that checking for
        # endpoint.commandMetadata.webCommandMetadata.webPageType == WEB_PAGE_TYPE_CHANNEL
        # will not work
        if skip_channels and '/channels?' in shelf_url:
            return
        yield self.url_result(shelf_url, video_title=self._get_text(shelf_renderer, 'title'))
    # Shelf may not contain shelf URL, fallback to extraction from content
    yield from self._shelf_entries_from_content(shelf_renderer)
4863
4864 def _playlist_entries(self, video_list_renderer):
4865 for content in video_list_renderer['contents']:
4866 if not isinstance(content, dict):
4867 continue
4868 renderer = content.get('playlistVideoRenderer') or content.get('playlistPanelVideoRenderer')
4869 if not isinstance(renderer, dict):
4870 continue
4871 video_id = renderer.get('videoId')
4872 if not video_id:
4873 continue
4874 yield self._extract_video(renderer)
4875
def _rich_entries(self, rich_grid_renderer):
    """
    Yield at most one entry for a rich item: the extracted video when its
    content renderer has a videoId, otherwise a playlist url result when it
    has a playlistId.
    """
    content_renderer = traverse_obj(
        rich_grid_renderer,
        ('content', ('videoRenderer', 'reelItemRenderer', 'playlistRenderer')), get_all=False) or {}
    if content_renderer.get('videoId'):
        yield self._extract_video(content_renderer)
    elif content_renderer.get('playlistId'):
        list_id = content_renderer.get('playlistId')
        yield self.url_result(
            f'https://www.youtube.com/playlist?list={list_id}',
            ie=YoutubeTabIE.ie_key(), video_id=list_id,
            video_title=self._get_text(content_renderer, 'title'))
4891
4892 def _video_entry(self, video_renderer):
4893 video_id = video_renderer.get('videoId')
4894 if video_id:
4895 return self._extract_video(video_renderer)
4896
def _hashtag_tile_entry(self, hashtag_tile_renderer):
    """Return a url result for the tile's tap-command URL, or None if it has none."""
    tap_url = traverse_obj(
        hashtag_tile_renderer, ('onTapCommand', 'commandMetadata', 'webCommandMetadata', 'url'))
    full_url = urljoin('https://youtube.com', tap_url)
    if not full_url:
        return None
    return self.url_result(
        full_url, ie=YoutubeTabIE.ie_key(), title=self._get_text(hashtag_tile_renderer, 'hashtag'))
4903
def _post_thread_entries(self, post_thread_renderer):
    """
    Yield the entries referenced by a backstage post: its attached video, its
    attached playlist, and every video linked from the post text (except a
    link to the attached video itself).
    """
    post = try_get(
        post_thread_renderer, lambda x: x['post']['backstagePostRenderer'], dict)
    if not post:
        return

    # video attachment
    attached_video = try_get(
        post, lambda x: x['backstageAttachment']['videoRenderer'], dict) or {}
    attached_id = attached_video.get('videoId')
    if attached_id:
        video_entry = self._extract_video(attached_video)
        if video_entry:
            yield video_entry

    # playlist attachment
    attached_list_id = try_get(
        post, lambda x: x['backstageAttachment']['playlistRenderer']['playlistId'], str)
    if attached_list_id:
        yield self.url_result(
            f'https://www.youtube.com/playlist?list={attached_list_id}',
            ie=YoutubeTabIE.ie_key(), video_id=attached_list_id)

    # inline video links
    for run in try_get(post, lambda x: x['contentText']['runs'], list) or []:
        if not isinstance(run, dict):
            continue
        link = try_get(
            run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'], str)
        if not link or not YoutubeIE.suitable(link):
            continue
        linked_id = YoutubeIE._match_id(link)
        # The attached video was already handled above
        if linked_id != attached_id:
            yield self.url_result(link, ie=YoutubeIE.ie_key(), video_id=linked_id)
4939
4940 def _post_thread_continuation_entries(self, post_thread_continuation):
4941 contents = post_thread_continuation.get('contents')
4942 if not isinstance(contents, list):
4943 return
4944 for content in contents:
4945 renderer = content.get('backstagePostThreadRenderer')
4946 if isinstance(renderer, dict):
4947 yield from self._post_thread_entries(renderer)
4948 continue
4949 renderer = content.get('videoRenderer')
4950 if isinstance(renderer, dict):
4951 yield self._video_entry(renderer)
4952
4953 r''' # unused
4954 def _rich_grid_entries(self, contents):
4955 for content in contents:
4956 video_renderer = try_get(content, lambda x: x['richItemRenderer']['content']['videoRenderer'], dict)
4957 if video_renderer:
4958 entry = self._video_entry(video_renderer)
4959 if entry:
4960 yield entry
4961 '''
4962
def _report_history_entries(self, renderer):
    """Yield a YoutubeIE url result for every navigation URL found in the report history table cells."""
    cell_url_path = (
        'rows', ..., 'reportHistoryTableRowRenderer', 'cells', ...,
        'reportHistoryTableCellRenderer', 'cell', 'reportHistoryTableTextCellRenderer', 'text', 'runs', ...,
        'navigationEndpoint', 'commandMetadata', 'webCommandMetadata', 'url')
    for relative_url in traverse_obj(renderer, cell_url_path):
        yield self.url_result(urljoin('https://www.youtube.com', relative_url), YoutubeIE)
4969
def _extract_entries(self, parent_renderer, continuation_list):
    """
    Yield the entries found under parent_renderer['contents'].

    The continuation for the following page is reported through
    continuation_list[0], which works as an out-parameter.
    @param parent_renderer:    renderer whose 'contents' list is walked
    @param continuation_list:  list that is reset to [None] on entry and whose
                               first element is overwritten with the continuation
    """
    # continuation_list is modified in-place with continuation_list = [continuation_token]
    continuation_list[:] = [None]
    contents = try_get(parent_renderer, lambda x: x['contents'], list) or []
    for content in contents:
        if not isinstance(content, dict):
            continue
        # First of the section-like renderers that is present as a dict
        is_renderer = traverse_obj(
            content, 'itemSectionRenderer', 'musicShelfRenderer', 'musicShelfContinuation',
            expected_type=dict)
        if not is_renderer:
            # Not a section: only rich items and the report history table are handled
            if content.get('richItemRenderer'):
                for entry in self._rich_entries(content['richItemRenderer']):
                    yield entry
                continuation_list[0] = self._extract_continuation(parent_renderer)
            elif content.get('reportHistorySectionRenderer'):  # https://www.youtube.com/reporthistory
                table = traverse_obj(content, ('reportHistorySectionRenderer', 'table', 'tableRenderer'))
                yield from self._report_history_entries(table)
                continuation_list[0] = self._extract_continuation(table)
            continue

        isr_contents = try_get(is_renderer, lambda x: x['contents'], list) or []
        for isr_content in isr_contents:
            if not isinstance(isr_content, dict):
                continue

            # Maps a renderer key to a callable returning an iterable of entries
            known_renderers = {
                'playlistVideoListRenderer': self._playlist_entries,
                'gridRenderer': self._grid_entries,
                'reelShelfRenderer': self._grid_entries,
                'shelfRenderer': self._shelf_entries,
                'musicResponsiveListItemRenderer': lambda x: [self._music_reponsive_list_entry(x)],
                'backstagePostThreadRenderer': self._post_thread_entries,
                'videoRenderer': lambda x: [self._video_entry(x)],
                'playlistRenderer': lambda x: self._grid_entries({'items': [{'playlistRenderer': x}]}),
                'channelRenderer': lambda x: self._grid_entries({'items': [{'channelRenderer': x}]}),
                'hashtagTileRenderer': lambda x: [self._hashtag_tile_entry(x)],
                # Recurses with the same continuation_list, which the nested call resets again
                'richGridRenderer': lambda x: self._extract_entries(x, continuation_list),
            }
            for key, renderer in isr_content.items():
                if key not in known_renderers:
                    continue
                for entry in known_renderers[key](renderer):
                    # Some of the handlers above produce None for unusable renderers
                    if entry:
                        yield entry
                continuation_list[0] = self._extract_continuation(renderer)
                # Only the first known renderer of each item is processed
                break

            # Fall back to the continuation of the section itself ...
            if not continuation_list[0]:
                continuation_list[0] = self._extract_continuation(is_renderer)

        # ... and finally to the continuation of the parent renderer
        if not continuation_list[0]:
            continuation_list[0] = self._extract_continuation(parent_renderer)
5023
def _entries(self, tab, item_id, ytcfg, account_syncid, visitor_data):
    """
    Yield all entries of a tab: first those embedded in tab['content'], then
    those of every continuation page fetched through _extract_response.
    @param tab:             tab renderer; nothing is yielded without a 'content' dict
    @param item_id:         id used to label the page requests
    @param ytcfg:           passed on to generate_api_headers/_extract_response
    @param account_syncid:  passed on to generate_api_headers
    @param visitor_data:    initial visitor data; replaced by the value found
                            in each response when there is one
    """
    continuation_list = [None]
    # The lambda looks up continuation_list when it is called, so it also sees
    # the fresh list this name is rebound to inside the paging loop below
    extract_entries = lambda x: self._extract_entries(x, continuation_list)
    tab_content = try_get(tab, lambda x: x['content'], dict)
    if not tab_content:
        return
    parent_renderer = (
        try_get(tab_content, lambda x: x['sectionListRenderer'], dict)
        or try_get(tab_content, lambda x: x['richGridRenderer'], dict) or {})
    yield from extract_entries(parent_renderer)
    continuation = continuation_list[0]
    seen_continuations = set()
    for page_num in itertools.count(1):
        if not continuation:
            break
        # Stop when a continuation token repeats, as that would loop forever
        continuation_token = continuation.get('continuation')
        if continuation_token is not None and continuation_token in seen_continuations:
            self.write_debug('Detected YouTube feed looping - assuming end of feed.')
            break
        seen_continuations.add(continuation_token)
        headers = self.generate_api_headers(
            ytcfg=ytcfg, account_syncid=account_syncid, visitor_data=visitor_data)
        response = self._extract_response(
            item_id=f'{item_id} page {page_num}',
            query=continuation, headers=headers, ytcfg=ytcfg,
            check_get_keys=('continuationContents', 'onResponseReceivedActions', 'onResponseReceivedEndpoints'))

        if not response:
            break
        # Extracting updated visitor data is required to prevent an infinite extraction loop in some cases
        # See: https://github.com/ytdl-org/youtube-dl/issues/28702
        visitor_data = self._extract_visitor_data(response) or visitor_data

        # key found in the first continuation item -> (entry generator, key the
        # continuation items are wrapped under before being passed to it; None
        # means they are passed as-is)
        known_renderers = {
            'videoRenderer': (self._grid_entries, 'items'),  # for membership tab
            'gridPlaylistRenderer': (self._grid_entries, 'items'),
            'gridVideoRenderer': (self._grid_entries, 'items'),
            'gridChannelRenderer': (self._grid_entries, 'items'),
            'playlistVideoRenderer': (self._playlist_entries, 'contents'),
            'itemSectionRenderer': (extract_entries, 'contents'),  # for feeds
            'richItemRenderer': (extract_entries, 'contents'),  # for hashtag
            'backstagePostThreadRenderer': (self._post_thread_continuation_entries, 'contents'),
            'reportHistoryTableRowRenderer': (self._report_history_entries, 'rows'),
            'playlistVideoListContinuation': (self._playlist_entries, None),
            'gridContinuation': (self._grid_entries, None),
            'itemSectionContinuation': (self._post_thread_continuation_entries, None),
            'sectionListContinuation': (extract_entries, None),  # for feeds
        }

        continuation_items = traverse_obj(response, (
            ('onResponseReceivedActions', 'onResponseReceivedEndpoints'), ...,
            'appendContinuationItemsAction', 'continuationItems',
        ), 'continuationContents', get_all=False)
        # The renderer type is decided by the keys of this item (either the first
        # continuation item or the continuation contents themselves)
        continuation_item = traverse_obj(continuation_items, 0, None, expected_type=dict, default={})

        video_items_renderer = None
        for key in continuation_item:
            if key not in known_renderers:
                continue
            func, parent_key = known_renderers[key]
            video_items_renderer = {parent_key: continuation_items} if parent_key else continuation_items
            continuation_list = [None]
            yield from func(video_items_renderer)
            # Prefer the continuation reported by extract_entries (if it was the
            # generator used), else look for one in the renderer itself
            continuation = continuation_list[0] or self._extract_continuation(video_items_renderer)

        # No known renderer in this page: nothing more can be extracted
        if not video_items_renderer:
            break
5091
5092 @staticmethod
5093 def _extract_selected_tab(tabs, fatal=True):
5094 for tab_renderer in tabs:
5095 if tab_renderer.get('selected'):
5096 return tab_renderer
5097 if fatal:
5098 raise ExtractorError('Unable to find selected tab')
5099
@staticmethod
def _extract_tab_renderers(response):
    """Collect the (expandable) tab renderer dicts of a two-column browse response."""
    tab_path = (
        'contents', 'twoColumnBrowseResultsRenderer', 'tabs', ...,
        ('tabRenderer', 'expandableTabRenderer'))
    return traverse_obj(response, tab_path, expected_type=dict)
5104
def _extract_from_tabs(self, item_id, ytcfg, data, tabs):
    """
    Build the playlist result for the selected tab.

    The playlist metadata comes from _extract_metadata_from_tabs; the selected
    tab's 'title' and 'expandedText' are appended to the playlist title.
    """
    metadata = self._extract_metadata_from_tabs(item_id, data)
    selected_tab = self._extract_selected_tab(tabs)

    for suffix_field in ('title', 'expandedText'):
        metadata['title'] += format_field(selected_tab, suffix_field, ' - %s')

    tab_entries = self._entries(
        selected_tab, metadata['id'], ytcfg,
        self._extract_account_syncid(ytcfg, data),
        self._extract_visitor_data(data, ytcfg))
    return self.playlist_result(tab_entries, **metadata)
5118
def _extract_metadata_from_tabs(self, item_id, data):
    """
    Collect the playlist/channel level metadata of a tab page.
    @param item_id: fallback id; replaced by the channel id when one is found
    @param data:    response
    @returns        info dict (id, title, channel/uploader fields, thumbnails, counts, ...)
    """
    info = {'id': item_id}

    # Channel pages carry a channelMetadataRenderer, otherwise fall back to the playlist one
    metadata_renderer = traverse_obj(data, ('metadata', 'channelMetadataRenderer'), expected_type=dict)
    if metadata_renderer:
        channel_id = traverse_obj(metadata_renderer, ('externalId', {self.ucid_or_none}),
                                  ('channelUrl', {self.ucid_from_url}))
        info.update({
            'channel': metadata_renderer.get('title'),
            'channel_id': channel_id,
        })
        if info['channel_id']:
            info['id'] = info['channel_id']
    else:
        metadata_renderer = traverse_obj(data, ('metadata', 'playlistMetadataRenderer'), expected_type=dict)

    # pageHeaderViewModel slow rollout began April 2024
    page_header_view_model = traverse_obj(data, (
        'header', 'pageHeaderRenderer', 'content', 'pageHeaderViewModel', {dict}))

    # We can get the uncropped banner/avatar by replacing the crop params with '=s0'
    # See: https://github.com/yt-dlp/yt-dlp/issues/2237#issuecomment-1013694714
    def _get_uncropped(url):
        return url_or_none((url or '').split('=')[0] + '=s0')

    avatar_thumbnails = self._extract_thumbnails(metadata_renderer, 'avatar')
    if avatar_thumbnails:
        uncropped_avatar = _get_uncropped(avatar_thumbnails[0]['url'])
        if uncropped_avatar:
            avatar_thumbnails.append({
                'url': uncropped_avatar,
                'id': 'avatar_uncropped',
                'preference': 1,
            })

    # Banners come from the header renderer, or from the page header view model as a fallback
    channel_banners = (
        self._extract_thumbnails(data, ('header', ..., ('banner', 'mobileBanner', 'tvBanner')))
        or self._extract_thumbnails(
            page_header_view_model, ('banner', 'imageBannerViewModel', 'image'), final_key='sources'))
    for banner in channel_banners:
        banner['preference'] = -10

    if channel_banners:
        uncropped_banner = _get_uncropped(channel_banners[0]['url'])
        if uncropped_banner:
            channel_banners.append({
                'url': uncropped_banner,
                'id': 'banner_uncropped',
                'preference': -5,
            })

    # Deprecated - remove primary_sidebar_renderer when layout discontinued
    primary_sidebar_renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer')
    playlist_header_renderer = traverse_obj(data, ('header', 'playlistHeaderRenderer'), expected_type=dict)

    primary_thumbnails = self._extract_thumbnails(
        primary_sidebar_renderer, ('thumbnailRenderer', ('playlistVideoThumbnailRenderer', 'playlistCustomThumbnailRenderer'), 'thumbnail'))
    playlist_thumbnails = self._extract_thumbnails(
        playlist_header_renderer, ('playlistHeaderBanner', 'heroPlaylistThumbnailRenderer', 'thumbnail'))

    info.update({
        # Title falls back to the hashtag header text and finally to the id
        'title': (traverse_obj(metadata_renderer, 'title')
                  or self._get_text(data, ('header', 'hashtagHeaderRenderer', 'hashtag'))
                  or info['id']),
        'availability': self._extract_availability(data),
        'channel_follower_count': (
            self._get_count(data, ('header', ..., 'subscriberCountText'))
            or traverse_obj(page_header_view_model, (
                'metadata', 'contentMetadataViewModel', 'metadataRows', ..., 'metadataParts',
                lambda _, v: 'subscribers' in v['text']['content'], 'text', 'content', {parse_count}, any))),
        'description': try_get(metadata_renderer, lambda x: x.get('description', '')),
        'tags': (traverse_obj(data, ('microformat', 'microformatDataRenderer', 'tags', ..., {str}))
                 or traverse_obj(metadata_renderer, ('keywords', {lambda x: x and shlex.split(x)}, ...))),
        'thumbnails': (primary_thumbnails or playlist_thumbnails) + avatar_thumbnails + channel_banners,
    })

    channel_handle = (
        traverse_obj(metadata_renderer, (('vanityChannelUrl', ('ownerUrls', ...)), {self.handle_from_url}), get_all=False)
        or traverse_obj(data, ('header', ..., 'channelHandleText', {self.handle_or_none}), get_all=False))

    if channel_handle:
        info.update({
            'uploader_id': channel_handle,
            'uploader_url': format_field(channel_handle, None, 'https://www.youtube.com/%s', default=None),
        })

    channel_badges = self._extract_badges(traverse_obj(data, ('header', ..., 'badges'), get_all=False))
    if self._has_badge(channel_badges, BadgeType.VERIFIED):
        info['channel_is_verified'] = True
    # Playlist stats is a text runs array containing [video count, view count, last updated].
    # last updated or (view count and last updated) may be missing.
    playlist_stats = get_first(
        (primary_sidebar_renderer, playlist_header_renderer), (('stats', 'briefStats', 'numVideosText'), ))

    last_updated_unix = self._parse_time_text(
        self._get_text(playlist_stats, 2)  # deprecated, remove when old layout discontinued
        or self._get_text(playlist_header_renderer, ('byline', 1, 'playlistBylineRenderer', 'text')))
    info['modified_date'] = strftime_or_none(last_updated_unix)

    # View count: playlist stats first, then the playlist header, then the channel about metadata
    info['view_count'] = self._get_count(playlist_stats, 1)
    if info['view_count'] is None:  # 0 is allowed
        info['view_count'] = self._get_count(playlist_header_renderer, 'viewCountText')
    if info['view_count'] is None:
        info['view_count'] = self._get_count(data, (
            'contents', 'twoColumnBrowseResultsRenderer', 'tabs', ..., 'tabRenderer', 'content', 'sectionListRenderer',
            'contents', ..., 'itemSectionRenderer', 'contents', ..., 'channelAboutFullMetadataRenderer', 'viewCountText'))

    info['playlist_count'] = self._get_count(playlist_stats, 0)
    if info['playlist_count'] is None:  # 0 is allowed
        info['playlist_count'] = self._get_count(playlist_header_renderer, ('byline', 0, 'playlistBylineRenderer', 'text'))

    # Without a channel id from the channel metadata, take the owner from the playlist header/sidebar
    if not info.get('channel_id'):
        owner = traverse_obj(playlist_header_renderer, 'ownerText')
        if not owner:  # Deprecated
            owner = traverse_obj(
                self._extract_sidebar_info_renderer(data, 'playlistSidebarSecondaryInfoRenderer'),
                ('videoOwner', 'videoOwnerRenderer', 'title'))
        owner_text = self._get_text(owner)
        browse_ep = traverse_obj(owner, ('runs', 0, 'navigationEndpoint', 'browseEndpoint')) or {}
        info.update({
            # For an owner text of the form 'by X and N others', only X is kept
            'channel': self._search_regex(r'^by (.+) and \d+ others?$', owner_text, 'uploader', default=owner_text),
            'channel_id': self.ucid_or_none(browse_ep.get('browseId')),
            'uploader_id': self.handle_from_url(urljoin('https://www.youtube.com', browse_ep.get('canonicalBaseUrl'))),
        })

    # Fields derived from the values settled above
    info.update({
        'uploader': info['channel'],
        'channel_url': format_field(info.get('channel_id'), None, 'https://www.youtube.com/channel/%s', default=None),
        'uploader_url': format_field(info.get('uploader_id'), None, 'https://www.youtube.com/%s', default=None),
    })

    return info
5251
def _extract_inline_playlist(self, playlist, playlist_id, data, ytcfg):
    """
    Yield the videos of an inline playlist, fetching further pages from the
    'next' endpoint until a page brings no new videos.
    @param playlist:     playlist renderer holding the first page in 'contents'
    @param playlist_id:  id sent as 'playlistId' with every page request
    @param data:         initial response, used for account syncid/visitor data
    @param ytcfg:        passed on to generate_api_headers/_extract_response
    """
    last_id = response = None
    for page_num in itertools.count(1):
        videos = list(self._playlist_entries(playlist))
        if not videos:
            return
        # Skip everything up to and including the last video already yielded,
        # if it shows up again in this page
        start = next((i for i, v in enumerate(videos) if v['id'] == last_id), -1) + 1
        if start >= len(videos):
            return
        yield from videos[start:]
        last_id = videos[-1]['id']
        # The last item need not be a playlistPanelVideoRenderer (or have a
        # watch endpoint); fall back to the defaults in the query below then
        watch_endpoint = try_get(
            playlist, lambda x: x['contents'][-1]['playlistPanelVideoRenderer']['navigationEndpoint']['watchEndpoint'],
            dict) or {}
        headers = self.generate_api_headers(
            ytcfg=ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
            visitor_data=self._extract_visitor_data(response, data, ytcfg))
        query = {
            'playlistId': playlist_id,
            'videoId': watch_endpoint.get('videoId') or last_id,
            'index': watch_endpoint.get('index') or len(videos),
            'params': watch_endpoint.get('params') or 'OAE%3D',
        }
        response = self._extract_response(
            item_id=f'{playlist_id} page {page_num}',
            query=query, ep='next', headers=headers, ytcfg=ytcfg,
            check_get_keys='contents',
        )
        playlist = try_get(
            response, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
        if not playlist:
            # The response has no playlist panel, so there is nothing left to page through
            return
5282
def _extract_from_playlist(self, item_id, url, data, playlist, ytcfg):
    """
    Handle a playlist embedded in a watch page: hand it over to its regular
    tab-based playlist URL where possible, otherwise extract it inline.
    """
    list_title = playlist.get('title') or try_get(
        data, lambda x: x['titleText']['simpleText'], str)
    list_id = playlist.get('playlistId') or item_id

    # Delegating everything except mix playlists to regular tab-based playlist URL
    endpoint_path = try_get(
        playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        str)
    tab_url = urljoin(url, endpoint_path)

    # Some playlists are unviewable but YouTube still provides a link to the (broken) playlist page [1]
    # [1] MLCT, RLTDwFCb4jeqaKWnciAYM-ZVHg
    unviewable = re.fullmatch(r'MLCT|RLTD[\w-]{22}', list_id)

    if not tab_url or tab_url == url or unviewable:
        return self.playlist_result(
            self._extract_inline_playlist(playlist, list_id, data, ytcfg),
            playlist_id=list_id, playlist_title=list_title)

    return self.url_result(
        tab_url, ie=YoutubeTabIE.ie_key(), video_id=list_id,
        video_title=list_title)
5305
def _extract_availability(self, data):
    """
    Gets the availability of a given playlist/tab.
    Note: Unless YouTube tells us explicitly, we do not assume it is public
    @param data: response
    @returns     'public', or whatever self._availability() derives from the
                 private/unlisted/subscription/premium indicators
    """
    # Three possible sources: sidebar badges, the playlist header 'privacy'
    # value, and the selected entry of the privacy dropdown
    sidebar_renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer') or {}
    playlist_header_renderer = traverse_obj(data, ('header', 'playlistHeaderRenderer')) or {}
    player_header_privacy = playlist_header_renderer.get('privacy')

    badges = self._extract_badges(traverse_obj(sidebar_renderer, 'badges'))

    # Personal playlists, when authenticated, have a dropdown visibility selector instead of a badge
    privacy_setting_icon = get_first(
        (playlist_header_renderer, sidebar_renderer),
        ('privacyForm', 'dropdownFormFieldRenderer', 'dropdown', 'dropdownRenderer', 'entries',
         lambda _, v: v['privacyDropdownItemRenderer']['isSelected'], 'privacyDropdownItemRenderer', 'icon', 'iconType'),
        expected_type=str)

    microformats_is_unlisted = traverse_obj(
        data, ('microformat', 'microformatDataRenderer', 'unlisted'), expected_type=bool)

    # NOTE(review): a conditional expression binds looser than `or`, so in
    # is_private/is_unlisted below the badge check is part of the branch taken
    # when player_header_privacy is not None. When it is None, the badge is not
    # consulted and the dropdown icon (or microformat) decides instead.
    # Confirm that this is intended.
    return (
        'public' if (
            self._has_badge(badges, BadgeType.AVAILABILITY_PUBLIC)
            or player_header_privacy == 'PUBLIC'
            or privacy_setting_icon == 'PRIVACY_PUBLIC')
        else self._availability(
            is_private=(
                self._has_badge(badges, BadgeType.AVAILABILITY_PRIVATE)
                or player_header_privacy == 'PRIVATE' if player_header_privacy is not None
                else privacy_setting_icon == 'PRIVACY_PRIVATE' if privacy_setting_icon is not None else None),
            is_unlisted=(
                self._has_badge(badges, BadgeType.AVAILABILITY_UNLISTED)
                or player_header_privacy == 'UNLISTED' if player_header_privacy is not None
                else privacy_setting_icon == 'PRIVACY_UNLISTED' if privacy_setting_icon is not None
                else microformats_is_unlisted if microformats_is_unlisted is not None else None),
            # `or None` turns a missing badge into "unknown" rather than False
            needs_subscription=self._has_badge(badges, BadgeType.AVAILABILITY_SUBSCRIPTION) or None,
            needs_premium=self._has_badge(badges, BadgeType.AVAILABILITY_PREMIUM) or None,
            needs_auth=False))
5346
@staticmethod
def _extract_sidebar_info_renderer(data, info_renderer, expected_type=dict):
    """
    Return the first non-empty `info_renderer` of the expected type among the
    playlist sidebar items, or None if there is none.
    """
    sidebar_items = try_get(
        data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list) or []
    candidates = (
        try_get(sidebar_item, lambda x: x[info_renderer], expected_type)
        for sidebar_item in sidebar_items)
    return next(filter(None, candidates), None)
5355
def _reload_with_unavailable_videos(self, item_id, data, ytcfg):
    """
    Reload playlists with unavailable videos (e.g. private videos, region blocked, etc.)

    Returns None without any request when `data` has neither playlist
    metadata nor a playlist header. The request itself is non-fatal.
    """
    playlist_marker = traverse_obj(
        data, ('metadata', 'playlistMetadataRenderer'), ('header', 'playlistHeaderRenderer'))
    if not playlist_marker:
        return None

    account_syncid = self._extract_account_syncid(ytcfg, data)
    visitor_data = self._extract_visitor_data(data, ytcfg)
    api_headers = self.generate_api_headers(
        ytcfg=ytcfg, account_syncid=account_syncid, visitor_data=visitor_data)
    browse_query = {
        'params': 'wgYCCAA=',
        'browseId': f'VL{item_id}',
    }
    return self._extract_response(
        item_id=item_id, headers=api_headers, query=browse_query,
        check_get_keys='contents', fatal=False, ytcfg=ytcfg,
        note='Redownloading playlist API JSON with unavailable videos')
5375
@functools.cached_property
def skip_webpage(self):
    """Whether 'webpage' is listed in the YoutubeTabIE 'skip' extractor argument."""
    skipped_parts = self._configuration_arg('skip', ie_key=YoutubeTabIE.ie_key())
    return 'webpage' in skipped_parts
5379
def _extract_webpage(self, url, item_id, fatal=True):
    """
    Download the webpage and extract its initial data, retrying via RetryManager.
    @param url:      webpage URL
    @param item_id:  id used for the download/extraction notes
    @param fatal:    passed to RetryManager, extract_yt_initial_data and _error_or_warning
    @returns         (webpage, data); either may be None
    """
    webpage, data = None, None
    for retry in self.RetryManager(fatal=fatal):
        try:
            webpage = self._download_webpage(url, item_id, note='Downloading webpage')
            data = self.extract_yt_initial_data(item_id, webpage or '', fatal=fatal) or {}
        except ExtractorError as e:
            # Network errors are retried, except for HTTP 403/429 responses
            if isinstance(e.cause, network_exceptions):
                if not isinstance(e.cause, HTTPError) or e.cause.status not in (403, 429):
                    retry.error = e
                    continue
            # Everything else is reported right away without retrying
            self._error_or_warning(e, fatal=fatal)
            break

        try:
            self._extract_and_report_alerts(data)
        except ExtractorError as e:
            self._error_or_warning(e, fatal=fatal)
            break

        # Sometimes youtube returns a webpage with incomplete ytInitialData
        # See: https://github.com/yt-dlp/yt-dlp/issues/116
        if not traverse_obj(data, 'contents', 'currentVideoEndpoint', 'onResponseReceivedActions'):
            retry.error = ExtractorError('Incomplete yt initial data received')
            # Do not hand incomplete data to the caller if the retries run out
            data = None
            continue

    return webpage, data
5408
5409 def _report_playlist_authcheck(self, ytcfg, fatal=True):
5410 """Use if failed to extract ytcfg (and data) from initial webpage"""
5411 if not ytcfg and self.is_authenticated:
5412 msg = 'Playlists that require authentication may not extract correctly without a successful webpage download'
5413 if 'authcheck' not in self._configuration_arg('skip', ie_key=YoutubeTabIE.ie_key()) and fatal:
5414 raise ExtractorError(
5415 f'{msg}. If you are not downloading private content, or '
5416 'your cookies are only for the first account and channel,'
5417 ' pass "--extractor-args youtubetab:skip=authcheck" to skip this check',
5418 expected=True)
5419 self.report_warning(msg, only_once=True)
5420
def _extract_data(self, url, item_id, ytcfg=None, fatal=True, webpage_fatal=False, default_client='web'):
    """
    Get the initial data for a URL, from the webpage if possible and from the
    API (via _extract_tab_endpoint) otherwise.
    @param url:             URL to extract
    @param item_id:         id used in notes and API requests
    @param ytcfg:           existing ytcfg; extracted from the webpage when not given
    @param fatal:           whether the home page redirect, the auth check and the API fallback may raise
    @param webpage_fatal:   whether a failing webpage download/extraction may raise
    @param default_client:  passed on to _extract_tab_endpoint
    @returns                (data, ytcfg)
    """
    data = None
    if not self.skip_webpage:
        webpage, data = self._extract_webpage(url, item_id, fatal=webpage_fatal)
        ytcfg = ytcfg or self.extract_ytcfg(item_id, webpage)
        # Reject webpage data if redirected to home page without explicitly requesting
        selected_tab = self._extract_selected_tab(self._extract_tab_renderers(data), fatal=False) or {}
        if (url != 'https://www.youtube.com/feed/recommended'
                and selected_tab.get('tabIdentifier') == 'FEwhat_to_watch'  # Home page
                and 'no-youtube-channel-redirect' not in self.get_param('compat_opts', [])):
            msg = 'The channel/playlist does not exist and the URL redirected to youtube.com home page'
            if fatal:
                raise ExtractorError(msg, expected=True)
            self.report_warning(msg, only_once=True)
    # No usable webpage data (or the webpage was skipped): resolve the URL through the API
    if not data:
        self._report_playlist_authcheck(ytcfg, fatal=fatal)
        data = self._extract_tab_endpoint(url, item_id, ytcfg, fatal=fatal, default_client=default_client)
    return data, ytcfg
5439
def _extract_tab_endpoint(self, url, item_id, ytcfg=None, fatal=True, default_client='web'):
    """
    Resolve `url` through the 'navigation/resolve_url' endpoint and fetch the
    response of the browse/watch endpoint it maps to.

    When the URL resolves to neither, ExtractorError is raised if `fatal`,
    else a warning is reported and None is returned.
    """
    api_headers = self.generate_api_headers(ytcfg=ytcfg, default_client=default_client)
    resolved = self._extract_response(
        item_id=item_id, query={'url': url}, check_get_keys='endpoint', headers=api_headers, ytcfg=ytcfg, fatal=fatal,
        ep='navigation/resolve_url', note='Downloading API parameters API JSON', default_client=default_client)

    # (key in the resolved endpoint, API endpoint to query with its parameters)
    for endpoint_key, api_ep in (('browseEndpoint', 'browse'), ('watchEndpoint', 'next')):
        endpoint_params = try_get(resolved, lambda x: x['endpoint'][endpoint_key], dict)
        if not endpoint_params:
            continue
        return self._extract_response(
            item_id=item_id, query=endpoint_params, ep=api_ep, headers=api_headers,
            ytcfg=ytcfg, fatal=fatal, default_client=default_client,
            check_get_keys=('contents', 'currentVideoEndpoint', 'onResponseReceivedActions'))

    message = 'Failed to resolve url (does the playlist exist?)'
    if not fatal:
        self.report_warning(message, item_id)
        return None
    raise ExtractorError(message, expected=True)
5457
# Value used for `params` in _search_results() when the caller does not pass one
_SEARCH_PARAMS = None

def _search_results(self, query, params=NO_DEFAULT, default_client='web'):
    """
    Generator yielding the entries of a search, one result page at a time.

    @param query   The search query string
    @param params  Value sent as 'params' with the search request;
                   defaults to `self._SEARCH_PARAMS`. Omitted when falsy
    """
    if params is NO_DEFAULT:
        params = self._SEARCH_PARAMS
    request_query = {'query': query}
    if params:
        request_query['params'] = params

    # Paths at which the result items may be found in a response
    content_keys = (
        ('contents', 'twoColumnSearchResultsRenderer', 'primaryContents', 'sectionListRenderer', 'contents'),
        ('onResponseReceivedCommands', 0, 'appendContinuationItemsAction', 'continuationItems'),
        # ytmusic search
        ('contents', 'tabbedSearchResultsRenderer', 'tabs', 0, 'tabRenderer', 'content', 'sectionListRenderer', 'contents'),
        ('continuationContents', ),
    )
    display_id = f'query "{query}"'
    # Unique top-level keys of the paths above
    check_get_keys = tuple({path[0] for path in content_keys})

    if self.skip_webpage:
        ytcfg = {}
    else:
        ytcfg = self._download_ytcfg(default_client, display_id)
    self._report_playlist_authcheck(ytcfg, fatal=False)

    # Single-slot holder; _extract_entries() is expected to store the next continuation in it
    continuation_list = [None]
    response = None
    page_num = 0
    while True:
        page_num += 1
        request_query.update(continuation_list[0] or {})
        # Visitor data is taken from the previous page's response (None on the first page)
        visitor_data = self._extract_visitor_data(response)
        api_headers = self.generate_api_headers(
            ytcfg=ytcfg, visitor_data=visitor_data, default_client=default_client)
        response = self._extract_response(
            item_id=f'{display_id} page {page_num}', ep='search', query=request_query,
            default_client=default_client, check_get_keys=check_get_keys, ytcfg=ytcfg, headers=api_headers)
        page_contents = traverse_obj(response, *content_keys)
        yield from self._extract_entries(
            {'contents': list(variadic(page_contents))}, continuation_list)
        if not continuation_list[0]:
            return
5492
5493
5494 class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
5495 IE_DESC = 'YouTube Tabs'
5496 _VALID_URL = r'''(?x:
5497 https?://
5498 (?!consent\.)(?:\w+\.)?
5499 (?:
5500 youtube(?:kids)?\.com|
5501 {invidious}
5502 )/
5503 (?:
5504 (?P<channel_type>channel|c|user|browse)/|
5505 (?P<not_channel>
5506 feed/|hashtag/|
5507 (?:playlist|watch)\?.*?\blist=
5508 )|
5509 (?!(?:{reserved_names})\b) # Direct URLs
5510 )
5511 (?P<id>[^/?\#&]+)
5512 )'''.format(
5513 reserved_names=YoutubeBaseInfoExtractor._RESERVED_NAMES,
5514 invidious='|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
5515 )
5516 IE_NAME = 'youtube:tab'
5517
5518 _TESTS = [{
5519 'note': 'playlists, multipage',
5520 'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid',
5521 'playlist_mincount': 94,
5522 'info_dict': {
5523 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
5524 'title': 'Igor Kleiner Ph.D. - Playlists',
5525 'description': 'md5:15d7dd9e333cb987907fcb0d604b233a',
5526 'uploader': 'Igor Kleiner Ph.D.',
5527 'uploader_id': '@IgorDataScience',
5528 'uploader_url': 'https://www.youtube.com/@IgorDataScience',
5529 'channel': 'Igor Kleiner Ph.D.',
5530 'channel_id': 'UCqj7Cz7revf5maW9g5pgNcg',
5531 'tags': ['критическое мышление', 'наука просто', 'математика', 'анализ данных'],
5532 'channel_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',
5533 'channel_follower_count': int,
5534 },
5535 }, {
5536 'note': 'playlists, multipage, different order',
5537 'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
5538 'playlist_mincount': 94,
5539 'info_dict': {
5540 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
5541 'title': 'Igor Kleiner Ph.D. - Playlists',
5542 'description': 'md5:15d7dd9e333cb987907fcb0d604b233a',
5543 'uploader': 'Igor Kleiner Ph.D.',
5544 'uploader_id': '@IgorDataScience',
5545 'uploader_url': 'https://www.youtube.com/@IgorDataScience',
5546 'tags': ['критическое мышление', 'наука просто', 'математика', 'анализ данных'],
5547 'channel_id': 'UCqj7Cz7revf5maW9g5pgNcg',
5548 'channel': 'Igor Kleiner Ph.D.',
5549 'channel_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',
5550 'channel_follower_count': int,
5551 },
5552 }, {
5553 'note': 'playlists, series',
5554 'url': 'https://www.youtube.com/c/3blue1brown/playlists?view=50&sort=dd&shelf_id=3',
5555 'playlist_mincount': 5,
5556 'info_dict': {
5557 'id': 'UCYO_jab_esuFRV4b17AJtAw',
5558 'title': '3Blue1Brown - Playlists',
5559 'description': 'md5:4d1da95432004b7ba840ebc895b6b4c9',
5560 'channel_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',
5561 'channel': '3Blue1Brown',
5562 'channel_id': 'UCYO_jab_esuFRV4b17AJtAw',
5563 'uploader_id': '@3blue1brown',
5564 'uploader_url': 'https://www.youtube.com/@3blue1brown',
5565 'uploader': '3Blue1Brown',
5566 'tags': ['Mathematics'],
5567 'channel_follower_count': int,
5568 'channel_is_verified': True,
5569 },
5570 }, {
5571 'note': 'playlists, singlepage',
5572 'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
5573 'playlist_mincount': 4,
5574 'info_dict': {
5575 'id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
5576 'title': 'ThirstForScience - Playlists',
5577 'description': 'md5:609399d937ea957b0f53cbffb747a14c',
5578 'uploader': 'ThirstForScience',
5579 'uploader_url': 'https://www.youtube.com/@ThirstForScience',
5580 'uploader_id': '@ThirstForScience',
5581 'channel_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
5582 'channel_url': 'https://www.youtube.com/channel/UCAEtajcuhQ6an9WEzY9LEMQ',
5583 'tags': 'count:12',
5584 'channel': 'ThirstForScience',
5585 'channel_follower_count': int,
5586 },
5587 }, {
5588 'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',
5589 'only_matching': True,
5590 }, {
5591 'note': 'basic, single video playlist',
5592 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
5593 'info_dict': {
5594 'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
5595 'title': 'youtube-dl public playlist',
5596 'description': '',
5597 'tags': [],
5598 'view_count': int,
5599 'modified_date': '20201130',
5600 'channel': 'Sergey M.',
5601 'channel_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
5602 'channel_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
5603 'availability': 'public',
5604 'uploader': 'Sergey M.',
5605 'uploader_url': 'https://www.youtube.com/@sergeym.6173',
5606 'uploader_id': '@sergeym.6173',
5607 },
5608 'playlist_count': 1,
5609 }, {
5610 'note': 'empty playlist',
5611 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
5612 'info_dict': {
5613 'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
5614 'title': 'youtube-dl empty playlist',
5615 'tags': [],
5616 'channel': 'Sergey M.',
5617 'description': '',
5618 'modified_date': '20230921',
5619 'channel_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
5620 'channel_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
5621 'availability': 'unlisted',
5622 'uploader_url': 'https://www.youtube.com/@sergeym.6173',
5623 'uploader_id': '@sergeym.6173',
5624 'uploader': 'Sergey M.',
5625 },
5626 'playlist_count': 0,
5627 }, {
5628 'note': 'Home tab',
5629 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/featured',
5630 'info_dict': {
5631 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5632 'title': 'lex will - Home',
5633 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
5634 'uploader': 'lex will',
5635 'uploader_id': '@lexwill718',
5636 'channel': 'lex will',
5637 'tags': ['bible', 'history', 'prophesy'],
5638 'uploader_url': 'https://www.youtube.com/@lexwill718',
5639 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
5640 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5641 'channel_follower_count': int,
5642 },
5643 'playlist_mincount': 2,
5644 }, {
5645 'note': 'Videos tab',
5646 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos',
5647 'info_dict': {
5648 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5649 'title': 'lex will - Videos',
5650 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
5651 'uploader': 'lex will',
5652 'uploader_id': '@lexwill718',
5653 'tags': ['bible', 'history', 'prophesy'],
5654 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
5655 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5656 'uploader_url': 'https://www.youtube.com/@lexwill718',
5657 'channel': 'lex will',
5658 'channel_follower_count': int,
5659 },
5660 'playlist_mincount': 975,
5661 }, {
5662 'note': 'Videos tab, sorted by popular',
5663 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos?view=0&sort=p&flow=grid',
5664 'info_dict': {
5665 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5666 'title': 'lex will - Videos',
5667 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
5668 'uploader': 'lex will',
5669 'uploader_id': '@lexwill718',
5670 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5671 'uploader_url': 'https://www.youtube.com/@lexwill718',
5672 'channel': 'lex will',
5673 'tags': ['bible', 'history', 'prophesy'],
5674 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
5675 'channel_follower_count': int,
5676 },
5677 'playlist_mincount': 199,
5678 }, {
5679 'note': 'Playlists tab',
5680 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/playlists',
5681 'info_dict': {
5682 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5683 'title': 'lex will - Playlists',
5684 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
5685 'uploader': 'lex will',
5686 'uploader_id': '@lexwill718',
5687 'uploader_url': 'https://www.youtube.com/@lexwill718',
5688 'channel': 'lex will',
5689 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
5690 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5691 'tags': ['bible', 'history', 'prophesy'],
5692 'channel_follower_count': int,
5693 },
5694 'playlist_mincount': 17,
5695 }, {
5696 'note': 'Community tab',
5697 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community',
5698 'info_dict': {
5699 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5700 'title': 'lex will - Community',
5701 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
5702 'channel': 'lex will',
5703 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
5704 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5705 'tags': ['bible', 'history', 'prophesy'],
5706 'channel_follower_count': int,
5707 'uploader_url': 'https://www.youtube.com/@lexwill718',
5708 'uploader_id': '@lexwill718',
5709 'uploader': 'lex will',
5710 },
5711 'playlist_mincount': 18,
5712 }, {
5713 'note': 'Channels tab',
5714 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels',
5715 'info_dict': {
5716 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5717 'title': 'lex will - Channels',
5718 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
5719 'channel': 'lex will',
5720 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
5721 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5722 'tags': ['bible', 'history', 'prophesy'],
5723 'channel_follower_count': int,
5724 'uploader_url': 'https://www.youtube.com/@lexwill718',
5725 'uploader_id': '@lexwill718',
5726 'uploader': 'lex will',
5727 },
5728 'playlist_mincount': 12,
5729 }, {
5730 'note': 'Search tab',
5731 'url': 'https://www.youtube.com/c/3blue1brown/search?query=linear%20algebra',
5732 'playlist_mincount': 40,
5733 'info_dict': {
5734 'id': 'UCYO_jab_esuFRV4b17AJtAw',
5735 'title': '3Blue1Brown - Search - linear algebra',
5736 'description': 'md5:4d1da95432004b7ba840ebc895b6b4c9',
5737 'channel_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',
5738 'tags': ['Mathematics'],
5739 'channel': '3Blue1Brown',
5740 'channel_id': 'UCYO_jab_esuFRV4b17AJtAw',
5741 'channel_follower_count': int,
5742 'uploader_url': 'https://www.youtube.com/@3blue1brown',
5743 'uploader_id': '@3blue1brown',
5744 'uploader': '3Blue1Brown',
5745 'channel_is_verified': True,
5746 },
5747 }, {
5748 'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',
5749 'only_matching': True,
5750 }, {
5751 'url': 'https://www.youtubekids.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
5752 'only_matching': True,
5753 }, {
5754 'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
5755 'only_matching': True,
5756 }, {
5757 'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
5758 'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
5759 'info_dict': {
5760 'title': '29C3: Not my department',
5761 'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
5762 'description': 'md5:a14dc1a8ef8307a9807fe136a0660268',
5763 'tags': [],
5764 'view_count': int,
5765 'modified_date': '20150605',
5766 'channel_id': 'UCEPzS1rYsrkqzSLNp76nrcg',
5767 'channel_url': 'https://www.youtube.com/channel/UCEPzS1rYsrkqzSLNp76nrcg',
5768 'channel': 'Christiaan008',
5769 'availability': 'public',
5770 'uploader_id': '@ChRiStIaAn008',
5771 'uploader': 'Christiaan008',
5772 'uploader_url': 'https://www.youtube.com/@ChRiStIaAn008',
5773 },
5774 'playlist_count': 96,
5775 }, {
5776 'note': 'Large playlist',
5777 'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
5778 'info_dict': {
5779 'title': 'Uploads from Cauchemar',
5780 'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
5781 'channel_url': 'https://www.youtube.com/channel/UCBABnxM4Ar9ten8Mdjj1j0Q',
5782 'tags': [],
5783 'modified_date': r're:\d{8}',
5784 'channel': 'Cauchemar',
5785 'view_count': int,
5786 'description': '',
5787 'channel_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',
5788 'availability': 'public',
5789 'uploader_id': '@Cauchemar89',
5790 'uploader': 'Cauchemar',
5791 'uploader_url': 'https://www.youtube.com/@Cauchemar89',
5792 },
5793 'playlist_mincount': 1123,
5794 'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
5795 }, {
5796 'note': 'even larger playlist, 8832 videos',
5797 'url': 'http://www.youtube.com/user/NASAgovVideo/videos',
5798 'only_matching': True,
5799 }, {
5800 'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
5801 'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
5802 'info_dict': {
5803 'title': 'Uploads from Interstellar Movie',
5804 'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
5805 'tags': [],
5806 'view_count': int,
5807 'channel_id': 'UCXw-G3eDE9trcvY2sBMM_aA',
5808 'channel_url': 'https://www.youtube.com/channel/UCXw-G3eDE9trcvY2sBMM_aA',
5809 'channel': 'Interstellar Movie',
5810 'description': '',
5811 'modified_date': r're:\d{8}',
5812 'availability': 'public',
5813 'uploader_id': '@InterstellarMovie',
5814 'uploader': 'Interstellar Movie',
5815 'uploader_url': 'https://www.youtube.com/@InterstellarMovie',
5816 },
5817 'playlist_mincount': 21,
5818 }, {
5819 'note': 'Playlist with "show unavailable videos" button',
5820 'url': 'https://www.youtube.com/playlist?list=UUTYLiWFZy8xtPwxFwX9rV7Q',
5821 'info_dict': {
5822 'title': 'Uploads from Phim Siêu Nhân Nhật Bản',
5823 'id': 'UUTYLiWFZy8xtPwxFwX9rV7Q',
5824 'view_count': int,
5825 'channel': 'Phim Siêu Nhân Nhật Bản',
5826 'tags': [],
5827 'description': '',
5828 'channel_url': 'https://www.youtube.com/channel/UCTYLiWFZy8xtPwxFwX9rV7Q',
5829 'channel_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',
5830 'modified_date': r're:\d{8}',
5831 'availability': 'public',
5832 'uploader_url': 'https://www.youtube.com/@phimsieunhannhatban',
5833 'uploader_id': '@phimsieunhannhatban',
5834 'uploader': 'Phim Siêu Nhân Nhật Bản',
5835 },
5836 'playlist_mincount': 200,
5837 'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
5838 }, {
5839 'note': 'Playlist with unavailable videos in page 7',
5840 'url': 'https://www.youtube.com/playlist?list=UU8l9frL61Yl5KFOl87nIm2w',
5841 'info_dict': {
5842 'title': 'Uploads from BlankTV',
5843 'id': 'UU8l9frL61Yl5KFOl87nIm2w',
5844 'channel': 'BlankTV',
5845 'channel_url': 'https://www.youtube.com/channel/UC8l9frL61Yl5KFOl87nIm2w',
5846 'channel_id': 'UC8l9frL61Yl5KFOl87nIm2w',
5847 'view_count': int,
5848 'tags': [],
5849 'modified_date': r're:\d{8}',
5850 'description': '',
5851 'availability': 'public',
5852 'uploader_id': '@blanktv',
5853 'uploader': 'BlankTV',
5854 'uploader_url': 'https://www.youtube.com/@blanktv',
5855 },
5856 'playlist_mincount': 1000,
5857 'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
5858 }, {
5859 'note': 'https://github.com/ytdl-org/youtube-dl/issues/21844',
5860 'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
5861 'info_dict': {
5862 'title': 'Data Analysis with Dr Mike Pound',
5863 'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
5864 'description': 'md5:7f567c574d13d3f8c0954d9ffee4e487',
5865 'tags': [],
5866 'view_count': int,
5867 'channel_id': 'UC9-y-6csu5WGm29I7JiwpnA',
5868 'channel_url': 'https://www.youtube.com/channel/UC9-y-6csu5WGm29I7JiwpnA',
5869 'channel': 'Computerphile',
5870 'availability': 'public',
5871 'modified_date': '20190712',
5872 'uploader_id': '@Computerphile',
5873 'uploader': 'Computerphile',
5874 'uploader_url': 'https://www.youtube.com/@Computerphile',
5875 },
5876 'playlist_mincount': 11,
5877 }, {
5878 'url': 'https://invidio.us/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
5879 'only_matching': True,
5880 }, {
5881 'note': 'Playlist URL that does not actually serve a playlist',
5882 'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
5883 'info_dict': {
5884 'id': 'FqZTN594JQw',
5885 'ext': 'webm',
5886 'title': "Smiley's People 01 detective, Adventure Series, Action",
5887 'upload_date': '20150526',
5888 'license': 'Standard YouTube License',
5889 'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
5890 'categories': ['People & Blogs'],
5891 'tags': list,
5892 'view_count': int,
5893 'like_count': int,
5894 },
5895 'params': {
5896 'skip_download': True,
5897 },
5898 'skip': 'This video is not available.',
5899 'add_ie': [YoutubeIE.ie_key()],
5900 }, {
5901 'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',
5902 'only_matching': True,
5903 }, {
5904 'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
5905 'only_matching': True,
5906 }, {
5907 'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',
5908 'info_dict': {
5909 'id': 'hGkQjiJLjWQ', # This will keep changing
5910 'ext': 'mp4',
5911 'title': str,
5912 'upload_date': r're:\d{8}',
5913 'description': str,
5914 'categories': ['News & Politics'],
5915 'tags': list,
5916 'like_count': int,
5917 'release_timestamp': int,
5918 'channel': 'Sky News',
5919 'channel_id': 'UCoMdktPbSTixAyNGwb-UYkQ',
5920 'age_limit': 0,
5921 'view_count': int,
5922 'thumbnail': r're:https?://i\.ytimg\.com/vi/[^/]+/maxresdefault(?:_live)?\.jpg',
5923 'playable_in_embed': True,
5924 'release_date': r're:\d+',
5925 'availability': 'public',
5926 'live_status': 'is_live',
5927 'channel_url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ',
5928 'channel_follower_count': int,
5929 'concurrent_view_count': int,
5930 'uploader_url': 'https://www.youtube.com/@SkyNews',
5931 'uploader_id': '@SkyNews',
5932 'uploader': 'Sky News',
5933 'channel_is_verified': True,
5934 },
5935 'params': {
5936 'skip_download': True,
5937 },
5938 'expected_warnings': ['Ignoring subtitle tracks found in '],
5939 }, {
5940 'url': 'https://www.youtube.com/user/TheYoungTurks/live',
5941 'info_dict': {
5942 'id': 'a48o2S1cPoo',
5943 'ext': 'mp4',
5944 'title': 'The Young Turks - Live Main Show',
5945 'upload_date': '20150715',
5946 'license': 'Standard YouTube License',
5947 'description': 'md5:438179573adcdff3c97ebb1ee632b891',
5948 'categories': ['News & Politics'],
5949 'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
5950 'like_count': int,
5951 },
5952 'params': {
5953 'skip_download': True,
5954 },
5955 'only_matching': True,
5956 }, {
5957 'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
5958 'only_matching': True,
5959 }, {
5960 'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
5961 'only_matching': True,
5962 }, {
5963 'note': 'A channel that is not live. Should raise error',
5964 'url': 'https://www.youtube.com/user/numberphile/live',
5965 'only_matching': True,
5966 }, {
5967 'url': 'https://www.youtube.com/feed/trending',
5968 'only_matching': True,
5969 }, {
5970 'url': 'https://www.youtube.com/feed/library',
5971 'only_matching': True,
5972 }, {
5973 'url': 'https://www.youtube.com/feed/history',
5974 'only_matching': True,
5975 }, {
5976 'url': 'https://www.youtube.com/feed/subscriptions',
5977 'only_matching': True,
5978 }, {
5979 'url': 'https://www.youtube.com/feed/watch_later',
5980 'only_matching': True,
5981 }, {
5982 'note': 'Recommended - redirects to home page.',
5983 'url': 'https://www.youtube.com/feed/recommended',
5984 'only_matching': True,
5985 }, {
5986 'note': 'inline playlist with not always working continuations',
5987 'url': 'https://www.youtube.com/watch?v=UC6u0Tct-Fo&list=PL36D642111D65BE7C',
5988 'only_matching': True,
5989 }, {
5990 'url': 'https://www.youtube.com/course',
5991 'only_matching': True,
5992 }, {
5993 'url': 'https://www.youtube.com/zsecurity',
5994 'only_matching': True,
5995 }, {
5996 'url': 'http://www.youtube.com/NASAgovVideo/videos',
5997 'only_matching': True,
5998 }, {
5999 'url': 'https://www.youtube.com/TheYoungTurks/live',
6000 'only_matching': True,
6001 }, {
6002 'url': 'https://www.youtube.com/hashtag/cctv9',
6003 'info_dict': {
6004 'id': 'cctv9',
6005 'title': 'cctv9 - All',
6006 'tags': [],
6007 },
6008 'playlist_mincount': 300, # not consistent but should be over 300
6009 }, {
6010 'url': 'https://www.youtube.com/watch?list=PLW4dVinRY435CBE_JD3t-0SRXKfnZHS1P&feature=youtu.be&v=M9cJMXmQ_ZU',
6011 'only_matching': True,
6012 }, {
6013 'note': 'Requires Premium: should request additional YTM-info webpage (and have format 141) for videos in playlist',
6014 'url': 'https://music.youtube.com/playlist?list=PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
6015 'only_matching': True,
6016 }, {
6017 'note': '/browse/ should redirect to /channel/',
6018 'url': 'https://music.youtube.com/browse/UC1a8OFewdjuLq6KlF8M_8Ng',
6019 'only_matching': True,
6020 }, {
6021 'note': 'VLPL, should redirect to playlist?list=PL...',
6022 'url': 'https://music.youtube.com/browse/VLPLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
6023 'info_dict': {
6024 'id': 'PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
6025 'description': 'Providing you with copyright free / safe music for gaming, live streaming, studying and more!',
6026 'title': 'NCS : All Releases 💿',
6027 'channel_url': 'https://www.youtube.com/channel/UC_aEa8K-EOJ3D6gOs7HcyNg',
6028 'modified_date': r're:\d{8}',
6029 'view_count': int,
6030 'channel_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',
6031 'tags': [],
6032 'channel': 'NoCopyrightSounds',
6033 'availability': 'public',
6034 'uploader_url': 'https://www.youtube.com/@NoCopyrightSounds',
6035 'uploader': 'NoCopyrightSounds',
6036 'uploader_id': '@NoCopyrightSounds',
6037 },
6038 'playlist_mincount': 166,
6039 'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden', 'YouTube Music is not directly supported'],
6040 }, {
6041 # TODO: fix 'unviewable' issue with this playlist when reloading with unavailable videos
6042 'note': 'Topic, should redirect to playlist?list=UU...',
6043 'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',
6044 'info_dict': {
6045 'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',
6046 'title': 'Uploads from Royalty Free Music - Topic',
6047 'tags': [],
6048 'channel_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
6049 'channel': 'Royalty Free Music - Topic',
6050 'view_count': int,
6051 'channel_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',
6052 'modified_date': r're:\d{8}',
6053 'description': '',
6054 'availability': 'public',
6055 'uploader': 'Royalty Free Music - Topic',
6056 },
6057 'playlist_mincount': 101,
6058 'expected_warnings': ['YouTube Music is not directly supported', r'[Uu]navailable videos (are|will be) hidden'],
6059 }, {
6060 # Destination channel with only a hidden self tab (tab id is UCtFRv9O2AHqOZjjynzrv-xg)
6061 # Treat as a general feed
6062 'url': 'https://www.youtube.com/channel/UCtFRv9O2AHqOZjjynzrv-xg',
6063 'info_dict': {
6064 'id': 'UCtFRv9O2AHqOZjjynzrv-xg',
6065 'title': 'UCtFRv9O2AHqOZjjynzrv-xg',
6066 'tags': [],
6067 },
6068 'playlist_mincount': 9,
6069 }, {
6070 'note': 'Youtube music Album',
6071 'url': 'https://music.youtube.com/browse/MPREb_gTAcphH99wE',
6072 'info_dict': {
6073 'id': 'OLAK5uy_l1m0thk3g31NmIIz_vMIbWtyv7eZixlH0',
6074 'title': 'Album - Royalty Free Music Library V2 (50 Songs)',
6075 'tags': [],
6076 'view_count': int,
6077 'description': '',
6078 'availability': 'unlisted',
6079 'modified_date': r're:\d{8}',
6080 },
6081 'playlist_count': 50,
6082 'expected_warnings': ['YouTube Music is not directly supported'],
6083 }, {
6084 'note': 'unlisted single video playlist',
6085 'url': 'https://www.youtube.com/playlist?list=PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
6086 'info_dict': {
6087 'id': 'PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
6088 'title': 'yt-dlp unlisted playlist test',
6089 'availability': 'unlisted',
6090 'tags': [],
6091 'modified_date': '20220418',
6092 'channel': 'colethedj',
6093 'view_count': int,
6094 'description': '',
6095 'channel_id': 'UC9zHu_mHU96r19o-wV5Qs1Q',
6096 'channel_url': 'https://www.youtube.com/channel/UC9zHu_mHU96r19o-wV5Qs1Q',
6097 'uploader_url': 'https://www.youtube.com/@colethedj1894',
6098 'uploader_id': '@colethedj1894',
6099 'uploader': 'colethedj',
6100 },
6101 'playlist': [{
6102 'info_dict': {
6103 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
6104 'id': 'BaW_jenozKc',
6105 '_type': 'url',
6106 'ie_key': 'Youtube',
6107 'duration': 10,
6108 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
6109 'channel_url': 'https://www.youtube.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
6110 'view_count': int,
6111 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc',
6112 'channel': 'Philipp Hagemeister',
6113 'uploader_id': '@PhilippHagemeister',
6114 'uploader_url': 'https://www.youtube.com/@PhilippHagemeister',
6115 'uploader': 'Philipp Hagemeister',
6116 },
6117 }],
6118 'playlist_count': 1,
6119 'params': {'extract_flat': True},
6120 }, {
6121 'note': 'API Fallback: Recommended - redirects to home page. Requires visitorData',
6122 'url': 'https://www.youtube.com/feed/recommended',
6123 'info_dict': {
6124 'id': 'recommended',
6125 'title': 'recommended',
6126 'tags': [],
6127 },
6128 'playlist_mincount': 50,
6129 'params': {
6130 'skip_download': True,
6131 'extractor_args': {'youtubetab': {'skip': ['webpage']}},
6132 },
6133 }, {
6134 'note': 'API Fallback: /videos tab, sorted by oldest first',
6135 'url': 'https://www.youtube.com/user/theCodyReeder/videos?view=0&sort=da&flow=grid',
6136 'info_dict': {
6137 'id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',
6138 'title': 'Cody\'sLab - Videos',
6139 'description': 'md5:d083b7c2f0c67ee7a6c74c3e9b4243fa',
6140 'channel': 'Cody\'sLab',
6141 'channel_id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',
6142 'tags': [],
6143 'channel_url': 'https://www.youtube.com/channel/UCu6mSoMNzHQiBIOCkHUa2Aw',
6144 'channel_follower_count': int,
6145 },
6146 'playlist_mincount': 650,
6147 'params': {
6148 'skip_download': True,
6149 'extractor_args': {'youtubetab': {'skip': ['webpage']}},
6150 },
6151 'skip': 'Query for sorting no longer works',
6152 }, {
6153 'note': 'API Fallback: Topic, should redirect to playlist?list=UU...',
6154 'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',
6155 'info_dict': {
6156 'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',
6157 'title': 'Uploads from Royalty Free Music - Topic',
6158 'modified_date': r're:\d{8}',
6159 'channel_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
6160 'description': '',
6161 'channel_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',
6162 'tags': [],
6163 'channel': 'Royalty Free Music - Topic',
6164 'view_count': int,
6165 'availability': 'public',
6166 'uploader': 'Royalty Free Music - Topic',
6167 },
6168 'playlist_mincount': 101,
6169 'params': {
6170 'skip_download': True,
6171 'extractor_args': {'youtubetab': {'skip': ['webpage']}},
6172 },
6173 'expected_warnings': ['YouTube Music is not directly supported', r'[Uu]navailable videos (are|will be) hidden'],
6174 }, {
6175 'note': 'non-standard redirect to regional channel',
6176 'url': 'https://www.youtube.com/channel/UCwVVpHQ2Cs9iGJfpdFngePQ',
6177 'only_matching': True,
6178 }, {
6179 'note': 'collaborative playlist (uploader name in the form "by <uploader> and x other(s)")',
6180 'url': 'https://www.youtube.com/playlist?list=PLx-_-Kk4c89oOHEDQAojOXzEzemXxoqx6',
6181 'info_dict': {
6182 'id': 'PLx-_-Kk4c89oOHEDQAojOXzEzemXxoqx6',
6183 'modified_date': '20220407',
6184 'channel_url': 'https://www.youtube.com/channel/UCKcqXmCcyqnhgpA5P0oHH_Q',
6185 'tags': [],
6186 'availability': 'unlisted',
6187 'channel_id': 'UCKcqXmCcyqnhgpA5P0oHH_Q',
6188 'channel': 'pukkandan',
6189 'description': 'Test for collaborative playlist',
6190 'title': 'yt-dlp test - collaborative playlist',
6191 'view_count': int,
6192 'uploader_url': 'https://www.youtube.com/@pukkandan',
6193 'uploader_id': '@pukkandan',
6194 'uploader': 'pukkandan',
6195 },
6196 'playlist_mincount': 2,
6197 }, {
6198 'note': 'translated tab name',
6199 'url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA/playlists',
6200 'info_dict': {
6201 'id': 'UCiu-3thuViMebBjw_5nWYrA',
6202 'tags': [],
6203 'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
6204 'description': 'test description',
6205 'title': 'cole-dlp-test-acc - 再生リスト',
6206 'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',
6207 'channel': 'cole-dlp-test-acc',
6208 'uploader_url': 'https://www.youtube.com/@coletdjnz',
6209 'uploader_id': '@coletdjnz',
6210 'uploader': 'cole-dlp-test-acc',
6211 },
6212 'playlist_mincount': 1,
6213 'params': {'extractor_args': {'youtube': {'lang': ['ja']}}},
6214 'expected_warnings': ['Preferring "ja"'],
6215 }, {
6216 # XXX: this should really check flat playlist entries, but the test suite doesn't support that
6217 'note': 'preferred lang set with playlist with translated video titles',
6218 'url': 'https://www.youtube.com/playlist?list=PLt5yu3-wZAlQAaPZ5Z-rJoTdbT-45Q7c0',
6219 'info_dict': {
6220 'id': 'PLt5yu3-wZAlQAaPZ5Z-rJoTdbT-45Q7c0',
6221 'tags': [],
6222 'view_count': int,
6223 'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
6224 'channel': 'cole-dlp-test-acc',
6225 'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',
6226 'description': 'test',
6227 'title': 'dlp test playlist',
6228 'availability': 'public',
6229 'uploader_url': 'https://www.youtube.com/@coletdjnz',
6230 'uploader_id': '@coletdjnz',
6231 'uploader': 'cole-dlp-test-acc',
6232 },
6233 'playlist_mincount': 1,
6234 'params': {'extractor_args': {'youtube': {'lang': ['ja']}}},
6235 'expected_warnings': ['Preferring "ja"'],
6236 }, {
6237 # shorts audio pivot for 2GtVksBMYFM.
6238 'url': 'https://www.youtube.com/feed/sfv_audio_pivot?bp=8gUrCikSJwoLMkd0VmtzQk1ZRk0SCzJHdFZrc0JNWUZNGgsyR3RWa3NCTVlGTQ==',
6239 'info_dict': {
6240 'id': 'sfv_audio_pivot',
6241 'title': 'sfv_audio_pivot',
6242 'tags': [],
6243 },
6244 'playlist_mincount': 50,
6245
6246 }, {
6247 # Channel with a real live tab (not to be mistaken with streams tab)
6248 # Do not treat like it should redirect to live stream
6249 'url': 'https://www.youtube.com/channel/UCEH7P7kyJIkS_gJf93VYbmg/live',
6250 'info_dict': {
6251 'id': 'UCEH7P7kyJIkS_gJf93VYbmg',
6252 'title': 'UCEH7P7kyJIkS_gJf93VYbmg - Live',
6253 'tags': [],
6254 },
6255 'playlist_mincount': 20,
6256 }, {
6257 # Tab name is not the same as tab id
6258 'url': 'https://www.youtube.com/channel/UCQvWX73GQygcwXOTSf_VDVg/letsplay',
6259 'info_dict': {
6260 'id': 'UCQvWX73GQygcwXOTSf_VDVg',
6261 'title': 'UCQvWX73GQygcwXOTSf_VDVg - Let\'s play',
6262 'tags': [],
6263 },
6264 'playlist_mincount': 8,
6265 }, {
6266 # Home tab id is literally home. Not to get mistaken with featured
6267 'url': 'https://www.youtube.com/channel/UCQvWX73GQygcwXOTSf_VDVg/home',
6268 'info_dict': {
6269 'id': 'UCQvWX73GQygcwXOTSf_VDVg',
6270 'title': 'UCQvWX73GQygcwXOTSf_VDVg - Home',
6271 'tags': [],
6272 },
6273 'playlist_mincount': 8,
6274 }, {
6275 # Should get three playlists for videos, shorts and streams tabs
6276 'url': 'https://www.youtube.com/channel/UCK9V2B22uJYu3N7eR_BT9QA',
6277 'info_dict': {
6278 'id': 'UCK9V2B22uJYu3N7eR_BT9QA',
6279 'title': 'Polka Ch. 尾丸ポルカ',
6280 'channel_follower_count': int,
6281 'channel_id': 'UCK9V2B22uJYu3N7eR_BT9QA',
6282 'channel_url': 'https://www.youtube.com/channel/UCK9V2B22uJYu3N7eR_BT9QA',
6283 'description': 'md5:49809d8bf9da539bc48ed5d1f83c33f2',
6284 'channel': 'Polka Ch. 尾丸ポルカ',
6285 'tags': 'count:35',
6286 'uploader_url': 'https://www.youtube.com/@OmaruPolka',
6287 'uploader': 'Polka Ch. 尾丸ポルカ',
6288 'uploader_id': '@OmaruPolka',
6289 'channel_is_verified': True,
6290 },
6291 'playlist_count': 3,
6292 }, {
6293 # Shorts tab with channel with handle
6294 # TODO: fix channel description
6295 'url': 'https://www.youtube.com/@NotJustBikes/shorts',
6296 'info_dict': {
6297 'id': 'UC0intLFzLaudFG-xAvUEO-A',
6298 'title': 'Not Just Bikes - Shorts',
6299 'tags': 'count:10',
6300 'channel_url': 'https://www.youtube.com/channel/UC0intLFzLaudFG-xAvUEO-A',
6301 'description': 'md5:5e82545b3a041345927a92d0585df247',
6302 'channel_follower_count': int,
6303 'channel_id': 'UC0intLFzLaudFG-xAvUEO-A',
6304 'channel': 'Not Just Bikes',
6305 'uploader_url': 'https://www.youtube.com/@NotJustBikes',
6306 'uploader': 'Not Just Bikes',
6307 'uploader_id': '@NotJustBikes',
6308 'channel_is_verified': True,
6309 },
6310 'playlist_mincount': 10,
6311 }, {
6312 # Streams tab
6313 'url': 'https://www.youtube.com/channel/UC3eYAvjCVwNHgkaGbXX3sig/streams',
6314 'info_dict': {
6315 'id': 'UC3eYAvjCVwNHgkaGbXX3sig',
6316 'title': '中村悠一 - Live',
6317 'tags': 'count:7',
6318 'channel_id': 'UC3eYAvjCVwNHgkaGbXX3sig',
6319 'channel_url': 'https://www.youtube.com/channel/UC3eYAvjCVwNHgkaGbXX3sig',
6320 'channel': '中村悠一',
6321 'channel_follower_count': int,
6322 'description': 'md5:e744f6c93dafa7a03c0c6deecb157300',
6323 'uploader_url': 'https://www.youtube.com/@Yuichi-Nakamura',
6324 'uploader_id': '@Yuichi-Nakamura',
6325 'uploader': '中村悠一',
6326 },
6327 'playlist_mincount': 60,
6328 }, {
6329 # Channel with no uploads and hence no videos, streams, shorts tabs or uploads playlist. This should fail.
6330 # See test_youtube_lists
6331 'url': 'https://www.youtube.com/channel/UC2yXPzFejc422buOIzn_0CA',
6332 'only_matching': True,
6333 }, {
6334 # No uploads and no UCID given. Should fail with no uploads error
6335 # See test_youtube_lists
6336 'url': 'https://www.youtube.com/news',
6337 'only_matching': True,
6338 }, {
6339 # No videos tab but has a shorts tab
6340 'url': 'https://www.youtube.com/c/TKFShorts',
6341 'info_dict': {
6342 'id': 'UCgJ5_1F6yJhYLnyMszUdmUg',
6343 'title': 'Shorts Break - Shorts',
6344 'tags': 'count:48',
6345 'channel_id': 'UCgJ5_1F6yJhYLnyMszUdmUg',
6346 'channel': 'Shorts Break',
6347 'description': 'md5:6de33c5e7ba686e5f3efd4e19c7ef499',
6348 'channel_follower_count': int,
6349 'channel_url': 'https://www.youtube.com/channel/UCgJ5_1F6yJhYLnyMszUdmUg',
6350 'uploader_url': 'https://www.youtube.com/@ShortsBreak_Official',
6351 'uploader': 'Shorts Break',
6352 'uploader_id': '@ShortsBreak_Official',
6353 },
6354 'playlist_mincount': 30,
6355 }, {
6356 # Trending Now Tab. tab id is empty
6357 'url': 'https://www.youtube.com/feed/trending',
6358 'info_dict': {
6359 'id': 'trending',
6360 'title': 'trending - Now',
6361 'tags': [],
6362 },
6363 'playlist_mincount': 30,
6364 }, {
6365 # Trending Gaming Tab. tab id is empty
6366 'url': 'https://www.youtube.com/feed/trending?bp=4gIcGhpnYW1pbmdfY29ycHVzX21vc3RfcG9wdWxhcg%3D%3D',
6367 'info_dict': {
6368 'id': 'trending',
6369 'title': 'trending - Gaming',
6370 'tags': [],
6371 },
6372 'playlist_mincount': 30,
6373 }, {
6374 # Shorts url result in shorts tab
6375 # TODO: Fix channel id extraction
6376 'url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA/shorts',
6377 'info_dict': {
6378 'id': 'UCiu-3thuViMebBjw_5nWYrA',
6379 'title': 'cole-dlp-test-acc - Shorts',
6380 'channel': 'cole-dlp-test-acc',
6381 'description': 'test description',
6382 'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',
6383 'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
6384 'tags': [],
6385 'uploader_url': 'https://www.youtube.com/@coletdjnz',
6386 'uploader_id': '@coletdjnz',
6387 'uploader': 'cole-dlp-test-acc',
6388 },
6389 'playlist': [{
6390 'info_dict': {
6391 # Channel data is not currently available for short renderers (as of 2023-03-01)
6392 '_type': 'url',
6393 'ie_key': 'Youtube',
6394 'url': 'https://www.youtube.com/shorts/sSM9J5YH_60',
6395 'id': 'sSM9J5YH_60',
6396 'title': 'SHORT short',
6397 'view_count': int,
6398 'thumbnails': list,
6399 },
6400 }],
6401 'params': {'extract_flat': True},
6402 }, {
6403 # Live video status should be extracted
6404 'url': 'https://www.youtube.com/channel/UCQvWX73GQygcwXOTSf_VDVg/live',
6405 'info_dict': {
6406 'id': 'UCQvWX73GQygcwXOTSf_VDVg',
6407 'title': 'UCQvWX73GQygcwXOTSf_VDVg - Live', # TODO: should be Minecraft - Live or Minecraft - Topic - Live
6408 'tags': [],
6409 },
6410 'playlist': [{
6411 'info_dict': {
6412 '_type': 'url',
6413 'ie_key': 'Youtube',
6414 'url': 'startswith:https://www.youtube.com/watch?v=',
6415 'id': str,
6416 'title': str,
6417 'live_status': 'is_live',
6418 'channel_id': str,
6419 'channel_url': str,
6420 'concurrent_view_count': int,
6421 'channel': str,
6422 'uploader': str,
6423 'uploader_url': str,
6424 'uploader_id': str,
6425 'channel_is_verified': bool, # this will keep changing
6426 },
6427 }],
6428 'params': {'extract_flat': True, 'playlist_items': '1'},
6429 'playlist_mincount': 1,
6430 }, {
6431 # Channel renderer metadata. Contains number of videos on the channel
6432 'url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA/channels',
6433 'info_dict': {
6434 'id': 'UCiu-3thuViMebBjw_5nWYrA',
6435 'title': 'cole-dlp-test-acc - Channels',
6436 'channel': 'cole-dlp-test-acc',
6437 'description': 'test description',
6438 'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',
6439 'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
6440 'tags': [],
6441 'uploader_url': 'https://www.youtube.com/@coletdjnz',
6442 'uploader_id': '@coletdjnz',
6443 'uploader': 'cole-dlp-test-acc',
6444 },
6445 'playlist': [{
6446 'info_dict': {
6447 '_type': 'url',
6448 'ie_key': 'YoutubeTab',
6449 'url': 'https://www.youtube.com/channel/UC-lHJZR3Gqxm24_Vd_AJ5Yw',
6450 'id': 'UC-lHJZR3Gqxm24_Vd_AJ5Yw',
6451 'channel_id': 'UC-lHJZR3Gqxm24_Vd_AJ5Yw',
6452 'title': 'PewDiePie',
6453 'channel': 'PewDiePie',
6454 'channel_url': 'https://www.youtube.com/channel/UC-lHJZR3Gqxm24_Vd_AJ5Yw',
6455 'thumbnails': list,
6456 'channel_follower_count': int,
6457 'playlist_count': int,
6458 'uploader': 'PewDiePie',
6459 'uploader_url': 'https://www.youtube.com/@PewDiePie',
6460 'uploader_id': '@PewDiePie',
6461 'channel_is_verified': True,
6462 },
6463 }],
6464 'params': {'extract_flat': True},
6465 }, {
6466 'url': 'https://www.youtube.com/@3blue1brown/about',
6467 'info_dict': {
6468 'id': '@3blue1brown',
6469 'tags': ['Mathematics'],
6470 'title': '3Blue1Brown',
6471 'channel_follower_count': int,
6472 'channel_id': 'UCYO_jab_esuFRV4b17AJtAw',
6473 'channel': '3Blue1Brown',
6474 'channel_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',
6475 'description': 'md5:4d1da95432004b7ba840ebc895b6b4c9',
6476 'uploader_url': 'https://www.youtube.com/@3blue1brown',
6477 'uploader_id': '@3blue1brown',
6478 'uploader': '3Blue1Brown',
6479 'channel_is_verified': True,
6480 },
6481 'playlist_count': 0,
6482 }, {
6483 # Podcasts tab, with rich entry playlistRenderers
6484 'url': 'https://www.youtube.com/@99percentinvisiblepodcast/podcasts',
6485 'info_dict': {
6486 'id': 'UCVMF2HD4ZgC0QHpU9Yq5Xrw',
6487 'channel_id': 'UCVMF2HD4ZgC0QHpU9Yq5Xrw',
6488 'uploader_url': 'https://www.youtube.com/@99percentinvisiblepodcast',
6489 'description': 'md5:3a0ed38f1ad42a68ef0428c04a15695c',
6490 'title': '99 Percent Invisible - Podcasts',
6491 'uploader': '99 Percent Invisible',
6492 'channel_follower_count': int,
6493 'channel_url': 'https://www.youtube.com/channel/UCVMF2HD4ZgC0QHpU9Yq5Xrw',
6494 'tags': [],
6495 'channel': '99 Percent Invisible',
6496 'uploader_id': '@99percentinvisiblepodcast',
6497 },
6498 'playlist_count': 0,
6499 }, {
6500 # Releases tab, with rich entry playlistRenderers (same as Podcasts tab)
6501 'url': 'https://www.youtube.com/@AHimitsu/releases',
6502 'info_dict': {
6503 'id': 'UCgFwu-j5-xNJml2FtTrrB3A',
6504 'channel': 'A Himitsu',
6505 'uploader_url': 'https://www.youtube.com/@AHimitsu',
6506 'title': 'A Himitsu - Releases',
6507 'uploader_id': '@AHimitsu',
6508 'uploader': 'A Himitsu',
6509 'channel_id': 'UCgFwu-j5-xNJml2FtTrrB3A',
6510 'tags': 'count:12',
6511 'description': 'I make music',
6512 'channel_url': 'https://www.youtube.com/channel/UCgFwu-j5-xNJml2FtTrrB3A',
6513 'channel_follower_count': int,
6514 'channel_is_verified': True,
6515 },
6516 'playlist_mincount': 10,
6517 }, {
6518 # Playlist with only shorts, shown as reel renderers
6519 # FIXME: future: YouTube currently doesn't give continuation for this,
6520 # may do in future.
6521 'url': 'https://www.youtube.com/playlist?list=UUxqPAgubo4coVn9Lx1FuKcg',
6522 'info_dict': {
6523 'id': 'UUxqPAgubo4coVn9Lx1FuKcg',
6524 'channel_url': 'https://www.youtube.com/channel/UCxqPAgubo4coVn9Lx1FuKcg',
6525 'view_count': int,
6526 'uploader_id': '@BangyShorts',
6527 'description': '',
6528 'uploader_url': 'https://www.youtube.com/@BangyShorts',
6529 'channel_id': 'UCxqPAgubo4coVn9Lx1FuKcg',
6530 'channel': 'Bangy Shorts',
6531 'uploader': 'Bangy Shorts',
6532 'tags': [],
6533 'availability': 'public',
6534 'modified_date': r're:\d{8}',
6535 'title': 'Uploads from Bangy Shorts',
6536 },
6537 'playlist_mincount': 100,
6538 'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
6539 }, {
6540 'note': 'Tags containing spaces',
6541 'url': 'https://www.youtube.com/channel/UC7_YxT-KID8kRbqZo7MyscQ',
6542 'playlist_count': 3,
6543 'info_dict': {
6544 'id': 'UC7_YxT-KID8kRbqZo7MyscQ',
6545 'channel': 'Markiplier',
6546 'channel_id': 'UC7_YxT-KID8kRbqZo7MyscQ',
6547 'title': 'Markiplier',
6548 'channel_follower_count': int,
6549 'description': 'md5:0c010910558658824402809750dc5d97',
6550 'uploader_id': '@markiplier',
6551 'uploader_url': 'https://www.youtube.com/@markiplier',
6552 'uploader': 'Markiplier',
6553 'channel_url': 'https://www.youtube.com/channel/UC7_YxT-KID8kRbqZo7MyscQ',
6554 'channel_is_verified': True,
6555 'tags': ['markiplier', 'comedy', 'gaming', 'funny videos', 'funny moments',
6556 'sketch comedy', 'laughing', 'lets play', 'challenge videos', 'hilarious',
6557 'challenges', 'sketches', 'scary games', 'funny games', 'rage games',
6558 'mark fischbach'],
6559 },
6560 }]
6561
@classmethod
def suitable(cls, url):
    """Tell whether this extractor should handle *url*.

    Anything YoutubeIE accepts is declined here; for every other URL the
    decision is delegated to the parent class.
    """
    if YoutubeIE.suitable(url):
        return False
    return super().suitable(url)
6565
# Splits a URL into three named parts:
#   pre  - the portion matched by _VALID_URL
#   tab  - an optional path segment: "/" followed by characters other than "?", "#" and "/"
#   post - whatever remains up to the end of the string
# The conditional `(?(not_channel)|...)` only tries to capture `tab` when the
# `not_channel` group did not take part in the match.
# NOTE(review): `not_channel` is presumably a named group inside _VALID_URL,
# which is defined outside this view - confirm.
_URL_RE = re.compile(rf'(?P<pre>{_VALID_URL})(?(not_channel)|(?P<tab>/[^?#/]+))?(?P<post>.*)$')
6567
6568 def _get_url_mobj(self, url):
6569 mobj = self._URL_RE.match(url).groupdict()
6570 mobj.update((k, '') for k, v in mobj.items() if v is None)
6571 return mobj
6572
def _extract_tab_id_and_name(self, tab, base_url='https://www.youtube.com'):
    """Return a ``(tab_id, tab_name)`` pair for a tab renderer dict.

    The id is taken from the tab segment of the renderer's endpoint URL
    (resolved against *base_url*); if that yields nothing, from the
    renderer's ``tabIdentifier``; and as a last resort from the lowercased
    tab title.
    """
    tab_name = (tab.get('title') or '').lower()
    endpoint_url = traverse_obj(
        tab, ('endpoint', 'commandMetadata', 'webCommandMetadata', 'url'))
    tab_url = urljoin(base_url, endpoint_url)

    tab_id = None
    if tab_url:
        # The 'tab' group includes the leading slash; drop it
        tab_id = self._get_url_mobj(tab_url)['tab'][1:]
    if not tab_id:
        tab_id = traverse_obj(tab, 'tabIdentifier', expected_type=str)

    if tab_id:
        id_aliases = {
            'TAB_ID_SPONSORSHIPS': 'membership',
        }
        return id_aliases.get(tab_id, tab_id), tab_name

    # Fallback to tab name if we cannot get the tab id.
    # XXX: should we strip non-ascii letters? e.g. in case of 'let's play' tab example on special gaming channel
    # Note that in the case of translated tab name this may result in an empty string, which we don't want.
    if tab_name:
        self.write_debug(f'Falling back to selected tab name: {tab_name}')
    name_aliases = {
        'home': 'featured',
        'live': 'streams',
    }
    return name_aliases.get(tab_name, tab_name), tab_name
6594
6595 def _has_tab(self, tabs, tab_id):
6596 return any(self._extract_tab_id_and_name(tab)[0] == tab_id for tab in tabs)
6597
6598 def _empty_playlist(self, item_id, data):
6599 return self.playlist_result([], item_id, **self._extract_metadata_from_tabs(item_id, data))
6600
@YoutubeTabBaseInfoExtractor.passthrough_smuggled_data
def _real_extract(self, url, smuggled_data):
    """Resolve a tab-style URL (channel tab, playlist, feed, watch URL with a list).

    Depending on the URL and on the page data that gets downloaded, the
    result is one of: a ``url_result`` redirect to another extractor/URL,
    the result for a single tab, a playlist wrapping several tab results,
    an inline watch-page playlist, or an empty playlist.

    Raises ExtractorError when a watch URL carries neither a video nor a
    playlist id, when a music album cannot be resolved, when the requested
    tab does not exist, or when the page cannot be recognized at all.
    Raises UserNotLive when ``/live`` was requested but a different tab
    ended up selected.
    """
    item_id = self._match_id(url)
    # Force the host to www.youtube.com regardless of the host in the input URL
    url = urllib.parse.urlunparse(
        urllib.parse.urlparse(url)._replace(netloc='www.youtube.com'))
    compat_opts = self.get_param('compat_opts', [])

    mobj = self._get_url_mobj(url)
    pre, tab, post, is_channel = mobj['pre'], mobj['tab'], mobj['post'], not mobj['not_channel']
    if is_channel and smuggled_data.get('is_music_url'):
        if item_id[:2] == 'VL':  # Youtube music VL channels have an equivalent playlist
            return self.url_result(
                f'https://music.youtube.com/playlist?list={item_id[2:]}', YoutubeTabIE, item_id[2:])
        elif item_id[:2] == 'MP':  # Resolve albums (/[channel/browse]/MP...) to their equivalent playlist
            mdata = self._extract_tab_endpoint(
                f'https://music.youtube.com/channel/{item_id}', item_id, default_client='web_music')
            murl = traverse_obj(mdata, ('microformat', 'microformatDataRenderer', 'urlCanonical'),
                                get_all=False, expected_type=str)
            if not murl:
                raise ExtractorError('Failed to resolve album to playlist')
            return self.url_result(murl, YoutubeTabIE)
        elif mobj['channel_type'] == 'browse':  # Youtube music /browse/ should be changed to /channel/
            return self.url_result(
                f'https://music.youtube.com/channel/{item_id}{tab}{post}', YoutubeTabIE, item_id)

    # `tab` still carries its leading slash; the bare id is what gets compared with the selected tab later
    original_tab_id, display_id = tab[1:], f'{item_id}{tab}'
    if is_channel and not tab and 'no-youtube-channel-redirect' not in compat_opts:
        # A channel URL without an explicit tab is requested as its /videos tab
        url = f'{pre}/videos{post}'
    if smuggled_data.get('is_music_url'):
        self.report_warning(f'YouTube Music is not directly supported. Redirecting to {url}')

    # Handle both video/playlist URLs
    qs = parse_qs(url)
    video_id, playlist_id = (traverse_obj(qs, (key, 0)) for key in ('v', 'list'))
    if not video_id and mobj['not_channel'].startswith('watch'):
        if not playlist_id:
            # If there is neither video or playlist ids, youtube redirects to home page, which is undesirable
            raise ExtractorError('A video URL was given without video ID', expected=True)
        # Common mistake: https://www.youtube.com/watch?list=playlist_id
        self.report_warning(f'A video URL was given without video ID. Trying to download playlist {playlist_id}')
        return self.url_result(
            f'https://www.youtube.com/playlist?list={playlist_id}', YoutubeTabIE, playlist_id)

    # Hand the URL over to the single-video extractor when the playlist is not wanted
    if not self._yes_playlist(playlist_id, video_id):
        return self.url_result(
            f'https://www.youtube.com/watch?v={video_id}', YoutubeIE, video_id)

    data, ytcfg = self._extract_data(url, display_id)

    # YouTube may provide a non-standard redirect to the regional channel
    # See: https://github.com/yt-dlp/yt-dlp/issues/2694
    # https://support.google.com/youtube/answer/2976814#zippy=,conditional-redirects
    redirect_url = traverse_obj(
        data, ('onResponseReceivedActions', ..., 'navigateAction', 'endpoint', 'commandMetadata', 'webCommandMetadata', 'url'), get_all=False)
    if redirect_url and 'no-youtube-channel-redirect' not in compat_opts:
        # Re-attach the originally requested tab and trailing part to the redirect target
        redirect_url = ''.join((urljoin('https://www.youtube.com', redirect_url), tab, post))
        self.to_screen(f'This playlist is likely not available in your region. Following conditional redirect to {redirect_url}')
        return self.url_result(redirect_url, YoutubeTabIE)

    # `extra_tabs` collects URLs of further tabs that are extracted in addition to the main one
    tabs, extra_tabs = self._extract_tab_renderers(data), []
    if is_channel and tabs and 'no-youtube-channel-redirect' not in compat_opts:
        selected_tab = self._extract_selected_tab(tabs)
        selected_tab_id, selected_tab_name = self._extract_tab_id_and_name(selected_tab, url)  # NB: Name may be translated
        self.write_debug(f'Selected tab: {selected_tab_id!r} ({selected_tab_name}), Requested tab: {original_tab_id!r}')

        # /about is no longer a tab
        if original_tab_id == 'about':
            return self._empty_playlist(item_id, data)

        if not original_tab_id and selected_tab_name:
            self.to_screen('Downloading all uploads of the channel. '
                           'To download only the videos in a specific tab, pass the tab\'s URL')
            if self._has_tab(tabs, 'streams'):
                extra_tabs.append(''.join((pre, '/streams', post)))
            if self._has_tab(tabs, 'shorts'):
                extra_tabs.append(''.join((pre, '/shorts', post)))
            # XXX: Members-only tab should also be extracted

            if not extra_tabs and selected_tab_id != 'videos':
                # Channel does not have streams, shorts or videos tabs
                if item_id[:2] != 'UC':
                    return self._empty_playlist(item_id, data)

                # Topic channels don't have /videos. Use the equivalent playlist instead
                pl_id = f'UU{item_id[2:]}'
                pl_url = f'https://www.youtube.com/playlist?list={pl_id}'
                try:
                    data, ytcfg = self._extract_data(pl_url, pl_id, ytcfg=ytcfg, fatal=True, webpage_fatal=True)
                except ExtractorError:
                    # The assignment above did not happen, so `data` is still the channel page data
                    return self._empty_playlist(item_id, data)
                else:
                    item_id, url = pl_id, pl_url
                    self.to_screen(
                        f'The channel does not have a videos, shorts, or live tab. Redirecting to playlist {pl_id} instead')

            elif extra_tabs and selected_tab_id != 'videos':
                # When there are shorts/live tabs but not videos tab
                url, data = f'{pre}{post}', None

        elif (original_tab_id or 'videos') != selected_tab_id:
            if original_tab_id == 'live':
                # Live tab should have redirected to the video
                # Except in the case the channel has an actual live tab
                # Example: https://www.youtube.com/channel/UCEH7P7kyJIkS_gJf93VYbmg/live
                raise UserNotLive(video_id=item_id)
            elif selected_tab_name:
                raise ExtractorError(f'This channel does not have a {original_tab_id} tab', expected=True)

            # For channels such as https://www.youtube.com/channel/UCtFRv9O2AHqOZjjynzrv-xg
            url = f'{pre}{post}'

    # YouTube sometimes provides a button to reload playlist with unavailable videos.
    if 'no-youtube-unavailable-videos' not in compat_opts:
        data = self._reload_with_unavailable_videos(display_id, data, ytcfg) or data
    self._extract_and_report_alerts(data, only_once=True)

    # Tabs are extracted again because `data` may have been replaced or cleared above
    tabs, entries = self._extract_tab_renderers(data), []
    if tabs:
        entries = [self._extract_from_tabs(item_id, ytcfg, data, tabs)]
        entries[0].update({
            'extractor_key': YoutubeTabIE.ie_key(),
            'extractor': YoutubeTabIE.IE_NAME,
            'webpage_url': url,
        })
    if self.get_param('playlist_items') == '0':
        entries.extend(self.url_result(u, YoutubeTabIE) for u in extra_tabs)
    else:  # Users expect to get all `video_id`s even with `--flat-playlist`. So don't return `url_result`
        entries.extend(map(self._real_extract, extra_tabs))

    if len(entries) == 1:
        return entries[0]
    elif entries:
        metadata = self._extract_metadata_from_tabs(item_id, data)
        # Generic wording is used unless a UC... channel id is available to build the concrete UU playlist URL
        uploads_url = 'the Uploads (UU) playlist URL'
        if try_get(metadata, lambda x: x['channel_id'].startswith('UC')):
            uploads_url = f'https://www.youtube.com/playlist?list=UU{metadata["channel_id"][2:]}'
        self.to_screen(
            'Downloading as multiple playlists, separated by tabs. '
            f'To download as a single playlist instead, pass {uploads_url}')
        return self.playlist_result(entries, item_id, **metadata)

    # Inline playlist
    playlist = traverse_obj(
        data, ('contents', 'twoColumnWatchNextResults', 'playlist', 'playlist'), expected_type=dict)
    if playlist:
        return self._extract_from_playlist(item_id, url, data, playlist, ytcfg)

    # Last resort: fall back to a single video, preferring the id reported by the page over the one from the URL
    video_id = traverse_obj(
        data, ('currentVideoEndpoint', 'watchEndpoint', 'videoId'), expected_type=str) or video_id
    if video_id:
        if tab != '/live':  # live tab is expected to redirect to video
            self.report_warning(f'Unable to recognize playlist. Downloading just video {video_id}')
        return self.url_result(f'https://www.youtube.com/watch?v={video_id}', YoutubeIE, video_id)

    raise ExtractorError('Unable to recognize tab page')
6756
6757
class YoutubePlaylistIE(InfoExtractor):
    """Accept bare playlist ids and ``...?list=<id>`` URLs and forward them to YoutubeTabIE.

    No page is downloaded here: the input is rewritten into a canonical
    ``https://www.youtube.com/playlist`` URL and returned as a ``url_result``.
    """
    IE_DESC = 'YouTube playlists'
    _VALID_URL = r'''(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        (?:
                            (?:
                                youtube(?:kids)?\.com|
                                {invidious}
                            )
                            /.*?\?.*?\blist=
                        )?
                        (?P<id>{playlist_id})
                     )'''.format(
        playlist_id=YoutubeBaseInfoExtractor._PLAYLIST_ID_RE,
        invidious='|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
    )
    IE_NAME = 'youtube:playlist'
    _TESTS = [{
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
            'uploader': 'Wickman',
            'uploader_id': '@WickmanVT',
            'description': 'md5:8fa6f52abb47a9552002fa3ddfc57fc2',
            'view_count': int,
            'uploader_url': 'https://www.youtube.com/@WickmanVT',
            'modified_date': r're:\d{8}',
            'channel_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
            'channel': 'Wickman',
            'tags': [],
            'channel_url': 'https://www.youtube.com/channel/UCKSpbfbl5kRQpTdL7kMc-1Q',
            'availability': 'public',
        },
        'playlist_mincount': 29,
    }, {
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        },
        'playlist_count': 2,
        'skip': 'This playlist is private',
    }, {
        'note': 'embedded',
        'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA15',
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
            'uploader': 'milan',
            'uploader_id': '@milan5503',
            'description': '',
            'channel_url': 'https://www.youtube.com/channel/UCEI1-PVPcYXjB73Hfelbmaw',
            'tags': [],
            'modified_date': '20140919',
            'view_count': int,
            'channel': 'milan',
            'channel_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
            'uploader_url': 'https://www.youtube.com/@milan5503',
            'availability': 'public',
        },
        'expected_warnings': [r'[Uu]navailable videos? (is|are|will be) hidden', 'Retrying', 'Giving up'],
    }, {
        'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
        'playlist_mincount': 455,
        'info_dict': {
            'title': '2018 Chinese New Singles (11/6 updated)',
            'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
            'uploader': 'LBK',
            'uploader_id': '@music_king',
            'description': 'md5:da521864744d60a198e3a88af4db0d9d',
            'channel': 'LBK',
            'view_count': int,
            'channel_url': 'https://www.youtube.com/channel/UC21nz3_MesPLqtDqwdvnoxA',
            'tags': [],
            'uploader_url': 'https://www.youtube.com/@music_king',
            'channel_id': 'UC21nz3_MesPLqtDqwdvnoxA',
            'modified_date': r're:\d{8}',
            'availability': 'public',
        },
        'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
    }, {
        'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
        'only_matching': True,
    }, {
        # music album playlist
        'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Return whether this extractor should handle *url*.

        URLs that YoutubeTabIE already accepts, and URLs carrying a non-empty
        ``v`` query parameter, are declined; otherwise the parent class decides.
        """
        if YoutubeTabIE.suitable(url):
            return False
        # parse_qs comes from the module-level import; no function-local import is needed
        if parse_qs(url).get('v', [None])[0]:
            return False
        return super().suitable(url)

    def _real_extract(self, url):
        """Rewrite *url* into a canonical playlist URL and delegate to YoutubeTabIE."""
        playlist_id = self._match_id(url)
        is_music_url = YoutubeBaseInfoExtractor.is_music_url(url)
        # Keep the original query string if there is one; a bare playlist id has
        # no query, in which case only `list` is set
        url = update_url_query(
            'https://www.youtube.com/playlist',
            parse_qs(url) or {'list': playlist_id})
        if is_music_url:
            url = smuggle_url(url, {'is_music_url': True})
        return self.url_result(url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
6870
6871
class YoutubeYtBeIE(InfoExtractor):
    """Handle ``youtu.be/<video id>`` short links that also carry a ``list=`` parameter.

    The link is rewritten into a regular watch URL containing both the video
    and the playlist id, and handed over to YoutubeTabIE.
    """
    IE_DESC = 'youtu.be'
    _VALID_URL = rf'https?://youtu\.be/(?P<id>[0-9A-Za-z_-]{{11}})/*?.*?\blist=(?P<playlist_id>{YoutubeBaseInfoExtractor._PLAYLIST_ID_RE})'
    _TESTS = [{
        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
        'info_dict': {
            'id': 'yeWKywCrFtk',
            'ext': 'mp4',
            'title': 'Small Scale Baler and Braiding Rugs',
            'uploader': 'Backus-Page House Museum',
            'uploader_id': '@backuspagemuseum',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/@backuspagemuseum',
            'upload_date': '20161008',
            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
            'categories': ['Nonprofits & Activism'],
            'tags': list,
            'like_count': int,
            'age_limit': 0,
            'playable_in_embed': True,
            'thumbnail': r're:^https?://.*\.webp',
            'channel': 'Backus-Page House Museum',
            'channel_id': 'UCEfMCQ9bs3tjvjy1s451zaw',
            'live_status': 'not_live',
            'view_count': int,
            'channel_url': 'https://www.youtube.com/channel/UCEfMCQ9bs3tjvjy1s451zaw',
            'availability': 'public',
            'duration': 59,
            'comment_count': int,
            'channel_follower_count': int,
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
    }, {
        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Turn the short link into a full watch URL and delegate to YoutubeTabIE."""
        video_id, playlist_id = self._match_valid_url(url).group('id', 'playlist_id')
        watch_url = update_url_query('https://www.youtube.com/watch', {
            'v': video_id,
            'list': playlist_id,
            'feature': 'youtu.be',
        })
        # The playlist id (not the video id) is passed on as the result's video_id
        return self.url_result(watch_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
6921
6922
class YoutubeLivestreamEmbedIE(InfoExtractor):
    """Map ``/embed/live_stream?channel=<id>`` embeds onto the channel's ``/live`` URL."""
    IE_DESC = 'YouTube livestream embeds'
    _VALID_URL = r'https?://(?:\w+\.)?youtube\.com/embed/live_stream/?\?(?:[^#]+&)?channel=(?P<id>[^&#]+)'
    _TESTS = [{
        'url': 'https://www.youtube.com/embed/live_stream?channel=UC2_KI6RB__jGdlnK6dvFEZA',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE using the ``/live`` URL of the embedded channel."""
        channel_id = self._match_id(url)
        live_url = f'https://www.youtube.com/channel/{channel_id}/live'
        return self.url_result(live_url, ie=YoutubeTabIE.ie_key(), video_id=channel_id)
6936
6937
class YoutubeYtUserIE(InfoExtractor):
    """Expand the ``ytuser:<name>`` shorthand into the matching ``/user/<name>`` URL."""
    IE_NAME = 'youtube:user'
    IE_DESC = 'YouTube user videos; "ytuser:" prefix'
    _VALID_URL = r'ytuser:(?P<id>.+)'
    _TESTS = [{
        'url': 'ytuser:phihag',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the user page URL."""
        user_id = self._match_id(url)
        user_url = f'https://www.youtube.com/user/{user_id}'
        return self.url_result(user_url, YoutubeTabIE, user_id)
6950
6951
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    """Resolve the ``:ytfav`` keyword family to the ``LL`` playlist URL."""
    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube liked videos; ":ytfav" keyword (requires cookies)'
    _LOGIN_REQUIRED = True
    _VALID_URL = r':ytfav(?:ou?rite)?s?'
    _TESTS = [{
        'url': ':ytfav',
        'only_matching': True,
    }, {
        'url': ':ytfavorites',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE; the keyword itself carries no further information."""
        liked_videos_url = 'https://www.youtube.com/playlist?list=LL'
        return self.url_result(liked_videos_url, ie=YoutubeTabIE.ie_key())
6969
6970
class YoutubeNotificationsIE(YoutubeTabBaseInfoExtractor):
    """Turn the notification menu (``:ytnotif`` keyword) into a playlist of URL entries."""
    IE_NAME = 'youtube:notif'
    IE_DESC = 'YouTube notifications; ":ytnotif" keyword (requires cookies)'
    _VALID_URL = r':ytnotif(?:ication)?s?'
    _LOGIN_REQUIRED = True
    _TESTS = [{
        'url': ':ytnotif',
        'only_matching': True,
    }, {
        'url': ':ytnotifications',
        'only_matching': True,
    }]

    def _extract_notification_menu(self, response, continuation_list):
        """Yield the entries of one menu page.

        ``continuation_list`` is a one-element list used as an out-parameter:
        slot 0 is cleared first and then set to the page's continuation
        renderer, if the page contains one.
        """
        menu_items = traverse_obj(
            response,
            ('actions', 0, 'openPopupAction', 'popup', 'multiPageMenuRenderer', 'sections', 0, 'multiPageMenuNotificationSectionRenderer', 'items'),
            ('actions', 0, 'appendContinuationItemsAction', 'continuationItems'),
            expected_type=list) or []
        continuation_list[0] = None
        for menu_item in menu_items:
            entry = self._extract_notification_renderer(menu_item.get('notificationRenderer'))
            if entry:
                yield entry
            next_page = menu_item.get('continuationItemRenderer')
            if next_page:
                continuation_list[0] = next_page

    def _extract_notification_renderer(self, notification):
        """Build a ``url``-type entry for one notification.

        Returns None when the notification points neither at a video nor at
        a channel post.
        """
        video_id = traverse_obj(
            notification, ('navigationEndpoint', 'watchEndpoint', 'videoId'), expected_type=str)
        channel_id = None
        if video_id:
            url = f'https://www.youtube.com/watch?v={video_id}'
        else:
            browse_ep = traverse_obj(
                notification, ('navigationEndpoint', 'browseEndpoint'), expected_type=dict)
            channel_id = self.ucid_or_none(traverse_obj(browse_ep, 'browseId', expected_type=str))
            post_id = self._search_regex(
                r'/post/(.+)', traverse_obj(browse_ep, 'canonicalBaseUrl', expected_type=str),
                'post id', default=None)
            if not (channel_id and post_id):
                return
            # The direct /post url redirects to this in the browser
            url = f'https://www.youtube.com/channel/{channel_id}/community?lb={post_id}'

        channel = traverse_obj(
            notification, ('contextualMenu', 'menuRenderer', 'items', 1, 'menuServiceItemRenderer', 'text', 'runs', 1, 'text'),
            expected_type=str)
        notification_title = self._get_text(notification, 'shortMessage')
        if notification_title:
            notification_title = notification_title.replace('\xad', '')  # remove soft hyphens
        # TODO: handle recommended videos
        title = self._search_regex(
            rf'{re.escape(channel or "")}[^:]+: (.+)', notification_title,
            'video title', default=None)

        # The sent time is only parsed when the 'approximate_date' extractor argument is set
        timestamp = None
        if self._configuration_arg('approximate_date', ie_key=YoutubeTabIE):
            timestamp = self._parse_time_text(self._get_text(notification, 'sentTimeText'))

        target_ie = YoutubeIE if video_id else YoutubeTabIE
        return {
            '_type': 'url',
            'url': url,
            'ie_key': target_ie.ie_key(),
            'video_id': video_id,
            'title': title,
            'channel_id': channel_id,
            'channel': channel,
            'uploader': channel,
            'thumbnails': self._extract_thumbnails(notification, 'videoThumbnail'),
            'timestamp': timestamp,
        }

    def _notification_menu_entries(self, ytcfg):
        """Yield entries page by page until a page provides no continuation."""
        continuation_list = [None]
        response = None
        for page in itertools.count(1):
            ctoken = traverse_obj(
                continuation_list, (0, 'continuationEndpoint', 'getNotificationMenuEndpoint', 'ctoken'), expected_type=str)
            query = {'ctoken': ctoken} if ctoken else {}
            # Visitor data is taken from the previous page's response (None on the first page)
            headers = self.generate_api_headers(
                ytcfg=ytcfg, visitor_data=self._extract_visitor_data(response))
            response = self._extract_response(
                item_id=f'page {page}', query=query, ytcfg=ytcfg,
                ep='notification/get_notification_menu', check_get_keys='actions',
                headers=headers)
            yield from self._extract_notification_menu(response, continuation_list)
            if not continuation_list[0]:
                break

    def _real_extract(self, url):
        """Return the notifications as a (lazily generated) playlist."""
        display_id = 'notifications'
        ytcfg = {} if self.skip_webpage else self._download_ytcfg('web', display_id)
        self._report_playlist_authcheck(ytcfg)
        entries = self._notification_menu_entries(ytcfg)
        return self.playlist_result(entries, display_id, display_id)
7061
7062
class YoutubeSearchIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor):
    """Search extractor registered under the ``ytsearch`` key.

    The class is purely declarative; all behaviour comes from its base classes.
    """
    IE_NAME = 'youtube:search'
    IE_DESC = 'YouTube search'

    _SEARCH_KEY = 'ytsearch'
    _SEARCH_PARAMS = 'EgIQAfABAQ=='  # Videos only

    _TESTS = [{
        'url': 'ytsearch5:youtube-dl test video',
        'playlist_count': 5,
        'info_dict': {
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
        },
    }, {
        'note': 'Suicide/self-harm search warning',
        'url': 'ytsearch1:i hate myself and i wanna die',
        'playlist_count': 1,
        'info_dict': {
            'id': 'i hate myself and i wanna die',
            'title': 'i hate myself and i wanna die',
        },
    }]
7084
7085
class YoutubeSearchDateIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor):
    """Search extractor registered under the `ytsearchdate` search key (videos only, sorted by date)."""

    IE_NAME = f'{YoutubeSearchIE.IE_NAME}:date'
    IE_DESC = 'YouTube search, newest videos first'
    _SEARCH_KEY = 'ytsearchdate'
    # Search filter parameter: videos only, sorted by date
    _SEARCH_PARAMS = 'CAISAhAB8AEB'
    _TESTS = [
        {
            'url': 'ytsearchdate5:youtube-dl test video',
            'playlist_count': 5,
            'info_dict': {
                'id': 'youtube-dl test video',
                'title': 'youtube-dl test video',
            },
        },
    ]
7099
7100
class YoutubeSearchURLIE(YoutubeTabBaseInfoExtractor):
    """Extractor for youtube.com `/results` and `/search` URLs carrying a `search_query` or `q` parameter."""

    IE_NAME = f'{YoutubeSearchIE.IE_NAME}_url'
    IE_DESC = 'YouTube search URLs with sorting and filter support'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/(?:results|search)\?([^#]+&)?(?:search_query|q)=(?:[^&]+)(?:[&#]|$)'
    _TESTS = [
        {
            'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
            'playlist_mincount': 5,
            'info_dict': {
                'id': 'youtube-dl test video',
                'title': 'youtube-dl test video',
            },
        },
        {
            'url': 'https://www.youtube.com/results?search_query=python&sp=EgIQAg%253D%253D',
            'playlist_mincount': 5,
            'info_dict': {
                'id': 'python',
                'title': 'python',
            },
        },
        {
            'url': 'https://www.youtube.com/results?search_query=%23cats',
            'playlist_mincount': 1,
            'info_dict': {
                'id': '#cats',
                'title': '#cats',
                # The test suite does not have support for nested playlists
                # 'entries': [{
                #     'url': r're:https://(www\.)?youtube\.com/hashtag/cats',
                #     'title': '#cats',
                # }],
            },
        },
        {
            # Channel results
            'url': 'https://www.youtube.com/results?search_query=kurzgesagt&sp=EgIQAg%253D%253D',
            'info_dict': {
                'id': 'kurzgesagt',
                'title': 'kurzgesagt',
            },
            'playlist': [{
                'info_dict': {
                    '_type': 'url',
                    'id': 'UCsXVk37bltHxD1rDPwtNM8Q',
                    'url': 'https://www.youtube.com/channel/UCsXVk37bltHxD1rDPwtNM8Q',
                    'ie_key': 'YoutubeTab',
                    'channel': 'Kurzgesagt – In a Nutshell',
                    'description': 'md5:4ae48dfa9505ffc307dad26342d06bfc',
                    'title': 'Kurzgesagt – In a Nutshell',
                    'channel_id': 'UCsXVk37bltHxD1rDPwtNM8Q',
                    # No longer available for search as it is set to the handle.
                    # 'playlist_count': int,
                    'channel_url': 'https://www.youtube.com/channel/UCsXVk37bltHxD1rDPwtNM8Q',
                    'thumbnails': list,
                    'uploader_id': '@kurzgesagt',
                    'uploader_url': 'https://www.youtube.com/@kurzgesagt',
                    'uploader': 'Kurzgesagt – In a Nutshell',
                    'channel_is_verified': True,
                    'channel_follower_count': int,
                },
            }],
            'params': {'extract_flat': True, 'playlist_items': '1'},
            'playlist_mincount': 1,
        },
        {
            'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
            'only_matching': True,
        },
    ]

    def _real_extract(self, url):
        """Return a playlist of search results; the search query is used as both playlist id and title."""
        qs = parse_qs(url)
        # `search_query` takes precedence over `q`; the first value of the chosen parameter is used
        query = (qs.get('search_query') or qs.get('q'))[0]
        # Optional `sp` value from the URL (None when absent), forwarded to the search as-is
        search_params = qs.get('sp', (None,))[0]
        entries = self._search_results(query, search_params)
        return self.playlist_result(entries, query, query)
7170
7171
class YoutubeMusicSearchURLIE(YoutubeTabBaseInfoExtractor):
    """Extractor for music.youtube.com search URLs; a section may be chosen via `sp` or the URL fragment."""

    IE_NAME = 'youtube:music:search_url'
    IE_DESC = 'YouTube music search URLs with selectable sections, e.g. #songs'
    _VALID_URL = r'https?://music\.youtube\.com/search\?([^#]+&)?(?:search_query|q)=(?:[^&]+)(?:[&#]|$)'
    _TESTS = [
        {
            'url': 'https://music.youtube.com/search?q=royalty+free+music',
            'playlist_count': 16,
            'info_dict': {
                'id': 'royalty free music',
                'title': 'royalty free music',
            },
        },
        {
            'url': 'https://music.youtube.com/search?q=royalty+free+music&sp=EgWKAQIIAWoKEAoQAxAEEAkQBQ%3D%3D',
            'playlist_mincount': 30,
            'info_dict': {
                'id': 'royalty free music - songs',
                'title': 'royalty free music - songs',
            },
            'params': {'extract_flat': 'in_playlist'},
        },
        {
            'url': 'https://music.youtube.com/search?q=royalty+free+music#community+playlists',
            'playlist_mincount': 30,
            'info_dict': {
                'id': 'royalty free music - community playlists',
                'title': 'royalty free music - community playlists',
            },
            'params': {'extract_flat': 'in_playlist'},
        },
    ]

    # Section name (lowercase) -> `sp` search parameter value
    _SECTIONS = {
        'albums': 'EgWKAQIYAWoKEAoQAxAEEAkQBQ==',
        'artists': 'EgWKAQIgAWoKEAoQAxAEEAkQBQ==',
        'community playlists': 'EgeKAQQoAEABagoQChADEAQQCRAF',
        'featured playlists': 'EgeKAQQoADgBagwQAxAJEAQQDhAKEAU==',
        'songs': 'EgWKAQIIAWoKEAoQAxAEEAkQBQ==',
        'videos': 'EgWKAQIQAWoKEAoQAxAEEAkQBQ==',
    }

    def _real_extract(self, url):
        """
        Return a playlist of music search results.

        The playlist id/title is the query, suffixed with ' - <section>' when a section applies.
        """
        qs = parse_qs(url)
        query = (qs.get('search_query') or qs.get('q'))[0]
        params = qs.get('sp', (None,))[0]

        if params:
            # Explicit `sp`: name the section after the matching _SECTIONS key,
            # falling back to the raw parameter value when it is not a known section
            section = next(
                (name for name, value in self._SECTIONS.items() if value == params),
                params)
        else:
            # No `sp`: use the text between the first and second '#' (if any) as the section name
            url_pieces = url.split('#')
            fragment = url_pieces[1] if len(url_pieces) > 1 else ''
            section = urllib.parse.unquote_plus(fragment).lower()
            params = self._SECTIONS.get(section)
            if not params:
                # Unknown or missing section: search without a section filter
                section = None

        title = join_nonempty(query, section, delim=' - ')
        entries = self._search_results(query, params, default_client='web_music')
        return self.playlist_result(entries, title, title)
7223
7224
class YoutubeFeedsInfoExtractor(InfoExtractor):
    """
    Base class for feed extractors
    Subclasses must re-define the _FEED_NAME property.
    """
    _FEED_NAME = 'feeds'
    _LOGIN_REQUIRED = True

    @classproperty
    def IE_NAME(cls):
        """Extractor name derived from the feed name: 'youtube:<_FEED_NAME>'."""
        return 'youtube:{}'.format(cls._FEED_NAME)

    def _real_initialize(self):
        """Run the login-required check implemented on YoutubeBaseInfoExtractor."""
        YoutubeBaseInfoExtractor._check_login_required(self)

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the feed URL for this extractor's _FEED_NAME."""
        feed_url = f'https://www.youtube.com/feed/{self._FEED_NAME}'
        return self.url_result(feed_url, ie=YoutubeTabIE.ie_key())
7243
7244
class YoutubeWatchLaterIE(InfoExtractor):
    """Keyword extractor mapping `:ytwatchlater` to the `WL` playlist URL."""

    IE_NAME = 'youtube:watchlater'
    IE_DESC = 'Youtube watch later list; ":ytwatchlater" keyword (requires cookies)'
    _VALID_URL = r':ytwatchlater'
    _TESTS = [
        {
            'url': ':ytwatchlater',
            'only_matching': True,
        },
    ]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the fixed watch-later playlist URL."""
        watch_later_url = 'https://www.youtube.com/playlist?list=WL'
        return self.url_result(watch_later_url, ie=YoutubeTabIE.ie_key())
7257
7258
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the 'recommended' feed; matches the bare youtube.com URL and the `:ytrec` keyword."""

    IE_DESC = 'YouTube recommended videos; ":ytrec" keyword'
    _FEED_NAME = 'recommended'
    # Overrides the base class default of True
    _LOGIN_REQUIRED = False
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/?(?:[?#]|$)|:ytrec(?:ommended)?'
    _TESTS = [
        {
            'url': ':ytrec',
            'only_matching': True,
        },
        {
            'url': ':ytrecommended',
            'only_matching': True,
        },
        {
            'url': 'https://youtube.com',
            'only_matching': True,
        },
    ]
7274
7275
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the 'subscriptions' feed, selected with the `:ytsubs` keyword."""

    IE_DESC = 'YouTube subscriptions feed; ":ytsubs" keyword (requires cookies)'
    _FEED_NAME = 'subscriptions'
    _VALID_URL = r':ytsub(?:scription)?s?'
    _TESTS = [
        {
            'url': ':ytsubs',
            'only_matching': True,
        },
        {
            'url': ':ytsubscriptions',
            'only_matching': True,
        },
    ]
7287
7288
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the 'history' feed, selected with the `:ythis` / `:ythistory` keyword."""

    IE_DESC = 'Youtube watch history; ":ythis" keyword (requires cookies)'
    _FEED_NAME = 'history'
    _VALID_URL = r':ythis(?:tory)?'
    _TESTS = [
        {
            'url': ':ythistory',
            'only_matching': True,
        },
    ]
7297
7298
class YoutubeShortsAudioPivotIE(InfoExtractor):
    """Extractor for `/source/<video id>/shorts` URLs; redirects to the sfv_audio_pivot feed."""

    IE_NAME = 'youtube:shorts:pivot:audio'
    IE_DESC = 'YouTube Shorts audio pivot (Shorts using audio of a given video)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/source/(?P<id>[\w-]{11})/shorts'
    _TESTS = [
        {
            'url': 'https://www.youtube.com/source/Lyj-MZSAA9o/shorts',
            'only_matching': True,
        },
    ]

    @staticmethod
    def _generate_audio_pivot_params(video_id):
        """
        Generates sfv_audio_pivot browse params for this video id

        The encoded video id is inserted three times into a fixed byte template,
        which is then base64-encoded and URL-quoted.
        """
        encoded_id = video_id.encode()
        pb_params = b'\xf2\x05+\n)\x12\'\n\x0b%b\x12\x0b%b\x1a\x0b%b' % (encoded_id, encoded_id, encoded_id)
        b64_params = base64.b64encode(pb_params).decode()
        return urllib.parse.quote(b64_params)

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the audio pivot feed URL for the matched video id."""
        video_id = self._match_id(url)
        browse_params = self._generate_audio_pivot_params(video_id)
        return self.url_result(
            f'https://www.youtube.com/feed/sfv_audio_pivot?bp={browse_params}',
            ie=YoutubeTabIE)
7321
7322
class YoutubeTruncatedURLIE(InfoExtractor):
    """
    Catch-all for watch/attribution_link URLs that end without a video id.

    Such URLs typically result from an unquoted `&` on the command line; extraction
    always fails with an expected error explaining how to quote the URL.
    """

    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    # Anchored with `$`: only matches when nothing follows the listed leading parameter
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """
        Always raise an expected ExtractorError telling the user to quote the URL.

        @raises ExtractorError  unconditionally (expected=True)
        """
        # The example commands name the executable the user actually runs (yt-dlp)
        raise ExtractorError(
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like yt-dlp '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply yt-dlp BaW_jenozKc .',
            expected=True)
7370
7371
class YoutubeClipIE(YoutubeTabBaseInfoExtractor):
    """
    Extractor for youtube.com `/clip/<id>` URLs.

    Resolves the clip to its base video and returns a url_transparent result that
    carries the clip id and the clip's start/end section times.
    """

    IE_NAME = 'youtube:clip'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/clip/(?P<id>[^/?#]+)'
    _TESTS = [{
        # FIXME: Other metadata should be extracted from the clip, not from the base video
        'url': 'https://www.youtube.com/clip/UgytZKpehg-hEMBSn3F4AaABCQ',
        'info_dict': {
            'id': 'UgytZKpehg-hEMBSn3F4AaABCQ',
            'ext': 'mp4',
            'section_start': 29.0,
            'section_end': 39.7,
            'duration': 10.7,
            'age_limit': 0,
            'availability': 'public',
            'categories': ['Gaming'],
            'channel': 'Scott The Woz',
            'channel_id': 'UC4rqhyiTs7XyuODcECvuiiQ',
            'channel_url': 'https://www.youtube.com/channel/UC4rqhyiTs7XyuODcECvuiiQ',
            'description': 'md5:7a4517a17ea9b4bd98996399d8bb36e7',
            'like_count': int,
            'playable_in_embed': True,
            'tags': 'count:17',
            'thumbnail': 'https://i.ytimg.com/vi_webp/ScPX26pdQik/maxresdefault.webp',
            'title': 'Mobile Games on Console - Scott The Woz',
            'upload_date': '20210920',
            'uploader': 'Scott The Woz',
            'uploader_id': '@ScottTheWoz',
            'uploader_url': 'https://www.youtube.com/@ScottTheWoz',
            'view_count': int,
            'live_status': 'not_live',
            'channel_follower_count': int,
            'chapters': 'count:20',
            'comment_count': int,
            'heatmap': 'count:100',
        },
    }]

    def _real_extract(self, url):
        """
        Build a url_transparent result pointing at the clip's base video.

        @returns  dict with the watch URL of the base video, the clip id as `id`,
                  and `section_start` / `section_end` in seconds
        @raises ExtractorError  if the base video id or the clip start/end times
                                cannot be found in the page data
        """
        clip_id = self._match_id(url)
        _, data = self._extract_webpage(url, clip_id)

        video_id = traverse_obj(data, ('currentVideoEndpoint', 'watchEndpoint', 'videoId'))
        if not video_id:
            raise ExtractorError('Unable to find video ID')

        # First `loopCommand` found along this path; None when the path matches nothing
        clip_data = traverse_obj(data, (
            'engagementPanels', ..., 'engagementPanelSectionListRenderer', 'content', 'clipSectionRenderer',
            'contents', ..., 'clipAttributionRenderer', 'onScrubExit', 'commandExecutorCommand', 'commands', ...,
            'openPopupAction', 'popup', 'notificationActionRenderer', 'actionButton', 'buttonRenderer', 'command',
            'commandExecutorCommand', 'commands', ..., 'loopCommand'), get_all=False)

        # Validate the times up front so a missing or malformed loopCommand gives a clear
        # extractor error instead of a TypeError/KeyError/ValueError from subscripting or int()
        start_ms = int_or_none(traverse_obj(clip_data, 'startTimeMs'))
        end_ms = int_or_none(traverse_obj(clip_data, 'endTimeMs'))
        if start_ms is None or end_ms is None:
            raise ExtractorError('Unable to find clip start/end times')

        return {
            '_type': 'url_transparent',
            'url': f'https://www.youtube.com/watch?v={video_id}',
            'ie_key': YoutubeIE.ie_key(),
            'id': clip_id,
            # Times are given in milliseconds; sections are expressed in seconds
            'section_start': start_ms / 1000,
            'section_end': end_ms / 1000,
        }
7431
7432
class YoutubeConsentRedirectIE(YoutubeBaseInfoExtractor):
    """Extractor for consent.youtube.com redirect pages; follows the URL in the `continue` parameter."""

    IE_NAME = 'youtube:consent'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://consent\.youtube\.com/m\?'
    _TESTS = [
        {
            'url': 'https://consent.youtube.com/m?continue=https%3A%2F%2Fwww.youtube.com%2Flive%2FqVv6vCqciTM%3Fcbrd%3D1&gl=NL&m=0&pc=yt&hl=en&src=1',
            'info_dict': {
                'id': 'qVv6vCqciTM',
                'ext': 'mp4',
                'age_limit': 0,
                'uploader_id': '@sana_natori',
                'comment_count': int,
                'chapters': 'count:13',
                'upload_date': '20221223',
                'thumbnail': 'https://i.ytimg.com/vi/qVv6vCqciTM/maxresdefault.jpg',
                'channel_url': 'https://www.youtube.com/channel/UCIdEIHpS0TdkqRkHL5OkLtA',
                'uploader_url': 'https://www.youtube.com/@sana_natori',
                'like_count': int,
                'release_date': '20221223',
                'tags': ['Vtuber', '月ノ美兎', '名取さな', 'にじさんじ', 'クリスマス', '3D配信'],
                'title': '【 #インターネット女クリスマス 】3Dで歌ってはしゃぐインターネットの女たち【月ノ美兎/名取さな】',
                'view_count': int,
                'playable_in_embed': True,
                'duration': 4438,
                'availability': 'public',
                'channel_follower_count': int,
                'channel_id': 'UCIdEIHpS0TdkqRkHL5OkLtA',
                'categories': ['Entertainment'],
                'live_status': 'was_live',
                'release_timestamp': 1671793345,
                'channel': 'さなちゃんねる',
                'description': 'md5:6aebf95cc4a1d731aebc01ad6cc9806d',
                'uploader': 'さなちゃんねる',
                'channel_is_verified': True,
                'heatmap': 'count:100',
            },
            'add_ie': ['Youtube'],
            'params': {'skip_download': 'Youtube'},
        },
    ]

    def _real_extract(self, url):
        """
        Return a url result for the target of the consent redirect.

        @raises ExtractorError  (expected) if `continue` is absent or not a valid URL
        """
        # The last `continue` value is used when the parameter appears more than once
        continue_values = parse_qs(url).get('continue', [None])
        redirect_url = url_or_none(continue_values[-1])
        if redirect_url:
            return self.url_result(redirect_url)
        raise ExtractorError('Invalid cookie consent redirect URL', expected=True)
7478
7479
class YoutubeTruncatedIDIE(InfoExtractor):
    """Catch-all for watch URLs whose `v` value is only 1-10 characters long; always fails with a hint."""

    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [
        {
            'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
            'only_matching': True,
        },
    ]

    def _real_extract(self, url):
        """
        Always raise an expected ExtractorError reporting the incomplete video id.

        @raises ExtractorError  unconditionally (expected=True)
        """
        video_id = self._match_id(url)
        message = f'Incomplete YouTube ID {video_id}. URL {url} looks truncated.'
        raise ExtractorError(message, expected=True)